Re: [PATCH v3 05/11] x86/entry: Convert ret_from_fork to C

2021-03-04 Thread Brian Gerst
On Thu, Mar 4, 2021 at 2:16 PM Andy Lutomirski  wrote:
>
> ret_from_fork is written in asm, slightly differently, for x86_32 and
> x86_64.  Convert it to C.
>
> This is a straight conversion without any particular cleverness.  As a
> further cleanup, the code that sets up the ret_from_fork argument registers
> could be adjusted to put the arguments in the correct registers.

An alternative would be to stash the function pointer and argument in
the pt_regs of the new kthread task, and test for PF_KTHREAD instead.
That would eliminate the need to pass those two values to the C
version of ret_from_fork().
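
Roughly (sketch only -- the pt_regs slots below are made up for
illustration; copy_thread() would pick whichever slots it stashes the
values in, and the exit path stays whatever the rest of this patch uses):

__visible void noinstr ret_from_fork(struct task_struct *prev,
				     struct pt_regs *regs)
{
	instrumentation_begin();

	schedule_tail(prev);

	if (unlikely(current->flags & PF_KTHREAD)) {
		/* Hypothetical slots, stashed by copy_thread(): */
		int (*fn)(void *) = (void *)regs->bx;
		void *arg = (void *)regs->di;

		fn(arg);
		/*
		 * A kernel thread only falls through here after a
		 * successful kernel_execve(), at which point *regs holds
		 * real user state.
		 */
	}

	instrumentation_end();
	syscall_exit_to_user_mode(regs);	/* or this patch's exit path */
}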

> This will cause the ORC unwinder to find pt_regs even for kernel threads on
> x86_64.  This seems harmless.
>
> The 32-bit comment above the now-deleted schedule_tail_wrapper was
> obsolete: the encode_frame_pointer mechanism (see copy_thread()) solves the
> same problem more cleanly.
>
> Cc: Josh Poimboeuf 
> Signed-off-by: Andy Lutomirski 
> ---
>  arch/x86/entry/common.c  | 23 ++
>  arch/x86/entry/entry_32.S| 51 +---
>  arch/x86/entry/entry_64.S| 33 +
>  arch/x86/include/asm/switch_to.h |  2 +-
>  arch/x86/kernel/process.c|  2 +-
>  arch/x86/kernel/process_32.c |  2 +-
>  arch/x86/kernel/unwind_orc.c |  2 +-
>  7 files changed, 43 insertions(+), 72 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 95776f16c1cb..ef1c65938a6b 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -214,6 +214,29 @@ SYSCALL_DEFINE0(ni_syscall)
> return -ENOSYS;
>  }
>
> +void ret_from_fork(struct task_struct *prev,
> +  int (*kernel_thread_fn)(void *),
> +  void *kernel_thread_arg,
> +  struct pt_regs *user_regs);
> +
> +__visible void noinstr ret_from_fork(struct task_struct *prev,
> +int (*kernel_thread_fn)(void *),
> +void *kernel_thread_arg,
> +struct pt_regs *user_regs)
> +{
> +   instrumentation_begin();
> +
> +   schedule_tail(prev);
> +
> +   if (kernel_thread_fn) {

This should have an unlikely(), as kernel threads should be the rare case.

--
Brian Gerst


Re: [PATCH V2 3/6] x86_32/sysenter: switch to the task stack without emptying the entry stack

2021-01-26 Thread Brian Gerst
On Mon, Jan 25, 2021 at 11:35 AM Lai Jiangshan  wrote:
>
> From: Lai Jiangshan 
>
> Like the way x86_64 uses the "old" stack, we can save the entry stack
> pointer to a register and switch to the task stack, so that we have
> space on the "old" stack to save more things or scratch registers.
>
> Signed-off-by: Lai Jiangshan 
> ---
>  arch/x86/entry/entry_32.S | 11 +--
>  1 file changed, 5 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index 3b4d1a63d1f0..4513702ba45d 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -905,19 +905,18 @@ SYM_FUNC_START(entry_SYSENTER_32)
> pushl   %eax
> BUG_IF_WRONG_CR3 no_user_check=1
> SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
> -   popl%eax
> -   popfl
>
> -   /* Stack empty again, switch to task stack */
> -   movlTSS_entry2task_stack(%esp), %esp
> +   /* Switch to task stack */
> +   movl%esp, %eax
> +   movl(2*4+TSS_entry2task_stack)(%esp), %esp
>
>  .Lsysenter_past_esp:
> pushl   $__USER_DS  /* pt_regs->ss */
> pushl   $0  /* pt_regs->sp (placeholder) */
> -   pushfl  /* pt_regs->flags (except IF = 0) */
> +   pushl   4(%eax) /* pt_regs->flags (except IF = 0) */

__KERNEL_DS isn't loaded at this point, so this needs an explicit %ss:
override.  You probably didn't catch this because the default
__USER_DS was still loaded.

> pushl   $__USER_CS  /* pt_regs->cs */
> pushl   $0  /* pt_regs->ip = 0 (placeholder) */
> -   pushl   %eax/* pt_regs->orig_ax */
> +   pushl   (%eax)  /* pt_regs->orig_ax */

Add an %ss: override here too.

> SAVE_ALL pt_regs_ax=$-ENOSYS/* save rest, stack already switched 
> */
>
> /*
> --
> 2.19.1.6.gb485710b
>

--
Brian Gerst


Re: [PATCH] x86/vm86/32: Remove VM86_SCREEN_BITMAP support

2021-01-08 Thread Brian Gerst
On Fri, Jan 8, 2021 at 1:59 PM Andy Lutomirski  wrote:
>
> The implementation was rather buggy.  It unconditionally marked PTEs
> read-only, even for VM_SHARED mappings.  I'm not sure whether this is
> actually a problem, but it certainly seems unwise.  More importantly, it
> released the mmap lock before flushing the TLB, which could allow a racing
> CoW operation to falsely believe that the underlying memory was not
> writable.
>
> I can't find any users at all of this mechanism, so just remove it.
>
> Cc: Andrea Arcangeli 
> Cc: Linux-MM 
> Cc: Jason Gunthorpe 
> Cc: x...@kernel.org
> Cc: Linus Torvalds 
> Cc: Matthew Wilcox 
> Cc: Jann Horn 
> Cc: Jan Kara 
> Cc: Yu Zhao 
> Cc: Peter Xu 
> Signed-off-by: Andy Lutomirski 
> ---
>  arch/x86/include/uapi/asm/vm86.h |  2 +-
>  arch/x86/kernel/vm86_32.c| 55 ++--
>  2 files changed, 10 insertions(+), 47 deletions(-)
>
> diff --git a/arch/x86/include/uapi/asm/vm86.h 
> b/arch/x86/include/uapi/asm/vm86.h
> index d2ee4e307ef8..50004fb4590d 100644
> --- a/arch/x86/include/uapi/asm/vm86.h
> +++ b/arch/x86/include/uapi/asm/vm86.h
> @@ -106,7 +106,7 @@ struct vm86_struct {
>  /*
>   * flags masks
>   */
> -#define VM86_SCREEN_BITMAP 0x0001
> +#define VM86_SCREEN_BITMAP 0x0001/* no longer supported */
>
>  struct vm86plus_info_struct {
> unsigned long force_return_for_pic:1;
> diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
> index 764573de3996..28b9e8d511e1 100644
> --- a/arch/x86/kernel/vm86_32.c
> +++ b/arch/x86/kernel/vm86_32.c
> @@ -160,49 +160,6 @@ void save_v86_state(struct kernel_vm86_regs *regs, int 
> retval)
> do_exit(SIGSEGV);
>  }
>
> -static void mark_screen_rdonly(struct mm_struct *mm)
> -{
> -   struct vm_area_struct *vma;
> -   spinlock_t *ptl;
> -   pgd_t *pgd;
> -   p4d_t *p4d;
> -   pud_t *pud;
> -   pmd_t *pmd;
> -   pte_t *pte;
> -   int i;
> -
> -   mmap_write_lock(mm);
> -   pgd = pgd_offset(mm, 0xA0000);
> -   if (pgd_none_or_clear_bad(pgd))
> -   goto out;
> -   p4d = p4d_offset(pgd, 0xA0000);
> -   if (p4d_none_or_clear_bad(p4d))
> -   goto out;
> -   pud = pud_offset(p4d, 0xA0000);
> -   if (pud_none_or_clear_bad(pud))
> -   goto out;
> -   pmd = pmd_offset(pud, 0xA0000);
> -
> -   if (pmd_trans_huge(*pmd)) {
> -   vma = find_vma(mm, 0xA0000);
> -   split_huge_pmd(vma, pmd, 0xA0000);
> -   }
> -   if (pmd_none_or_clear_bad(pmd))
> -   goto out;
> -   pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
> -   for (i = 0; i < 32; i++) {
> -   if (pte_present(*pte))
> -   set_pte(pte, pte_wrprotect(*pte));
> -   pte++;
> -   }
> -   pte_unmap_unlock(pte, ptl);
> -out:
> -   mmap_write_unlock(mm);
> -   flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false);
> -}
> -
> -
> -
>  static int do_vm86_irq_handling(int subfunction, int irqnumber);
>  static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
>
> @@ -282,6 +239,15 @@ static long do_sys_vm86(struct vm86plus_struct __user 
> *user_vm86, bool plus)
> offsetof(struct vm86_struct, int_revectored)))
> return -EFAULT;
>
> +
> +   /* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. 
> */
> +   if (v.flags & VM86_SCREEN_BITMAP) {
> +   char comm[TASK_COMM_LEN];
> +
> +   pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current));
> +   return -EINVAL;
> +   }
> +
> memset(&vm86regs, 0, sizeof(vm86regs));
>
> vm86regs.pt.bx = v.regs.ebx;
> @@ -370,9 +336,6 @@ static long do_sys_vm86(struct vm86plus_struct __user 
> *user_vm86, bool plus)
> update_task_stack(tsk);
> preempt_enable();
>
> -   if (vm86->flags & VM86_SCREEN_BITMAP)
> -   mark_screen_rdonly(tsk->mm);
> -
> memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
> return regs->ax;
>  }

You can also remove screen_bitmap from struct vm86 (the kernel
internal structure), and the check_v8086_mode() function.

--
Brian Gerst


[tip: x86/urgent] fanotify: Fix sys_fanotify_mark() on native x86-32

2020-12-28 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/urgent branch of tip:

Commit-ID: 2ca408d9c749c32288bc28725f9f12ba30299e8f
Gitweb:
https://git.kernel.org/tip/2ca408d9c749c32288bc28725f9f12ba30299e8f
Author:Brian Gerst 
AuthorDate:Mon, 30 Nov 2020 17:30:59 -05:00
Committer: Borislav Petkov 
CommitterDate: Mon, 28 Dec 2020 11:58:59 +01:00

fanotify: Fix sys_fanotify_mark() on native x86-32

Commit

  121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 
64-bit arguments")

converted native x86-32 syscalls which take 64-bit arguments to use the
compat handlers to allow conversion to passing args via pt_regs.
sys_fanotify_mark() was however missed, as it has a general compat
handler. Add a config option that will use the syscall wrapper that
takes the split args for native 32-bit.

 [ bp: Fix typo in Kconfig help text. ]

Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls 
taking 64-bit arguments")
Reported-by: Paweł Jasiak 
Signed-off-by: Brian Gerst 
Signed-off-by: Borislav Petkov 
Acked-by: Jan Kara 
Acked-by: Andy Lutomirski 
Link: https://lkml.kernel.org/r/20201130223059.101286-1-brge...@gmail.com
---
 arch/Kconfig   |  6 ++
 arch/x86/Kconfig   |  1 +
 fs/notify/fanotify/fanotify_user.c | 17 +++--
 include/linux/syscalls.h   | 24 
 4 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 78c6f05..24862d1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
bool
 
+config ARCH_SPLIT_ARG64
+   bool
+   help
+  If a 32-bit architecture requires 64-bit arguments to be split into
+  pairs of 32-bit arguments, select this option.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7b6dd10..21f8511 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,7 @@ config X86_32
select KMAP_LOCAL
select MODULES_USE_ELF_REL
select OLD_SIGACTION
+   select ARCH_SPLIT_ARG64
 
 config X86_64
def_bool y
diff --git a/fs/notify/fanotify/fanotify_user.c 
b/fs/notify/fanotify/fanotify_user.c
index 3e01d8f..dcab112 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1285,26 +1285,23 @@ fput_and_out:
return ret;
 }
 
+#ifndef CONFIG_ARCH_SPLIT_ARG64
 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
  __u64, mask, int, dfd,
  const char  __user *, pathname)
 {
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
 }
+#endif
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(fanotify_mark,
+#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
+SYSCALL32_DEFINE6(fanotify_mark,
int, fanotify_fd, unsigned int, flags,
-   __u32, mask0, __u32, mask1, int, dfd,
+   SC_ARG64(mask), int, dfd,
const char  __user *, pathname)
 {
-   return do_fanotify_mark(fanotify_fd, flags,
-#ifdef __BIG_ENDIAN
-   ((__u64)mask0 << 32) | mask1,
-#else
-   ((__u64)mask1 << 32) | mask0,
-#endif
-dfd, pathname);
+   return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
+   dfd, pathname);
 }
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f3929af..7688bc9 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct 
trace_event_call *tp_event)
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 #endif /* __SYSCALL_DEFINEx */
 
+/* For split 64-bit arguments on 32-bit architectures */
+#ifdef __LITTLE_ENDIAN
+#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
+#else
+#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
+#endif
+#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
+
+#ifdef CONFIG_COMPAT
+#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
+#else
+#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
+#endif
+
 /*
  * Called before coming back to user-mode. Returning to user

Re: [PATCH] fanotify: Fix sys_fanotify_mark() on native x86-32

2020-12-01 Thread Brian Gerst
On Tue, Dec 1, 2020 at 12:34 PM Andy Lutomirski  wrote:
>
> On Tue, Dec 1, 2020 at 9:23 AM Andy Lutomirski  wrote:
> >
> > On Mon, Nov 30, 2020 at 2:31 PM Brian Gerst  wrote:
> > >
> > > Commit 121b32a58a3a converted native x86-32 syscalls which take 64-bit arguments to
> > > use the compat handlers to allow conversion to passing args via pt_regs.
> > > sys_fanotify_mark() was however missed, as it has a general compat 
> > > handler.
> > > Add a config option that will use the syscall wrapper that takes the split
> > > args for native 32-bit.
> > >
> > > Reported-by: Paweł Jasiak 
> > > Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for 
> > > syscalls taking 64-bit arguments")
> > > Signed-off-by: Brian Gerst 
> > > ---
> > >  arch/Kconfig   |  6 ++
> > >  arch/x86/Kconfig   |  1 +
> > >  fs/notify/fanotify/fanotify_user.c | 17 +++--
> > >  include/linux/syscalls.h   | 24 
> > >  4 files changed, 38 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/arch/Kconfig b/arch/Kconfig
> > > index 090ef3566c56..452cc127c285 100644
> > > --- a/arch/Kconfig
> > > +++ b/arch/Kconfig
> > > @@ -1045,6 +1045,12 @@ config HAVE_STATIC_CALL_INLINE
> > > bool
> > > depends on HAVE_STATIC_CALL
> > >
> > > +config ARCH_SPLIT_ARG64
> > > +   bool
> > > +   help
> > > +  If a 32-bit architecture requires 64-bit arguments to be split 
> > > into
> > > +  pairs of 32-bit arguemtns, select this option.
> >
> > You misspelled arguments.  You might also want to clarify that, for
> > 64-bit arches, this means that compat syscalls split their arguments.
>
> No, that's backwards.  Maybe it should be depends !64BIT instead.
>
> But I'm really quite confused about something: what's special about
> x86 here?

x86 is special because of the pt_regs-based syscall interface.  It
would be nice to get all arches to that point eventually.

> Are there really Linux arches (compat or 32-bit native)
> that *don't* split arguments like this?  Sure, some arches probably
> work the same way that x86 used to in which the compiler did the
> splitting by magic for us, but that was always a bit of a kludge.
> Could this change maybe be made unconditional?

It probably can be made unconditional.  That will take some research
on which arches have the implicit alignment requirement.  From looking
at the existing compat handlers, ARM, MIPS, and PowerPC 32-bit ABIs
need alignment.
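
For reference, on a little-endian 32-bit build the SYSCALL32_DEFINE6()
wrapper from the patch boils down to the same thing as the open-coded
compat handler it replaces (ignoring the SYSCALL_DEFINEx plumbing):

long fanotify_mark(int fanotify_fd, unsigned int flags,
		   u32 mask_lo, u32 mask_hi,	/* SC_ARG64(mask) */
		   int dfd, const char __user *pathname)
{
	return do_fanotify_mark(fanotify_fd, flags,
				(__u64)mask_hi << 32 | mask_lo,	/* SC_VAL64() */
				dfd, pathname);
}

So an unconditional version would only change which arches get this
split form, not what the split form itself looks like.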

--
Brian Gerst


[PATCH] fanotify: Fix sys_fanotify_mark() on native x86-32

2020-11-30 Thread Brian Gerst
Commit 121b32a58a3a converted native x86-32 syscalls which take 64-bit arguments to
use the compat handlers to allow conversion to passing args via pt_regs.
sys_fanotify_mark() was however missed, as it has a general compat handler.
Add a config option that will use the syscall wrapper that takes the split
args for native 32-bit.

Reported-by: Paweł Jasiak 
Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls 
taking 64-bit arguments")
Signed-off-by: Brian Gerst 
---
 arch/Kconfig   |  6 ++
 arch/x86/Kconfig   |  1 +
 fs/notify/fanotify/fanotify_user.c | 17 +++--
 include/linux/syscalls.h   | 24 
 4 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 090ef3566c56..452cc127c285 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1045,6 +1045,12 @@ config HAVE_STATIC_CALL_INLINE
bool
depends on HAVE_STATIC_CALL
 
+config ARCH_SPLIT_ARG64
+   bool
+   help
+  If a 32-bit architecture requires 64-bit arguments to be split into
+  pairs of 32-bit arguemtns, select this option.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e4499b01ae9a..41189d3de9fb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,7 @@ config X86_32
select KMAP_LOCAL
select MODULES_USE_ELF_REL
select OLD_SIGACTION
+   select ARCH_SPLIT_ARG64
 
 config X86_64
def_bool y
diff --git a/fs/notify/fanotify/fanotify_user.c 
b/fs/notify/fanotify/fanotify_user.c
index 3e01d8f2ab90..dcab112e1f00 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1285,26 +1285,23 @@ static int do_fanotify_mark(int fanotify_fd, unsigned 
int flags, __u64 mask,
return ret;
 }
 
+#ifndef CONFIG_ARCH_SPLIT_ARG64
 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
  __u64, mask, int, dfd,
  const char  __user *, pathname)
 {
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
 }
+#endif
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(fanotify_mark,
+#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
+SYSCALL32_DEFINE6(fanotify_mark,
int, fanotify_fd, unsigned int, flags,
-   __u32, mask0, __u32, mask1, int, dfd,
+   SC_ARG64(mask), int, dfd,
const char  __user *, pathname)
 {
-   return do_fanotify_mark(fanotify_fd, flags,
-#ifdef __BIG_ENDIAN
-   ((__u64)mask0 << 32) | mask1,
-#else
-   ((__u64)mask1 << 32) | mask0,
-#endif
-dfd, pathname);
+   return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
+   dfd, pathname);
 }
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 37bea07c12f2..aea0ce9f3b74 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct 
trace_event_call *tp_event)
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 #endif /* __SYSCALL_DEFINEx */
 
+/* For split 64-bit arguments on 32-bit architectures */
+#ifdef __LITTLE_ENDIAN
+#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
+#else
+#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
+#endif
+#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
+
+#ifdef CONFIG_COMPAT
+#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
+#else
+#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
+#endif
+
 /*
  * Called before coming back to user-mode. Returning to user-mode with an
  * address limit different than USER_DS can allow to overwrite kernel memory.
-- 
2.26.2



Re: [PATCH 1/2] x86/stackprotector/32: Make the canary into a regular percpu variable

2020-10-06 Thread Brian Gerst
On Mon, Oct 5, 2020 at 3:31 PM Andy Lutomirski  wrote:
>
> On 32-bit kernels, the stackprotector canary is quite nasty -- it is
> stored at %gs:(20), which is nasty because 32-bit kernels use %fs for
> percpu storage.  It's even nastier because it means that whether %gs
> contains userspace state or kernel state while running kernel code
> sepends on whether stackprotector is enabled (this is
> CONFIG_X86_32_LAZY_GS), and this setting radically changes the way
> that segment selectors work.  Supporting both variants is a
> maintenance and testing mess.
>
> Merely rearranging so that percpu and the stack canary
> share the same segment would be messy as the 32-bit percpu address
> layout isn't currently compatible with putting a variable at a fixed
> offset.
>
> Fortunately, GCC 8.1 added options that allow the stack canary to be
> accessed as %fs:stack_canary, effectively turning it into an ordinary
> percpu variable.  This lets us get rid of all of the code to manage
> the stack canary GDT descriptor and the CONFIG_X86_32_LAZY_GS mess.
>
> This patch forcibly disables stackprotector on older compilers that
> don't support the new options and makes the stack canary into a
> percpu variable.

This doesn't consider !SMP builds, where per-cpu variables are just
normal variables, and the FS segment is disabled.  Unfortunately,
-mstack-protector-guard-reg=ds is not accepted.  The simplest fix
would be to define __KERNEL_PERCPU when either SMP or STACKPROTECTOR
is enabled.
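
e.g. something like this in asm/segment.h (untested sketch, assuming the
selector definition keeps its current form):

#if defined(CONFIG_SMP) || defined(CONFIG_STACKPROTECTOR)
# define __KERNEL_PERCPU		(GDT_ENTRY_PERCPU*8)
#else
# define __KERNEL_PERCPU		0
#endif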

--
Brian Gerst


Re: [PATCH 0/2] Clean up x86_32 stackprotector

2020-10-05 Thread Brian Gerst
On Mon, Oct 5, 2020 at 3:30 PM Andy Lutomirski  wrote:
>
> x86_32 stackprotector is a maintenance nightmare.  Clean it up.  This
disables stackprotector on x86_32 on GCC < 8.1 and on all clang
> versions -- I'll file a bug for the latter.

This should be doable on 64-bit too.  All that would need to be done
is to remove the zero-base of the percpu segment (which would simplify
a lot of other code).

--
Brian Gerst


Re: [PATCH 1/1] efi/libstub/x86: simplify efi_is_native()

2020-10-03 Thread Brian Gerst
On Sat, Oct 3, 2020 at 2:05 AM Heinrich Schuchardt  wrote:
>
> CONFIG_EFI_MIXED depends on CONFIG_X86_64=y.
> There is no need to check CONFIG_X86_64 again.
>
> Signed-off-by: Heinrich Schuchardt 
> ---
>  arch/x86/include/asm/efi.h | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
> index b9c2667ac46c..ab28bf1c74cf 100644
> --- a/arch/x86/include/asm/efi.h
> +++ b/arch/x86/include/asm/efi.h
> @@ -223,8 +223,6 @@ static inline bool efi_is_64bit(void)
>
>  static inline bool efi_is_native(void)
>  {
> -   if (!IS_ENABLED(CONFIG_X86_64))
> -   return true;
> return efi_is_64bit();
>  }

This would then return false for native 32-bit.

--
Brian Gerst


Re: [PATCH 5/9] fs: remove various compat readv/writev helpers

2020-09-23 Thread Brian Gerst
On Wed, Sep 23, 2020 at 12:39 PM Al Viro  wrote:
>
> On Wed, Sep 23, 2020 at 03:59:01PM +0100, Al Viro wrote:
>
> > > That's a very good question.  But it does not just compile but actually
> > > works.  Probably because all the syscall wrappers mean that we don't
> > > actually generate the normal names.  I just tried this:
> > >
> > > --- a/include/linux/syscalls.h
> > > +++ b/include/linux/syscalls.h
> > > @@ -468,7 +468,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t 
> > > offset,
> > >  asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t 
> > > count);
> > >  asmlinkage long sys_write(unsigned int fd, const char __user *buf,
> > > size_t count);
> > > -asmlinkage long sys_readv(unsigned long fd,
> > > +asmlinkage long sys_readv(void *fd,
> > >
> > > for fun, and the compiler doesn't care either..
> >
> > Try to build it for sparc or ppc...
>
> FWIW, declarations in syscalls.h used to serve 4 purposes:
> 1) syscall table initializers needed symbols declared
> 2) direct calls needed the same
> 3) catching mismatches between the declarations and definitions
> 4) centralized list of all syscalls
>
> (2) has been (thankfully) reduced for some time; in any case, ksys_... is
> used for the remaining ones.
>
> (1) and (3) are served by syscalls.h in architectures other than x86, arm64
> and s390.  On those 3 (1) is done otherwise (near the syscall table 
> initializer)
> and (3) is not done at all.
>
> I wonder if we should do something like
>
> SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
>  unsigned long, vlen);
> in syscalls.h instead, and not under that ifdef.
>
> Let it expand to declaration of sys_...() in generic case and, on x86, into
> __do_sys_...() and __ia32_sys_...()/__x64_sys_...(), with types matching
> what SYSCALL_DEFINE ends up using.
>
> Similar macro would cover compat_sys_...() declarations.  That would
> restore mismatch checking for x86 and friends.  AFAICS, the cost wouldn't
> be terribly high - cpp would have more to chew through in syscalls.h,
> but it shouldn't be all that costly.  Famous last words, of course...
>
> Does anybody see fundamental problems with that?

I think this would be a good idea.  I have been working on a patchset
to clean up the conditional syscall handling (sys_ni.c), and conflicts
with the prototypes in syscalls.h have been getting in the way.
Having the prototypes use SYSCALL_DECLAREx(...) would solve that
issue.
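
A minimal sketch of the generic side of such a macro, reusing the
existing __MAP()/__SC_DECL helpers (arch headers would override it to
declare the __ia32_/__x64_ variants instead):

#define SYSCALL_DECLAREx(x, name, ...)					\
	asmlinkage long sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))

#define SYSCALL_DECLARE3(name, ...) SYSCALL_DECLAREx(3, _##name, __VA_ARGS__)

/* which makes Al's example: */
SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		 unsigned long, vlen);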

--
Brian Gerst


Re: [PATCH 1/4] x86: add __X32_COND_SYSCALL() macro

2020-09-19 Thread Brian Gerst
An alternative to the patch I proposed earlier would be to use aliases
with the __x32_ prefix for the common syscalls.

--
Brian Gerst

On Sat, Sep 19, 2020 at 1:14 PM  wrote:
>
> On September 19, 2020 9:23:22 AM PDT, Andy Lutomirski  wrote:
> >On Fri, Sep 18, 2020 at 10:35 PM Christoph Hellwig 
> >wrote:
> >>
> >> On Fri, Sep 18, 2020 at 03:24:36PM +0200, Arnd Bergmann wrote:
> >> > sys_move_pages() is an optional syscall, and once we remove
> >> > the compat version of it in favor of the native one with an
> >> > in_compat_syscall() check, the x32 syscall table refers to
> >> > a __x32_sys_move_pages symbol that may not exist when the
> >> > syscall is disabled.
> >> >
> >> > Change the COND_SYSCALL() definition on x86 to also include
> >> > the redirection for x32.
> >> >
> >> > Signed-off-by: Arnd Bergmann 
> >>
> >> Adding the x86 maintainers and Brian Gerst.  Brian proposed another
> >> problem to the mess that most of the compat syscall handlers used by
> >> x32 here:
> >>
> >>https://lkml.org/lkml/2020/6/16/664
> >>
> >> hpa didn't particularly like it, but with your and my pending series
> >> we'll soon use more native than compat syscalls for x32, so something
> >> will need to change..
> >
> >I'm fine with either solution.
>
> My main objection was naming. x64 is a widely used synonym for x86-64, and so 
> that is confusing.
>
> --
> Sent from my Android device with K-9 Mail. Please excuse my brevity.


Re: [PATCH 01/13] x86/entry: Fix AC assertion

2020-09-02 Thread Brian Gerst
On Wed, Sep 2, 2020 at 12:31 PM  wrote:
>
> On Wed, Sep 02, 2020 at 06:24:27PM +0200, Jürgen Groß wrote:
> > On 02.09.20 17:58, Brian Gerst wrote:
> > > On Wed, Sep 2, 2020 at 9:38 AM Peter Zijlstra  
> > > wrote:
> > > >
> > > > From: Peter Zijlstra 
> > > >
> > > > The WARN added in commit 3c73b81a9164 ("x86/entry, selftests: Further
> > > > improve user entry sanity checks") unconditionally triggers on my IVB
> > > > machine because it does not support SMAP.
> > > >
> > > > For !SMAP hardware we patch out CLAC/STAC instructions and thus if
> > > > userspace sets AC, we'll still have it set after entry.
> > > >
> > > > Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry 
> > > > sanity checks")
> > > > Signed-off-by: Peter Zijlstra (Intel) 
> > > > Acked-by: Andy Lutomirski 
> > > > ---
> > > >   arch/x86/include/asm/entry-common.h |   11 +--
> > > >   1 file changed, 9 insertions(+), 2 deletions(-)
> > > >
> > > > --- a/arch/x86/include/asm/entry-common.h
> > > > +++ b/arch/x86/include/asm/entry-common.h
> > > > @@ -18,8 +18,16 @@ static __always_inline void arch_check_u
> > > >   * state, not the interrupt state as imagined by Xen.
> > > >   */
> > > >  unsigned long flags = native_save_fl();
> > > > -   WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
> > > > - X86_EFLAGS_NT));
> > > > +   unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
> > > > +
> > > > +   /*
> > > > +* For !SMAP hardware we patch out CLAC on entry.
> > > > +*/
> > > > +   if (boot_cpu_has(X86_FEATURE_SMAP) ||
> > > > +   (IS_ENABLED(CONFIG_64_BIT) && 
> > > > boot_cpu_has(X86_FEATURE_XENPV)))
> > > > +       mask |= X86_EFLAGS_AC;
> > >
> > > Is the explicit Xen check necessary?  IIRC the Xen hypervisor will
> > > filter out the SMAP bit in the cpuid pvop.
> >
> > Right, and this test will nevertheless result in setting AC in the mask.
> > IIRC this was the main objective here.
>
> Correct, this asserts that 64bit Xen-PV will never have AC set; it had
> better not have it set since it runs in ring 3.

Ok.  That should be added to the comment to avoid confusion.
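
Something along these lines, perhaps:

	/*
	 * For !SMAP hardware we patch out CLAC on entry, so userspace
	 * can leave AC set.  64-bit Xen PV guests run in ring 3, so AC
	 * should never be set there; keep it in the mask so the
	 * assertion catches it.
	 */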

--
Brian Gerst


Re: [PATCH 01/13] x86/entry: Fix AC assertion

2020-09-02 Thread Brian Gerst
On Wed, Sep 2, 2020 at 9:38 AM Peter Zijlstra  wrote:
>
> From: Peter Zijlstra 
>
> The WARN added in commit 3c73b81a9164 ("x86/entry, selftests: Further
> improve user entry sanity checks") unconditionally triggers on my IVB
> machine because it does not support SMAP.
>
> For !SMAP hardware we patch out CLAC/STAC instructions and thus if
> userspace sets AC, we'll still have it set after entry.
>
> Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity 
> checks")
> Signed-off-by: Peter Zijlstra (Intel) 
> Acked-by: Andy Lutomirski 
> ---
>  arch/x86/include/asm/entry-common.h |   11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
>
> --- a/arch/x86/include/asm/entry-common.h
> +++ b/arch/x86/include/asm/entry-common.h
> @@ -18,8 +18,16 @@ static __always_inline void arch_check_u
>  * state, not the interrupt state as imagined by Xen.
>  */
> unsigned long flags = native_save_fl();
> -   WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
> - X86_EFLAGS_NT));
> +   unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
> +
> +   /*
> +* For !SMAP hardware we patch out CLAC on entry.
> +*/
> +   if (boot_cpu_has(X86_FEATURE_SMAP) ||
> +   (IS_ENABLED(CONFIG_64_BIT) && 
> boot_cpu_has(X86_FEATURE_XENPV)))
> +   mask |= X86_EFLAGS_AC;

Is the explicit Xen check necessary?  IIRC the Xen hypervisor will
filter out the SMAP bit in the cpuid pvop.

--
Brian Gerst


Re: ptrace_syscall_32 is failing

2020-08-29 Thread Brian Gerst
On Sat, Aug 29, 2020 at 12:52 PM Andy Lutomirski  wrote:
>
> Seems to be a recent regression, maybe related to entry/exit work changes.
>
> # ./tools/testing/selftests/x86/ptrace_syscall_32
> [RUN]Check int80 return regs
> [OK]getpid() preserves regs
> [OK]kill(getpid(), SIGUSR1) preserves regs
> [RUN]Check AT_SYSINFO return regs
> [OK]getpid() preserves regs
> [OK]kill(getpid(), SIGUSR1) preserves regs
> [RUN]ptrace-induced syscall restart
> Child will make one syscall
> [RUN]SYSEMU
> [FAIL]Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732)
> [RUN]Restart the syscall (ip = 0xf7f3b549)
> [OK]Restarted nr and args are correct
> [RUN]Change nr and args and restart the syscall (ip = 0xf7f3b549)
> [OK]Replacement nr and args are correct
> [OK]Child exited cleanly
> [RUN]kernel syscall restart under ptrace
> Child will take a nap until signaled
> [RUN]SYSCALL
> [FAIL]Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732)
> [RUN]SYSCALL
> [OK]Args after SIGUSR1 are correct (ax = -514)
> [OK]Child got SIGUSR1
> [RUN]Step again
> [OK]pause(2) restarted correctly

Bisected to commit 0b085e68f407 ("x86/entry: Consolidate 32/64 bit
syscall entry").
It looks like it is because syscall_enter_from_user_mode() is called
before reading the 6th argument from the user stack.

--
Brian Gerst


Re: [patch V9 21/39] x86/irq: Convey vector as argument and not in ptregs

2020-08-25 Thread Brian Gerst
> > void irq_complete_move(struct irq_cfg *cfg)
> > {
> >  __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
> > }
> >
> > in arch/x86/kernel/apic/vector.c.
> >
>
> Thanks a lot, I stumbled over the same thing just after I sent the email
> as well and had been trying to see if I can quickly patch it up before I
> fall asleep :).
>
> The code path above is used by the APIC vector move (irqbalance) logic,
> which explains why not everyone was seeing issues.
>
> So with 633260fa1 applied, we never get out of moving state for our IRQ
> because orig_ax is always -1. That means we send an IPI to the cleanup
> vector on every single device interrupt, completely occupying the poor
> CPU that we moved the IRQ from.
>
> I've confirmed that the patch below fixes the issue and will send a
> proper, more complete patch on top of mainline with fancy description
> and stable tag tomorrow.
>
>
> Alex
>
>
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index e7434cd..a474e6e 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -734,7 +734,6 @@ SYM_CODE_START_LOCAL(common_spurious)
> callinterrupt_entry
> UNWIND_HINT_REGS indirect=1
> movqORIG_RAX(%rdi), %rsi    /* get vector from stack */
> -   movq$-1, ORIG_RAX(%rdi) /* no syscall to restart */
> callsmp_spurious_interrupt  /* rdi points to pt_regs */
> jmp ret_from_intr
>   SYM_CODE_END(common_spurious)
> @@ -746,7 +745,6 @@ SYM_CODE_START_LOCAL(common_interrupt)
> callinterrupt_entry
> UNWIND_HINT_REGS indirect=1
> movqORIG_RAX(%rdi), %rsi/* get vector from stack */
> -   movq$-1, ORIG_RAX(%rdi) /* no syscall to restart */
> calldo_IRQ  /* rdi points to pt_regs */
> /* 0(%rsp): old RSP */
>   ret_from_intr:
> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
> index 67768e5443..5b6f74e 100644
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -934,7 +934,7 @@ static void __irq_complete_move(struct irq_cfg *cfg,
> unsigned vector)
>
>   void irq_complete_move(struct irq_cfg *cfg)
>   {
> -   __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
> +   __irq_complete_move(cfg, get_irq_regs()->orig_ax);
>   }

I think you need to also truncate the vector to 8-bits, since it now
gets sign-extended when pushed into the orig_ax slot.
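
i.e. something like (untested):

void irq_complete_move(struct irq_cfg *cfg)
{
	/* orig_ax was sign-extended when the vector was pushed */
	__irq_complete_move(cfg, (u8)get_irq_regs()->orig_ax);
}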

--
Brian Gerst


Re: [PATCH v2] x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM

2020-08-25 Thread Brian Gerst
On Tue, Aug 25, 2020 at 6:44 AM Thomas Gleixner  wrote:
>
> On Fri, Aug 21 2020 at 11:35, Brian Gerst wrote:
> > On Fri, Aug 21, 2020 at 10:22 AM Sean Christopherson
> >> >  .macro GET_PERCPU_BASE reg:req
> >> > - ALTERNATIVE \
> >> > - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \
> >> > - "RDPID  \reg", \
> >>
> >> This was the only user of the RDPID macro, I assume we want to yank that 
> >> out
> >> as well?
> >
> > No.  That one should be kept until the minimum binutils version is
> > raised to one that supports the RDPID opcode.
>
> The macro is unused and nothing in the kernel can use RDPID as we just
> established.

It is opencoded in vdso_read_cpunode(), but the RDPID macro can't be
used there.  So you are correct, it can be removed.

--
Brian Gerst


Re: [PATCH v2] x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM

2020-08-21 Thread Brian Gerst
On Fri, Aug 21, 2020 at 10:22 AM Sean Christopherson
 wrote:
>
> On Fri, Aug 21, 2020 at 06:52:29AM -0400, Paolo Bonzini wrote:
> > From: Sean Christopherson 
> >
> > Don't use RDPID in the paranoid entry flow, as it can consume a KVM
> > guest's MSR_TSC_AUX value if an NMI arrives during KVM's run loop.
> >
> > In general, the kernel does not need TSC_AUX because it can just use
> > __this_cpu_read(cpu_number) to read the current processor id.  It can
> > also just block preemption and thread migration at its will, therefore
> > it has no need for the atomic rdtsc+vgetcpu provided by RDTSCP.  For this
> > reason, as a performance optimization, KVM loads the guest's TSC_AUX when
> > a CPU first enters its run loop.  On AMD's SVM, it doesn't restore the
> > host's value until the CPU exits the run loop; VMX is even more aggressive
> > and defers restoring the host's value until the CPU returns to userspace.
> >
> > This optimization obviously relies on the kernel not consuming TSC_AUX,
> > which falls apart if an NMI arrives during the run loop and uses RDPID.
> > Removing it would be painful, as both SVM and VMX would need to context
> > switch the MSR on every VM-Enter (for a cost of 2x WRMSR), whereas using
> > LSL instead of RDPID is a minor blip.
> >
> > Both SAVE_AND_SET_GSBASE and GET_PERCPU_BASE are only used in paranoid 
> > entry,
> > therefore the patch can just remove the RDPID alternative.
> >
> > Fixes: eaad981291ee3 ("x86/entry/64: Introduce the FIND_PERCPU_BASE macro")
> > Cc: Dave Hansen 
> > Cc: Chang Seok Bae 
> > Cc: Peter Zijlstra 
> > Cc: Sasha Levin 
> > Cc: Paolo Bonzini 
> > Cc: k...@vger.kernel.org
> > Reported-by: Tom Lendacky 
> > Debugged-by: Tom Lendacky 
> > Suggested-by: Andy Lutomirski 
> > Suggested-by: Peter Zijlstra 
> > Signed-off-by: Sean Christopherson 
> > Signed-off-by: Paolo Bonzini 
> > ---
> >  arch/x86/entry/calling.h | 10 ++
> >  1 file changed, 6 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
> > index 98e4d8886f11..ae9b0d4615b3 100644
> > --- a/arch/x86/entry/calling.h
> > +++ b/arch/x86/entry/calling.h
> > @@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel 
> > is built with
> >   * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
> >   * We normally use %gs for accessing per-CPU data, but we are setting up
> >   * %gs here and obviously can not use %gs itself to access per-CPU data.
> > + *
> > + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and
> > + * may not restore the host's value until the CPU returns to userspace.
> > + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
> > + * while running KVM's run loop.
> >   */
> >  .macro GET_PERCPU_BASE reg:req
> > - ALTERNATIVE \
> > - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \
> > - "RDPID  \reg", \
>
> This was the only user of the RDPID macro, I assume we want to yank that out
> as well?

No.  That one should be kept until the minimum binutils version is
raised to one that supports the RDPID opcode.

--
Brian Gerst


Re: [PATCH v2] x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM

2020-08-21 Thread Brian Gerst
On Fri, Aug 21, 2020 at 6:56 AM Paolo Bonzini  wrote:
>
> From: Sean Christopherson 
>
> Don't use RDPID in the paranoid entry flow, as it can consume a KVM
> guest's MSR_TSC_AUX value if an NMI arrives during KVM's run loop.
>
> In general, the kernel does not need TSC_AUX because it can just use
> __this_cpu_read(cpu_number) to read the current processor id.  It can
> also just block preemption and thread migration at its will, therefore
> it has no need for the atomic rdtsc+vgetcpu provided by RDTSCP.  For this
> reason, as a performance optimization, KVM loads the guest's TSC_AUX when
> a CPU first enters its run loop.  On AMD's SVM, it doesn't restore the
> host's value until the CPU exits the run loop; VMX is even more aggressive
> and defers restoring the host's value until the CPU returns to userspace.
>
> This optimization obviously relies on the kernel not consuming TSC_AUX,
> which falls apart if an NMI arrives during the run loop and uses RDPID.
> Removing it would be painful, as both SVM and VMX would need to context
> switch the MSR on every VM-Enter (for a cost of 2x WRMSR), whereas using
> LSL instead of RDPID is a minor blip.
>
> Both SAVE_AND_SET_GSBASE and GET_PERCPU_BASE are only used in paranoid entry,
> therefore the patch can just remove the RDPID alternative.
>
> Fixes: eaad981291ee3 ("x86/entry/64: Introduce the FIND_PERCPU_BASE macro")
> Cc: Dave Hansen 
> Cc: Chang Seok Bae 
> Cc: Peter Zijlstra 
> Cc: Sasha Levin 
> Cc: Paolo Bonzini 
> Cc: k...@vger.kernel.org
> Reported-by: Tom Lendacky 
> Debugged-by: Tom Lendacky 
> Suggested-by: Andy Lutomirski 
> Suggested-by: Peter Zijlstra 
> Signed-off-by: Sean Christopherson 
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/entry/calling.h | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
> index 98e4d8886f11..ae9b0d4615b3 100644
> --- a/arch/x86/entry/calling.h
> +++ b/arch/x86/entry/calling.h
> @@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel 
> is built with
>   * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
>   * We normally use %gs for accessing per-CPU data, but we are setting up
>   * %gs here and obviously can not use %gs itself to access per-CPU data.
> + *
> + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and
> + * may not restore the host's value until the CPU returns to userspace.
> + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
> + * while running KVM's run loop.
>   */
>  .macro GET_PERCPU_BASE reg:req
> -   ALTERNATIVE \
> -   "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \
> -   "RDPID  \reg", \
> -   X86_FEATURE_RDPID
> +   LOAD_CPU_AND_NODE_SEG_LIMIT \reg
> andq$VDSO_CPUNODE_MASK, \reg
> movq__per_cpu_offset(, \reg, 8), \reg
>  .endm

LOAD_CPU_AND_NODE_SEG_LIMIT can be merged into this, as its only
purpose was to work around using CPP macros in an alternative.

--
Brian Gerst


Re: [RFC][PATCH 4/7] x86/debug: Move historical SYSENTER junk into exc_debug_kernel()

2020-08-20 Thread Brian Gerst
On Thu, Aug 20, 2020 at 6:53 AM Peter Zijlstra  wrote:
>
>
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
>  arch/x86/kernel/traps.c |   24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -820,18 +820,6 @@ static void handle_debug(struct pt_regs
> goto out;
> }
>
> -   if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
> -   /*
> -* Historical junk that used to handle SYSENTER 
> single-stepping.
> -* This should be unreachable now.  If we survive for a while
> -* without anyone hitting this warning, we'll turn this into
> -* an oops.
> -*/
> -   tsk->thread.debugreg6 &= ~DR_STEP;
> -   set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
> -   regs->flags &= ~X86_EFLAGS_TF;
> -   }
> -
> si_code = get_si_code(tsk->thread.debugreg6);
> if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
> send_sigtrap(regs, 0, si_code);
> @@ -874,6 +862,18 @@ static __always_inline void exc_debug_ke
> if (kprobe_debug_handler(regs))
> goto out;
>
> +   if (WARN_ON_ONCE(dr6 & DR_STEP)) {
> +   /*
> +* Historical junk that used to handle SYSENTER 
> single-stepping.
> +* This should be unreachable now.  If we survive for a while
> +* without anyone hitting this warning, we'll turn this into
> +* an oops.
> +*/
> +   dr6 &= ~DR_STEP;
> +   set_thread_flag(TIF_SINGLESTEP);
> +   regs->flags &= ~X86_EFLAGS_TF;
> +   }
> +
> handle_debug(regs, dr6, false);
>
>  out:

Can this be removed or changed to a BUG()?  The warning has been there
since 2016 and nobody has apparently complained about it.

--
Brian Gerst


Re: [tip: x86/entry] x86/entry: Consolidate 32/64 bit syscall entry

2020-07-26 Thread Brian Gerst
 __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
>  {
> -   enter_from_user_mode(regs);
> -   instrumentation_begin();
> +   unsigned int nr = syscall_32_enter(regs);
>
> -   local_irq_enable();
> -   do_syscall_32_irqs_on(regs);
> -
> -   instrumentation_end();
> -   exit_to_user_mode();
> +   do_syscall_32_irqs_on(regs, nr);
> +   syscall_return_slowpath(regs);
>  }
>
> -static bool __do_fast_syscall_32(struct pt_regs *regs)
> +static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)

Can __do_fast_syscall_32() be merged back into do_fast_syscall_32()
now that both are marked noinstr?

--
Brian Gerst


[tip: x86/asm] x86/percpu: Remove unused PER_CPU() macro

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: 4719ffecbb0659faf1fd39f4b8eb2674f0042890
Gitweb:
https://git.kernel.org/tip/4719ffecbb0659faf1fd39f4b8eb2674f0042890
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:24 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:43 +02:00

x86/percpu: Remove unused PER_CPU() macro

Also remove now unused __percpu_mov_op.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Linus Torvalds 
Acked-by: Dennis Zhou 
Link: 
https://lkml.kernel.org/r/20200720204925.3654302-11-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 18 --
 1 file changed, 18 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cf2b9c2..a3c33b7 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,33 +4,15 @@
 
 #ifdef CONFIG_X86_64
 #define __percpu_seg   gs
-#define __percpu_mov_opmovq
 #else
 #define __percpu_seg   fs
-#define __percpu_mov_opmovl
 #endif
 
 #ifdef __ASSEMBLY__
 
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *var - variable name
- *reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- *PER_CPU(cpu_gdt_descr, %ebx)
- */
 #ifdef CONFIG_SMP
-#define PER_CPU(var, reg)  \
-   __percpu_mov_op %__percpu_seg:this_cpu_off, reg;\
-   lea var(reg), reg
 #define PER_CPU_VAR(var)   %__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU(var, reg)  __percpu_mov_op $var, reg
 #define PER_CPU_VAR(var)   var
 #endif /* SMP */
 


[tip: x86/asm] x86/percpu: Clean up percpu_stable_op()

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: c94055fe93c8d00bfa23fa2cb9af080f7fc53aa0
Gitweb:
https://git.kernel.org/tip/c94055fe93c8d00bfa23fa2cb9af080f7fc53aa0
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:23 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:42 +02:00

x86/percpu: Clean up percpu_stable_op()

Use __pcpu_size_call_return() to simplify this_cpu_read_stable().
Also remove __bad_percpu_size() which is now unused.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: 
https://lkml.kernel.org/r/20200720204925.3654302-10-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 41 +-
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7efc0b5..cf2b9c2 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -85,7 +85,6 @@
 
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
 
 #define __pcpu_type_1 u8
 #define __pcpu_type_2 u16
@@ -167,33 +166,13 @@ do {  
\
(typeof(_var))(unsigned long) pfo_val__;\
 })
 
-#define percpu_stable_op(op, var)  \
-({ \
-   typeof(var) pfo_ret__;  \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm(op "b "__percpu_arg(P1)",%0"\
-   : "=q" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   case 2: \
-   asm(op "w "__percpu_arg(P1)",%0"\
-   : "=r" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   case 4: \
-   asm(op "l "__percpu_arg(P1)",%0"\
-   : "=r" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   case 8: \
-   asm(op "q "__percpu_arg(P1)",%0"\
-   : "=r" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pfo_ret__;  \
+#define percpu_stable_op(size, op, _var)   \
+({ \
+   __pcpu_type_##size pfo_val__;   \
+   asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")   \
+   : [val] __pcpu_reg_##size("=", pfo_val__)   \
+   : [var] "p" (&(_var))); \
+   (typeof(_var))(unsigned long) pfo_val__;\
 })
 
 /*
@@ -258,7 +237,11 @@ do {   
\
  * per-thread variables implemented as per-cpu variables and thus
  * stable for the duration of the respective task.
  */
-#define this_cpu_read_stable(var)  percpu_stable_op("mov", var)
+#define this_cpu_read_stable_1(pcp)percpu_stable_op(1, "mov", pcp)
+#define this_cpu_read_stable_2(pcp)percpu_stable_op(2, "mov", pcp)
+#define this_cpu_read_stable_4(pcp)percpu_stable_op(4, "mov", pcp)
+#define this_cpu_read_stable_8(pcp)percpu_stable_op(8, "mov", pcp)
+#define this_cpu_read_stable(pcp)  
__pcpu_size_call_return(this_cpu_read_stable_, pcp)
 
 #define raw_cpu_read_1(pcp)percpu_from_op(1, , "mov", pcp)
 #define raw_cpu_read_2(pcp)percpu_from_op(2, , "mov", pcp)


[tip: x86/asm] x86/percpu: Clean up percpu_cmpxchg_op()

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: ebcd580bed4a357ea894e6878d9099b3919f727f
Gitweb:
https://git.kernel.org/tip/ebcd580bed4a357ea894e6878d9099b3919f727f
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:22 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:42 +02:00

x86/percpu: Clean up percpu_cmpxchg_op()

The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-9-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 58 ++
 1 file changed, 18 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ac6d7e7..7efc0b5 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -236,39 +236,17 @@ do {  
\
  * cmpxchg has no such implied lock semantics as a result it is much
  * more efficient for cpu local operations.
  */
-#define percpu_cmpxchg_op(qual, var, oval, nval)   \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval)  \
 ({ \
-   typeof(var) pco_ret__;  \
-   typeof(var) pco_old__ = (oval); \
-   typeof(var) pco_new__ = (nval); \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("cmpxchgb %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "q" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 2: \
-   asm qual ("cmpxchgw %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 4: \
-   asm qual ("cmpxchgl %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 8: \
-   asm qual ("cmpxchgq %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pco_ret__;  \
+   __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval);   \
+   __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval);   \
+   asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",   \
+   __percpu_arg([var]))\
+ : [oval] "+a" (pco_old__),\
+   [var] "+m" (_var)   \
+ : [nval] __pcpu_reg_##size(, pco_new__)   \
+ : "memory");  \
+   (typeof(_var))(unsigned long) pco_old__;   

[tip: x86/asm] x86/percpu: Clean up percpu_add_return_op()

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: bbff583b84a130d4d1234d68906c41690575be36
Gitweb:
https://git.kernel.org/tip/bbff583b84a130d4d1234d68906c41690575be36
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:20 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:41 +02:00

x86/percpu: Clean up percpu_add_return_op()

The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-7-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 51 ++
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 9bb5440..0776a11 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -199,34 +199,15 @@ do {  
\
 /*
  * Add return operation
  */
-#define percpu_add_return_op(qual, var, val)   \
+#define percpu_add_return_op(size, qual, _var, _val)   \
 ({ \
-   typeof(var) paro_ret__ = val;   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("xaddb %0, "__percpu_arg(1)   \
-   : "+q" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 2: \
-   asm qual ("xaddw %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 4: \
-   asm qual ("xaddl %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 8: \
-   asm qual ("xaddq %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   paro_ret__ += val;  \
-   paro_ret__; \
+   __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val);   \
+   asm qual (__pcpu_op2_##size("xadd", "%[tmp]",   \
+__percpu_arg([var]))   \
+ : [tmp] __pcpu_reg_##size("+", paro_tmp__),   \
+   [var] "+m" (_var)   \
+ : : "memory");\
+   (typeof(_var))(unsigned long) (paro_tmp__ + _val);  \
 })
 
 /*
@@ -377,16 +358,16 @@ do {  
\
 #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
 #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
 
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, 
val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, 
val)
+#d

[tip: x86/asm] x86/percpu: Clean up percpu_add_op()

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: 33e5614a435ff8047d768e6501454ae1cc7f131f
Gitweb:
https://git.kernel.org/tip/33e5614a435ff8047d768e6501454ae1cc7f131f
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:18 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:40 +02:00

x86/percpu: Clean up percpu_add_op()

The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-5-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h |  99 +++--
 1 file changed, 22 insertions(+), 77 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a40d2e0..2a24f3c 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -130,64 +130,32 @@ do {  
\
: [val] __pcpu_reg_imm_##size(pto_val__));  \
 } while (0)
 
+#define percpu_unary_op(size, qual, op, _var)  \
+({ \
+   asm qual (__pcpu_op1_##size(op, __percpu_arg([var]))\
+   : [var] "+m" (_var));   \
+})
+
 /*
  * Generate a percpu add to memory instruction and optimize code
  * if one is added or subtracted.
  */
-#define percpu_add_op(qual, var, val)  \
+#define percpu_add_op(size, qual, var, val)\
 do {   \
-   typedef typeof(var) pao_T__;\
const int pao_ID__ = (__builtin_constant_p(val) &&  \
  ((val) == 1 || (val) == -1)) ?\
(int)(val) : 0; \
if (0) {\
-   pao_T__ pao_tmp__;  \
+   typeof(var) pao_tmp__;  \
pao_tmp__ = (val);  \
(void)pao_tmp__;\
}   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addb %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "qi" ((pao_T__)(val)));   \
-   break;  \
-   case 2: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addw %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "ri" ((pao_T__)(val)));   \
-   break;  \
-   case 4: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decl "__percpu_arg(0) : "+m"

[tip: x86/asm] x86/percpu: Clean up percpu_from_op()

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: bb631e3002840706362a7d76e3ebb3604cce91a7
Gitweb:
https://git.kernel.org/tip/bb631e3002840706362a7d76e3ebb3604cce91a7
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:17 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:39 +02:00

x86/percpu: Clean up percpu_from_op()

The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-4-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 50 ++
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index fb280fb..a40d2e0 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -190,33 +190,13 @@ do {  
\
}   \
 } while (0)
 
-#define percpu_from_op(qual, op, var)  \
-({ \
-   typeof(var) pfo_ret__;  \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual (op "b "__percpu_arg(1)",%0"   \
-   : "=q" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 2: \
-   asm qual (op "w "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 4: \
-   asm qual (op "l "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 8: \
-   asm qual (op "q "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pfo_ret__;  \
+#define percpu_from_op(size, qual, op, _var)   \
+({ \
+   __pcpu_type_##size pfo_val__;   \
+   asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")  \
+   : [val] __pcpu_reg_##size("=", pfo_val__)   \
+   : [var] "m" (_var));\
+   (typeof(_var))(unsigned long) pfo_val__;\
 })
 
 #define percpu_stable_op(op, var)  \
@@ -401,9 +381,9 @@ do {
\
  */
 #define this_cpu_read_stable(var)  percpu_stable_op("mov", var)
 
-#define raw_cpu_read_1(pcp)percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_2(pcp)percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_4(pcp)percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_1(pcp)percpu_from_op(1, , "mov", pcp)
+#define raw_cpu_read_2(pcp)percpu_from_op(2, , "mov", pcp)
+#define raw_cpu_read_4(pcp)percpu_from_op(4, , "mov", pcp)
 
 #define raw_cpu_write_1(pcp, val)  percpu_to_op(1, , "mov", (pcp), val)
 #define raw_cpu_write_2(pcp, val)  percpu_to_op(2, , "mov", (pcp), val)
@@ -433,9 +413,9 @@ do {
\
 #define raw_cpu_xchg_2(pcp, val)   raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val)   raw_percpu_xchg_op(pcp, val)
 
-#define this_cpu_read_1(pcp)   percpu_from_op(volatile, "mov", pcp)

[tip: x86/asm] x86/percpu: Clean up percpu_to_op()

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: c175acc14719e69ecec4dafbb642a7f38c76c064
Gitweb:
https://git.kernel.org/tip/c175acc14719e69ecec4dafbb642a7f38c76c064
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:16 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:39 +02:00

x86/percpu: Clean up percpu_to_op()

The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-3-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 90 +-
 1 file changed, 35 insertions(+), 55 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 19838e4..fb280fb 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -117,37 +117,17 @@ extern void __bad_percpu_size(void);
 #define __pcpu_reg_imm_4(x) "ri" (x)
 #define __pcpu_reg_imm_8(x) "re" (x)
 
-#define percpu_to_op(qual, op, var, val)   \
-do {   \
-   typedef typeof(var) pto_T__;\
-   if (0) {\
-   pto_T__ pto_tmp__;  \
-   pto_tmp__ = (val);  \
-   (void)pto_tmp__;\
-   }   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual (op "b %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "qi" ((pto_T__)(val)));   \
-   break;  \
-   case 2: \
-   asm qual (op "w %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "ri" ((pto_T__)(val)));   \
-   break;  \
-   case 4: \
-   asm qual (op "l %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "ri" ((pto_T__)(val)));   \
-   break;  \
-   case 8: \
-   asm qual (op "q %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "re" ((pto_T__)(val)));   \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
+#define percpu_to_op(size, qual, op, _var, _val)   \
+do {   \
+   __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);\
+   if (0) {\
+   typeof(_var) pto_tmp__; \
+   pto_tmp__ = (_val); \
+   (void)pto_tmp__;\
+   }   \
+   asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))   \
+   : [var] "+m" (_var) \
+   : [val] __pcpu_reg_imm_##size(pto_val__));  \
 } while (0)
 
 /*
@@ -425,18 +405,18 @@ do {  
\
 #define raw_cpu_read_2(pcp)percpu_from_op(, "mov", pcp)
 #define raw_cpu_read_4(pcp)percpu_from_op(, "mov", pcp)
 
-#define raw_cpu_write_1(pcp, val)  percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val)  percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val)  percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_1(pcp, val)  percpu_to_op(1, , "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val)  percpu_to_op(2, , "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val)  percpu_to_op(4, , "mov", (pcp), v

[tip: x86/asm] x86/percpu: Clean up percpu_xchg_op()

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: 73ca542fbabb68deaa90130a8153cab1fa8288fe
Gitweb:
https://git.kernel.org/tip/73ca542fbabb68deaa90130a8153cab1fa8288fe
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:21 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:41 +02:00

x86/percpu: Clean up percpu_xchg_op()

The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-8-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 61 ++
 1 file changed, 18 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0776a11..ac6d7e7 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -215,46 +215,21 @@ do {  
\
  * expensive due to the implied lock prefix.  The processor cannot prefetch
  * cachelines if xchg is used.
  */
-#define percpu_xchg_op(qual, var, nval)
\
+#define percpu_xchg_op(size, qual, _var, _nval)
\
 ({ \
-   typeof(var) pxo_ret__;  \
-   typeof(var) pxo_new__ = (nval); \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%al"  \
-   "\n1:\tcmpxchgb %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "q" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 2: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%ax"  \
-   "\n1:\tcmpxchgw %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 4: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
-   "\n1:\tcmpxchgl %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 8: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
-   "\n1:\tcmpxchgq %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pxo_ret__;  \
+   __pcpu_type_##size pxo_old__;   \
+   __pcpu_type_##size pxo

[tip: x86/asm] x86/percpu: Introduce size abstraction macros

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: 6865dc3ae93b9acb336ca48bd7b2db3446d89370
Gitweb:
https://git.kernel.org/tip/6865dc3ae93b9acb336ca48bd7b2db3446d89370
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:15 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:39 +02:00

x86/percpu: Introduce size abstraction macros

In preparation for cleaning up the percpu operations, define macros for
abstraction based on the width of the operation.

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-2-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2278797..19838e4 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -87,6 +87,36 @@
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
 
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
+
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val) ((u64)(val))
+
+#define __pcpu_op1_1(op, dst) op "b " dst
+#define __pcpu_op1_2(op, dst) op "w " dst
+#define __pcpu_op1_4(op, dst) op "l " dst
+#define __pcpu_op1_8(op, dst) op "q " dst
+
+#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
+#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
+#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
+#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
+
 #define percpu_to_op(qual, op, var, val)   \
 do {   \
typedef typeof(var) pto_T__;\


[tip: x86/asm] x86/percpu: Remove "e" constraint from XADD

2020-07-23 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/asm branch of tip:

Commit-ID: e4d16defbbde028aeab2026995f0ced4240df6d6
Gitweb:
https://git.kernel.org/tip/e4d16defbbde028aeab2026995f0ced4240df6d6
Author:Brian Gerst 
AuthorDate:Mon, 20 Jul 2020 13:49:19 -07:00
Committer: Thomas Gleixner 
CommitterDate: Thu, 23 Jul 2020 11:46:40 +02:00

x86/percpu: Remove "e" constraint from XADD

The "e" constraint represents a constant, but the XADD instruction doesn't
accept immediate operands.
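
For illustration, a minimal stand-alone sketch (not part of the patch; the
function name and the long-sized operand below are made up):

	static inline long xadd_sketch(long *ptr, long val)
	{
		/*
		 * XADD writes the old memory value back into its register
		 * operand, so that operand must be a register.  With "+re"
		 * the compiler may pick an immediate, which cannot be
		 * assembled; "+r" forces a register.
		 */
		asm volatile("xaddq %0, %1"
			     : "+r" (val), "+m" (*ptr)
			     : : "memory");
		return val;	/* previous value of *ptr */
	}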

Signed-off-by: Brian Gerst 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Thomas Gleixner 
Tested-by: Nick Desaulniers 
Tested-by: Sedat Dilek 
Reviewed-by: Nick Desaulniers 
Acked-by: Linus Torvalds 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Dennis Zhou 
Link: https://lkml.kernel.org/r/20200720204925.3654302-6-ndesaulni...@google.com

---
 arch/x86/include/asm/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2a24f3c..9bb5440 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -220,7 +220,7 @@ do {
\
break;  \
case 8: \
asm qual ("xaddq %0, "__percpu_arg(1)   \
-   : "+re" (paro_ret__), "+m" (var)\
+   : "+r" (paro_ret__), "+m" (var) \
: : "memory");  \
break;  \
default: __bad_percpu_size();   \


[PATCH 3/3] x86: Clean up do_fast_syscall_32() tests

2020-07-18 Thread Brian Gerst
Reorganize the tests for SYSEXIT/SYSRETL, cleaning up comments and merging
native and compat code.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/common.c  | 85 ++--
 arch/x86/entry/entry_32.S|  6 +--
 arch/x86/entry/entry_64_compat.S | 13 ++---
 arch/x86/include/asm/segment.h   |  1 +
 arch/x86/include/asm/syscall.h   |  2 +-
 5 files changed, 48 insertions(+), 59 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bdb4c15b8610e..df1497fa554b8 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -500,10 +500,24 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs 
*regs)
exit_to_user_mode();
 }
 
+/* Returns true to return using SYSEXIT/SYSRETL, or false to return using IRET 
*/
 static bool __do_fast_syscall_32(struct pt_regs *regs)
 {
+   /*
+* Called using the internal vDSO SYSENTER/SYSCALL32 calling
+* convention.  Adjust regs so it looks like we entered using int80.
+*/
+   unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
+   vdso_image_32.sym_int80_landing_pad;
int res;
 
+   /*
+* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
+* so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
+* Fix it up.
+*/
+   regs->ip = landing_pad;
+
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
@@ -522,34 +536,39 @@ static bool __do_fast_syscall_32(struct pt_regs *regs)
regs->ax = -EFAULT;
local_irq_disable();
__prepare_exit_to_usermode(regs);
+   /* Keep it simple: use IRET. */
return false;
}
 
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs);
+
+   /* XEN PV guests always use IRET path */
+   if (static_cpu_has(X86_FEATURE_XENPV))
+   return false;
+
+   /* CS and SS must match values set in MSR_STAR */
+   if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS))
+   return false;
+
+   /* EIP must point to the VDSO landing pad */
+   if (unlikely(regs->ip != landing_pad))
+   return false;
+
+   /* The TF, RF, and VM flags must be restored with IRET */
+   if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | 
X86_EFLAGS_VM)))
+   return false;
+
+   /* Return with SYSEXIT/SYSRETL */
return true;
 }
 
-/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
+__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
 {
-   /*
-* Called using the internal vDSO SYSENTER/SYSCALL32 calling
-* convention.  Adjust regs so it looks like we entered using int80.
-*/
-   unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
-   vdso_image_32.sym_int80_landing_pad;
bool success;
 
check_user_regs(regs);
 
-   /*
-* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
-* so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
-* Fix it up.
-*/
-   regs->ip = landing_pad;
-
enter_from_user_mode();
instrumentation_begin();
 
@@ -559,42 +578,10 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs 
*regs)
instrumentation_end();
exit_to_user_mode();
 
-   /* If it failed, keep it simple: use IRET. */
-   if (!success)
-   return 0;
-
-#ifdef CONFIG_X86_64
-   /*
-* Opportunistic SYSRETL: if possible, try to return using SYSRETL.
-* SYSRETL is available on all 64-bit CPUs, so we don't need to
-* bother with SYSEXIT.
-*
-* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
-* because the ECX fixup above will ensure that this is essentially
-* never the case.
-*/
-   return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
-   regs->ip == landing_pad &&
-   (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
-#else
-   /*
-* Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
-*
-* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
-* because the ECX fixup above will ensure that this is essentially
-* never the case.
-*
-* We don't allow syscalls at all from VM86 mode, but we still
-* need to check VM, because we might be returning from sys_vm86.
-*/
-   return regs->cs == __USER_CS && regs->ss == __USER_DS &&
-   regs->ip == l

[PATCH 1/3] x86-64: Move SYSRET validation code to C

2020-07-18 Thread Brian Gerst
Signed-off-by: Brian Gerst 
---
 arch/x86/entry/calling.h   | 10 +
 arch/x86/entry/common.c| 56 ++-
 arch/x86/entry/entry_64.S  | 71 ++
 arch/x86/include/asm/syscall.h |  2 +-
 4 files changed, 60 insertions(+), 79 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 98e4d8886f11c..904477d3e388f 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -147,27 +147,19 @@ For 32-bit we have the following conventions - kernel is 
built with
 
 .endm
 
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+.macro POP_REGS pop_rdi=1
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
-   .if \skip_r11rcx
-   popq %rsi
-   .else
popq %r11
-   .endif
popq %r10
popq %r9
popq %r8
popq %rax
-   .if \skip_r11rcx
-   popq %rsi
-   .else
popq %rcx
-   .endif
popq %rdx
popq %rsi
.if \pop_rdi
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 54ad1890aefca..9e01445f6679c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -365,9 +365,11 @@ __visible noinstr void syscall_return_slowpath(struct 
pt_regs *regs)
 }
 
 #ifdef CONFIG_X86_64
-__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+__visible noinstr bool do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
struct thread_info *ti;
+   long rip;
+   unsigned int shift_rip;
 
check_user_regs(regs);
 
@@ -394,6 +396,58 @@ __visible noinstr void do_syscall_64(unsigned long nr, 
struct pt_regs *regs)
 
instrumentation_end();
exit_to_user_mode();
+
+   /*
+* Check that the register state is valid for using SYSRET to exit
+* to userspace.  Otherwise use the slower IRET exit path.
+*/
+
+   /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+   if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+   return false;
+
+   /* CS and SS must match values set in MSR_STAR */
+   if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+   return false;
+
+   /*
+* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+* in kernel space.  This essentially lets the user take over
+* the kernel, since userspace controls RSP.
+*
+* Change top bits to match most significant bit (47th or 56th bit
+* depending on paging mode) in the address.
+*/
+   shift_rip = 64 - (__VIRTUAL_MASK_SHIFT + 1);
+   rip = (long) regs->ip;
+   rip <<= shift_rip;
+   rip >>= shift_rip;
+   if (unlikely((unsigned long) rip != regs->ip))
+   return false;
+
+   /*
+* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+* restore RF properly. If the slowpath sets it for whatever reason, we
+* need to restore it correctly.
+*
+* SYSRET can restore TF, but unlike IRET, restoring TF results in a
+* trap from userspace immediately after SYSRET.  This would cause an
+* infinite loop whenever #DB happens with register state that satisfies
+* the opportunistic SYSRET conditions.  For example, single-stepping
+* this user code:
+*
+*   movq   $stuck_here, %rcx
+*   pushfq
+*   popq %r11
+*   stuck_here:
+*
+* would never get past 'stuck_here'.
+*/
+   if (unlikely(regs->flags & (X86_EFLAGS_RF|X86_EFLAGS_TF)))
+   return false;
+
+   /* Use SYSRET to exit to userspace */
+   return true;
 }
 #endif
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index fb729f4c4fbc2..b8025a62ac5e8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -117,80 +117,15 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, 
SYM_L_GLOBAL)
movq%rsp, %rsi
calldo_syscall_64   /* returns with IRQs disabled */
 
-   /*
-* Try to use SYSRET instead of IRET if we're returning to
-* a completely clean 64-bit userspace context.  If we're not,
-* go to the slow exit path.
-*/
-   movqRCX(%rsp), %rcx
-   movqRIP(%rsp), %r11
-
-   cmpq%rcx, %r11  /* SYSRET requires RCX == RIP */
-   jne swapgs_restore_regs_and_return_to_usermode
-
-   /*
-* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-* in kernel space.  This essentially lets the user take over
-* the kernel, since userspace controls RSP.
-*
-* If width of "canonical tail" ever becomes variable, this will need
-* to be updated to remain correct on both old and new CPUs.
-*
-* Change top

[PATCH 0/3] x86: Clean up SYSRET/SYSEXIT validation

2020-07-18 Thread Brian Gerst
This series cleans up the tests for using the SYSRET or SYSEXIT
instructions on exit from a syscall, moving some code from assembly to C
and merging native and compat tests.

Brian Gerst (3):
  x86-64: Move SYSRET validation code to C
  x86-32: Remove SEP test for SYSEXIT
  x86: Clean up do_fast_syscall_32() tests

 arch/x86/entry/calling.h |  10 +--
 arch/x86/entry/common.c  | 142 ---
 arch/x86/entry/entry_32.S|   6 +-
 arch/x86/entry/entry_64.S|  71 +---
 arch/x86/entry/entry_64_compat.S |  13 +--
 arch/x86/include/asm/segment.h   |   1 +
 arch/x86/include/asm/syscall.h   |   4 +-
 7 files changed, 108 insertions(+), 139 deletions(-)


base-commit: bccf9048549afe54b3c6bc8979ebfddea748da85
-- 
2.26.2



[PATCH 2/3] x86-32: Remove SEP test for SYSEXIT

2020-07-18 Thread Brian Gerst
SEP must be present in order for do_fast_syscall_32() to be called on native
32-bit.  Therefore the check here is redundant.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9e01445f6679c..bdb4c15b8610e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -587,8 +587,7 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs 
*regs)
 * We don't allow syscalls at all from VM86 mode, but we still
 * need to check VM, because we might be returning from sys_vm86.
 */
-   return static_cpu_has(X86_FEATURE_SEP) &&
-   regs->cs == __USER_CS && regs->ss == __USER_DS &&
+   return regs->cs == __USER_CS && regs->ss == __USER_DS &&
regs->ip == landing_pad &&
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) 
== 0;
 #endif
-- 
2.26.2



Re: [PATCH 0/2] X32 syscall cleanups

2020-07-14 Thread Brian Gerst
On Tue, Jul 14, 2020 at 2:40 AM Christoph Hellwig  wrote:
>
> On Tue, Jun 16, 2020 at 10:23:13AM -0400, Brian Gerst wrote:
> > Christoph Hellwig uncovered an issue with how we currently handle X32
> > syscalls.  Currently, we can only use COMPAT_SYS_DEFINEx() for X32
> > specific syscalls.  These changes remove that restriction and allow
> > native syscalls.
>
> Did this go anywhere?

This approach wasn't well received, so I'd just go with this as the
simplest solution:
https://lore.kernel.org/lkml/CAK8P3a17h782gO65qJ9Mmz0EuiTSKQPEyr_=nvqotnmqzuh...@mail.gmail.com/

--
Brian Gerst


Re: [PATCH] x86/entry: add compatibility with IAS

2020-07-12 Thread Brian Gerst
On Sun, Jul 12, 2020 at 9:26 PM Jian Cai  wrote:
>
> Clang's integrated assembler does not allow symbols with non-absolute
> values to be reassigned. This patch allows the affected code to be
> compatible with IAS.
>
> Link: https://github.com/ClangBuiltLinux/linux/issues/1043
> Reported-by: Nick Desaulniers 
> Reported-by: Sedat Dilek 
> Suggested-by: Nick Desaulniers 
> Tested-by: Sedat Dilek 
> Signed-off-by: Jian Cai 
> ---
>  arch/x86/include/asm/idtentry.h | 22 ++
>  1 file changed, 10 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
> index f3d70830bf2a..77beed2cd6d9 100644
> --- a/arch/x86/include/asm/idtentry.h
> +++ b/arch/x86/include/asm/idtentry.h
> @@ -468,34 +468,32 @@ __visible noinstr void func(struct pt_regs *regs,   
>   \
>   */
> .align 8
>  SYM_CODE_START(irq_entries_start)
> -vector=FIRST_EXTERNAL_VECTOR
> -pos = .
> +i = 1

Start with index 0.

> +pos1 = .

pos1 is unnecessary, as it's equal to irq_entries_start.

>  .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
> UNWIND_HINT_IRET_REGS
> -   .byte   0x6a, vector
> +   .byte   0x6a, FIRST_EXTERNAL_VECTOR + i - 1
> jmp asm_common_interrupt
> nop
> /* Ensure that the above is 8 bytes max */
> -   . = pos + 8
> -pos=pos+8
> -vector=vector+1
> +   . = pos1 + 8 * i
> +   i = i + 1

If you swap these two lines, the index will be correct for the next stub
when you start at 0.

>  .endr
>  SYM_CODE_END(irq_entries_start)
>
>  #ifdef CONFIG_X86_LOCAL_APIC
> .align 8
>  SYM_CODE_START(spurious_entries_start)
> -vector=FIRST_SYSTEM_VECTOR
> -pos = .
> +i = 1
> +pos2 = .
>  .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
> UNWIND_HINT_IRET_REGS
> -   .byte   0x6a, vector
> +   .byte   0x6a, FIRST_SYSTEM_VECTOR + i - 1
> jmp asm_spurious_interrupt
> nop
> /* Ensure that the above is 8 bytes max */
> -   . = pos + 8
> -pos=pos+8
> -vector=vector+1
> +   . = pos2 + 8 * i
> +   i = i + 1
>  .endr
>  SYM_CODE_END(spurious_entries_start)
>  #endif

--
Brian Gerst


Re: [PATCH v2 02/10] x86/percpu: Clean up percpu_to_op()

2020-07-09 Thread Brian Gerst
On Thu, Jul 9, 2020 at 6:30 AM Peter Zijlstra  wrote:
>
> On Sat, May 30, 2020 at 06:11:19PM -0400, Brian Gerst wrote:
> > + if (0) {\
> > + typeof(_var) pto_tmp__; \
> > + pto_tmp__ = (_val); \
> > + (void)pto_tmp__;\
> > + }   \
>
> This is repeated at least once more; and it looks very similar to
> __typecheck() and typecheck() but is yet another variant afaict.

The problem with typecheck() is that it will complain about a mismatch
between unsigned long and u64 (defined as unsigned long long) even
though both are 64-bits wide on x86-64.  Cleaning that mess up is
beyond the scope of this series, so I kept the existing checks.
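
To show the difference in a self-contained way (the macros below are
simplified stand-ins, not the kernel versions):

	typedef unsigned long long u64;

	/* simplified stand-in for the kernel's typecheck() */
	#define strict_typecheck(type, x)		\
	({						\
		type __dummy;				\
		typeof(x) __dummy2;			\
		(void)(&__dummy == &__dummy2);		\
		1;					\
	})

	/* the "if (0)" idiom only requires implicit convertibility */
	#define loose_typecheck(var, val)		\
	do {						\
		if (0) {				\
			typeof(var) __tmp = (val);	\
			(void)__tmp;			\
		}					\
	} while (0)

	void example(void)
	{
		unsigned long a = 0;
		u64 b = 0;

		(void)strict_typecheck(unsigned long, b); /* warns: distinct pointer types */
		loose_typecheck(a, b);	/* silent: u64 converts to unsigned long */
	}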

--
Brian Gerst


Re: [PATCH 16/17] init: open code setting up stdin/stdout/stderr

2020-07-09 Thread Brian Gerst
On Thu, Jul 9, 2020 at 11:19 AM Christoph Hellwig  wrote:
>
> Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
> instead open a struct file for /dev/console and then install it as FD
> 0/1/2 manually.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  fs/file.c|  7 +--
>  include/linux/syscalls.h |  1 -
>  init/main.c  | 16 ++--
>  3 files changed, 11 insertions(+), 13 deletions(-)
>
> diff --git a/fs/file.c b/fs/file.c
> index abb8b7081d7a44..85b7993165dd2f 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -985,7 +985,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, 
> newfd)
> return ksys_dup3(oldfd, newfd, 0);
>  }
>
> -int ksys_dup(unsigned int fildes)
> +SYSCALL_DEFINE1(dup, unsigned int, fildes)
>  {
> int ret = -EBADF;
> struct file *file = fget_raw(fildes);
> @@ -1000,11 +1000,6 @@ int ksys_dup(unsigned int fildes)
> return ret;
>  }
>
> -SYSCALL_DEFINE1(dup, unsigned int, fildes)
> -{
> -   return ksys_dup(fildes);
> -}
> -

Please split the removal of the now unused ksys_*() functions into a
separate patch.

--
Brian Gerst


Re: [PATCH v2 0/4] Remove 32-bit Xen PV guest support

2020-07-02 Thread Brian Gerst
On Wed, Jul 1, 2020 at 7:07 AM Juergen Gross  wrote:
>
> The long term plan has been to replace Xen PV guests by PVH. The first
> victim of that plan are now 32-bit PV guests, as those are used only
> rather seldom these days. Xen on x86 requires 64-bit support and with
> Grub2 now supporting PVH officially since version 2.04 there is no
> need to keep 32-bit PV guest support alive in the Linux kernel.
> Additionally Meltdown mitigation is not available in the kernel running
> as 32-bit PV guest, so dropping this mode makes sense from security
> point of view, too.

One thing that you missed is removing VDSO_NOTE_NONEGSEG_BIT from
vdso32/note.S.  With that removed there is no difference from the
64-bit version.

Otherwise this series looks good to me.
--
Brian Gerst


Re: [PATCH v2 1/4] x86/xen: remove 32-bit Xen PV guest support

2020-07-01 Thread Brian Gerst
On Wed, Jul 1, 2020 at 7:08 AM Juergen Gross  wrote:
>
> Xen is requiring 64-bit machines today and since Xen 4.14 it can be
> built without 32-bit PV guest support. There is no need to carry the
> burden of 32-bit PV guest support in the kernel any longer, as new
> guests can be either HVM or PVH, or they can use a 64 bit kernel.
>
> Remove the 32-bit Xen PV support from the kernel.

If you send a v3, it would be better to split the move of the 64-bit
code into xen-asm.S into a separate patch.

--
Brian Gerst


Re: [PATCH 3/6] x86/entry/64/compat: Fix Xen PV SYSENTER frame setup

2020-07-01 Thread Brian Gerst
On Fri, Jun 26, 2020 at 1:30 PM Andy Lutomirski  wrote:
>
> The SYSENTER frame setup was nonsense.  It worked by accident
> because the normal code into which the Xen asm jumped
> (entry_SYSENTER_32/compat) threw away SP without touching the stack.
> entry_SYSENTER_compat was recently modified such that it relied on
> having a valid stack pointer, so now the Xen asm needs to invoke it
> with a valid stack.
>
> Fix it up like SYSCALL: use the Xen-provided frame and skip the bare
> metal prologue.
>
> Cc: Boris Ostrovsky 
> Cc: Juergen Gross 
> Cc: Stefano Stabellini 
> Cc: xen-de...@lists.xenproject.org
> Fixes: 1c3e5d3f60e2 ("x86/entry: Make entry_64_compat.S objtool clean")
> Signed-off-by: Andy Lutomirski 
> ---
>  arch/x86/entry/entry_64_compat.S |  1 +
>  arch/x86/xen/xen-asm_64.S| 20 
>  2 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64_compat.S 
> b/arch/x86/entry/entry_64_compat.S
> index 7b9d8150f652..381a6de7de9c 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -79,6 +79,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
> pushfq  /* pt_regs->flags (except IF = 0) */
> pushq   $__USER32_CS/* pt_regs->cs */
> pushq   $0  /* pt_regs->ip = 0 (placeholder) */
> +SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)

This skips over the section that truncates the syscall number to
32-bits.  The comments present some doubt that it is actually
necessary, but the Xen path shouldn't differ from native.  That code
should be moved after this new label.

--
Brian Gerst


[tip: x86/cpu] x86/stackprotector: Pre-initialize canary for secondary CPUs

2020-06-18 Thread tip-bot2 for Brian Gerst
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: c9a1ff316bc9b1d1806a4366d0aef6e18833ba52
Gitweb:
https://git.kernel.org/tip/c9a1ff316bc9b1d1806a4366d0aef6e18833ba52
Author:Brian Gerst 
AuthorDate:Wed, 17 Jun 2020 18:56:24 -04:00
Committer: Borislav Petkov 
CommitterDate: Thu, 18 Jun 2020 13:09:17 +02:00

x86/stackprotector: Pre-initialize canary for secondary CPUs

The idle tasks created for each secondary CPU already have a random stack
canary generated by fork().  Copy the canary to the percpu variable before
starting the secondary CPU which removes the need to call
boot_init_stack_canary().

Signed-off-by: Brian Gerst 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20200617225624.799335-1-brge...@gmail.com
---
 arch/x86/include/asm/stackprotector.h | 12 
 arch/x86/kernel/smpboot.c | 14 ++
 arch/x86/xen/smp_pv.c |  2 --
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/stackprotector.h 
b/arch/x86/include/asm/stackprotector.h
index 9804a79..7fb482f 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -90,6 +90,15 @@ static __always_inline void boot_init_stack_canary(void)
 #endif
 }
 
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
+{
+#ifdef CONFIG_X86_64
+   per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
+#else
+   per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
+#endif
+}
+
 static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -119,6 +128,9 @@ static inline void load_stack_canary_segment(void)
 static inline void setup_stack_canary_segment(int cpu)
 { }
 
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
+{ }
+
 static inline void load_stack_canary_segment(void)
 {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ffbd9a3..a11bd53 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -51,7 +51,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -80,6 +79,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -259,21 +259,10 @@ static void notrace start_secondary(void *unused)
/* enable local interrupts */
local_irq_enable();
 
-   /* to prevent fake stack check failure in clock setup */
-   boot_init_stack_canary();
-
x86_cpuinit.setup_percpu_clockev();
 
wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
-
-   /*
-* Prevent tail call to cpu_startup_entry() because the stack protector
-* guard has been changed a couple of function calls up, in
-* boot_init_stack_canary() and must not be checked before tail calling
-* another function.
-*/
-   prevent_tail_call_optimization();
 }
 
 /**
@@ -1011,6 +1000,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct 
*idle)
alternatives_enable_smp();
 
per_cpu(current_task, cpu) = idle;
+   cpu_init_stack_canary(cpu, idle);
 
/* Initialize the interrupt stack(s) */
ret = irq_init_percpu_irqstack(cpu);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 171aff1..9ea598d 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -92,9 +92,7 @@ static void cpu_bringup(void)
 asmlinkage __visible void cpu_bringup_and_idle(void)
 {
cpu_bringup();
-   boot_init_stack_canary();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
-   prevent_tail_call_optimization();
 }
 
 void xen_smp_intr_free_pv(unsigned int cpu)


[PATCH V2] x86/stackprotector: Pre-initialize canary for secondary CPUs

2020-06-17 Thread Brian Gerst
The idle tasks created for each secondary CPU already have a random stack
canary generated by fork().  Copy the canary to the percpu variable before
starting the secondary CPU which removes the need to call
boot_init_stack_canary().

Signed-off-by: Brian Gerst 
---

V2: Fixed stack protector disabled case

 arch/x86/include/asm/stackprotector.h | 12 
 arch/x86/kernel/smpboot.c | 14 ++
 arch/x86/xen/smp_pv.c |  2 --
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/stackprotector.h 
b/arch/x86/include/asm/stackprotector.h
index 9804a7957f4e..7fb482f0f25b 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -90,6 +90,15 @@ static __always_inline void boot_init_stack_canary(void)
 #endif
 }
 
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
+{
+#ifdef CONFIG_X86_64
+   per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
+#else
+   per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
+#endif
+}
+
 static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -119,6 +128,9 @@ static inline void load_stack_canary_segment(void)
 static inline void setup_stack_canary_segment(int cpu)
 { }
 
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
+{ }
+
 static inline void load_stack_canary_segment(void)
 {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ffbd9a3d78d8..a11bd53c6911 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -51,7 +51,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -80,6 +79,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -259,21 +259,10 @@ static void notrace start_secondary(void *unused)
/* enable local interrupts */
local_irq_enable();
 
-   /* to prevent fake stack check failure in clock setup */
-   boot_init_stack_canary();
-
x86_cpuinit.setup_percpu_clockev();
 
wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
-
-   /*
-* Prevent tail call to cpu_startup_entry() because the stack protector
-* guard has been changed a couple of function calls up, in
-* boot_init_stack_canary() and must not be checked before tail calling
-* another function.
-*/
-   prevent_tail_call_optimization();
 }
 
 /**
@@ -1011,6 +1000,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct 
*idle)
alternatives_enable_smp();
 
per_cpu(current_task, cpu) = idle;
+   cpu_init_stack_canary(cpu, idle);
 
/* Initialize the interrupt stack(s) */
ret = irq_init_percpu_irqstack(cpu);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 171aff1b11f2..9ea598dcc132 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -92,9 +92,7 @@ static void cpu_bringup(void)
 asmlinkage __visible void cpu_bringup_and_idle(void)
 {
cpu_bringup();
-   boot_init_stack_canary();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
-   prevent_tail_call_optimization();
 }
 
 void xen_smp_intr_free_pv(unsigned int cpu)

base-commit: 83cdaef93988a6bc6875623781de571b2694fe02
-- 
2.26.2



Re: [PATCH 1/2] x86/x32: Use __x64 prefix for X32 compat syscalls

2020-06-16 Thread Brian Gerst
On Tue, Jun 16, 2020 at 12:49 PM Andy Lutomirski  wrote:
>
> On Tue, Jun 16, 2020 at 7:23 AM Brian Gerst  wrote:
> >
> > The ABI prefix for syscalls specifies the argument register mapping, so
> > there is no specific reason to continue using the __x32 prefix for the
> > compat syscalls.  This change will allow using native syscalls in the X32
> > specific portion of the syscall table.
>
> Okay, I realize that the x86 syscall machinery is held together by
> duct tape and a lot of luck, but:
>
> >
> > Signed-off-by: Brian Gerst 
> > ---
> >  arch/x86/entry/syscall_x32.c   |  8 +++-
> >  arch/x86/include/asm/syscall_wrapper.h | 10 +-
> >  2 files changed, 8 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
> > index 3d8d70d3896c..f993e6254043 100644
> > --- a/arch/x86/entry/syscall_x32.c
> > +++ b/arch/x86/entry/syscall_x32.c
> > @@ -9,15 +9,13 @@
> >  #include 
> >
> >  #define __SYSCALL_64(nr, sym)
> > +#define __SYSCALL_COMMON(nr, sym) __SYSCALL_X32(nr, sym)
> >
> > -#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct 
> > pt_regs *);
> > -#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct 
> > pt_regs *);
> > +#define __SYSCALL_X32(nr, sym) extern long __x64_##sym(const struct 
> > pt_regs *);
> >  #include 
> >  #undef __SYSCALL_X32
> > -#undef __SYSCALL_COMMON
> >
> > -#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
> > -#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
> > +#define __SYSCALL_X32(nr, sym) [nr] = __x64_##sym,
> >
> >  asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] 
> > = {
> > /*
> > diff --git a/arch/x86/include/asm/syscall_wrapper.h 
> > b/arch/x86/include/asm/syscall_wrapper.h
> > index a84333adeef2..267fae9904ff 100644
> > --- a/arch/x86/include/asm/syscall_wrapper.h
> > +++ b/arch/x86/include/asm/syscall_wrapper.h
> > @@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs 
> > *regs);
> >   * __x64_sys_*() - 64-bit native syscall
> >   * __ia32_sys_*()- 32-bit native syscall or common compat syscall
> >   * __ia32_compat_sys_*() - 32-bit compat syscall
>
> On a 64-bit kernel, an "ia32" compat syscall is __ia32_compat_sys_*, but...
>
> > - * __x32_compat_sys_*()  - 64-bit X32 compat syscall
> > + * __x64_compat_sys_*()  - 64-bit X32 compat syscall
>
> Now an x32 compat syscall is __x64_compat?  This seems nonsensical.

Again, think of it as how the registers are mapped, not which syscall
table it belongs to.  X32 and X64 are identical in that regard.

> I'm also a bit confused as to how this is even necessary for your
> other patch.

This came out of discussion on Christoph's patch to combine compat
execve*() into the native version:
https://lore.kernel.org/lkml/20200615141239.ga12...@lst.de/

The bottom line is that marking a syscall as X32-only in the syscall
table forces an __x32 prefix even if it's not a "compat" syscall.
This causes a link failure.  This is just another quirk caused by how
X32 was designed.  The solution is to make the prefix consistent for
the whole table.  The other alternative is to use __x32 for all the
common syscalls.

The second patch isn't really necessary, but it makes more sense to
not have a compat syscall with no corresponding native version.

--
Brian Gerst


[PATCH 2/2] x86/x32: Convert x32_rt_sigreturn to native syscall

2020-06-16 Thread Brian Gerst
x32_rt_sigreturn doesn't need to be a compat syscall because there aren't two
versions.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/syscalls/syscall_64.tbl| 2 +-
 arch/x86/kernel/signal.c  | 2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl 
b/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..5fb63ac69971 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -368,7 +368,7 @@
 # is defined.
 #
 512x32 rt_sigactioncompat_sys_rt_sigaction
-513x32 rt_sigreturncompat_sys_x32_rt_sigreturn
+513x32 rt_sigreturnsys_x32_rt_sigreturn
 514x32 ioctl   compat_sys_ioctl
 515x32 readv   compat_sys_readv
 516x32 writev  compat_sys_writev
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 399f97abee02..8a3d1cd4ea70 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -856,7 +856,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, 
char *where)
 }
 
 #ifdef CONFIG_X86_X32_ABI
-COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
+SYSCALL_DEFINE0(x32_rt_sigreturn)
 {
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl 
b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 37b844f839bc..36a3c8a913da 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -367,7 +367,7 @@
 # is defined.
 #
 512x32 rt_sigactioncompat_sys_rt_sigaction
-513x32 rt_sigreturncompat_sys_x32_rt_sigreturn
+513x32 rt_sigreturnsys_x32_rt_sigreturn
 514x32 ioctl   compat_sys_ioctl
 515x32 readv   compat_sys_readv
 516x32 writev  compat_sys_writev
-- 
2.26.2



[PATCH 0/2] X32 syscall cleanups

2020-06-16 Thread Brian Gerst
Christoph Hellwig uncovered an issue with how we currently handle X32
syscalls.  Currently, we can only use COMPAT_SYS_DEFINEx() for X32
specific syscalls.  These changes remove that restriction and allow
native syscalls.

Brian Gerst (2):
  x86/x32: Use __x64 prefix for X32 compat syscalls
  x86/x32: Convert x32_rt_sigreturn to native syscall

 arch/x86/entry/syscall_x32.c  |  8 +++-
 arch/x86/entry/syscalls/syscall_64.tbl|  2 +-
 arch/x86/include/asm/syscall_wrapper.h| 10 +-
 arch/x86/kernel/signal.c  |  2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl |  2 +-
 5 files changed, 11 insertions(+), 13 deletions(-)


base-commit: 83cdaef93988a6bc6875623781de571b2694fe02
-- 
2.26.2



[PATCH 1/2] x86/x32: Use __x64 prefix for X32 compat syscalls

2020-06-16 Thread Brian Gerst
The ABI prefix for syscalls specifies the argument register mapping, so
there is no specific reason to continue using the __x32 prefix for the
compat syscalls.  This change will allow using native syscalls in the X32
specific portion of the syscall table.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/syscall_x32.c   |  8 +++-
 arch/x86/include/asm/syscall_wrapper.h | 10 +-
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 3d8d70d3896c..f993e6254043 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -9,15 +9,13 @@
 #include 
 
 #define __SYSCALL_64(nr, sym)
+#define __SYSCALL_COMMON(nr, sym) __SYSCALL_X32(nr, sym)
 
-#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
-#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs 
*);
+#define __SYSCALL_X32(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include 
 #undef __SYSCALL_X32
-#undef __SYSCALL_COMMON
 
-#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
-#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL_X32(nr, sym) [nr] = __x64_##sym,
 
 asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
/*
diff --git a/arch/x86/include/asm/syscall_wrapper.h 
b/arch/x86/include/asm/syscall_wrapper.h
index a84333adeef2..267fae9904ff 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
  * __x64_sys_*() - 64-bit native syscall
  * __ia32_sys_*()- 32-bit native syscall or common compat syscall
  * __ia32_compat_sys_*() - 32-bit compat syscall
- * __x32_compat_sys_*()  - 64-bit X32 compat syscall
+ * __x64_compat_sys_*()  - 64-bit X32 compat syscall
  *
  * The registers are decoded according to the ABI:
  * 64-bit: RDI, RSI, RDX, R10, R8, R9
@@ -165,17 +165,17 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs 
*regs);
  * with x86_64 obviously do not need such care.
  */
 #define __X32_COMPAT_SYS_STUB0(name)   \
-   __SYS_STUB0(x32, compat_sys_##name)
+   __SYS_STUB0(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)   \
-   __SYS_STUBx(x32, compat_sys##name,  \
+   __SYS_STUBx(x64, compat_sys##name,  \
SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
 
 #define __X32_COMPAT_COND_SYSCALL(name)
\
-   __COND_SYSCALL(x32, compat_sys_##name)
+   __COND_SYSCALL(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_NI(name)  \
-   __SYS_NI(x32, compat_sys_##name)
+   __SYS_NI(x64, compat_sys_##name)
 #else /* CONFIG_X86_X32 */
 #define __X32_COMPAT_SYS_STUB0(name)
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)
-- 
2.26.2



Re: [PATCH 2/6] exec: simplify the compat syscall handling

2020-06-15 Thread Brian Gerst
On Mon, Jun 15, 2020 at 2:47 PM Arnd Bergmann  wrote:
>
> On Mon, Jun 15, 2020 at 4:48 PM Brian Gerst  wrote:
> > On Mon, Jun 15, 2020 at 10:13 AM Christoph Hellwig  wrote:
> > > On Mon, Jun 15, 2020 at 03:31:35PM +0200, Arnd Bergmann wrote:
>
> > >
> > > I'd rather keep it in common code as that allows all the low-level
> > > exec stuff to be marked static, and avoid us growing new pointless
> > > compat variants through copy and paste.
> > > smart compiler to d
> > >
> > > > I don't really understand
> > > > the comment, why can't this just use this?
> > >
> > > That errors out with:
> > >
> > > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> > > `__x32_sys_execve'
> > > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> > > `__x32_sys_execveat'
> > > make: *** [Makefile:1139: vmlinux] Error 1
> >
> > I think I have a fix for this, by modifying the syscall wrappers to
> > add an alias for the __x32 variant to the native __x64_sys_foo().
> > I'll get back to you with a patch.
>
> Do we actually need the __x32 prefix any more, or could we just
> change all x32 specific calls to use __x64_compat_sys_foo()?

I suppose that would work too.  The prefix really describes the
register mapping.

--
Brian Gerst


Re: [PATCH 2/6] exec: simplify the compat syscall handling

2020-06-15 Thread Brian Gerst
On Mon, Jun 15, 2020 at 11:10 AM Christoph Hellwig  wrote:
>
> On Mon, Jun 15, 2020 at 04:46:15PM +0200, Arnd Bergmann wrote:
> > How about this one:
> >
> > diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
> > index 3d8d70d3896c..0ce15807cf54 100644
> > --- a/arch/x86/entry/syscall_x32.c
> > +++ b/arch/x86/entry/syscall_x32.c
> > @@ -16,6 +16,9 @@
> >  #undef __SYSCALL_X32
> >  #undef __SYSCALL_COMMON
> >
> > +#define __x32_sys_execve __x64_sys_execve
> > +#define __x32_sys_execveat __x64_sys_execveat
> > +
>
>
> arch/x86/entry/syscall_x32.c:19:26: error: ‘__x64_sys_execve’ undeclared here 
> (not in a function); did you mean ‘__x32_sys_execve’?
>19 | #define __x32_sys_execve __x64_sys_execve
>   |  ^~~~
> arch/x86/entry/syscall_x32.c:22:39: note: in expansion of macro 
> ‘__x32_sys_execve’
>22 | #define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
>   |   ^~
> ./arch/x86/include/generated/asm/syscalls_64.h:344:1: note: in expansion of 
> macro ‘__SYSCALL_X32’
>   344 | __SYSCALL_X32(520, sys_execve)
>   | ^
> arch/x86/entry/syscall_x32.c:20:28: error: ‘__x64_sys_execveat’ undeclared 
> here (not in a function); did you mean ‘__x32_sys_execveat’?
>20 | #define __x32_sys_execveat __x64_sys_execveat
>   |^~
> arch/x86/entry/syscall_x32.c:22:39: note: in expansion of macro 
> ‘__x32_sys_execveat’
>22 | #define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
>   |   ^~
> ./arch/x86/include/generated/asm/syscalls_64.h:369:1: note: in expansion of 
> macro ‘__SYSCALL_X32’
>   369 | __SYSCALL_X32(545, sys_execveat)
>   | ^
> make[2]: *** [scripts/Makefile.build:281: arch/x86/entry/syscall_x32.o] Error 
> 1
> make[1]: *** [scripts/Makefile.build:497: arch/x86/entry] Error 2
> make[1]: *** Waiting for unfinished jobs
> kernel/exit.o: warning: objtool: __x64_sys_exit_group()+0x14: unreachable 
> instruction
> make: *** [Makefile:1764: arch/x86] Error 2
> make: *** Waiting for unfinished jobs

If you move those aliases above all the __SYSCALL_* defines it will
work, since the extern forward declarations will then pick up the
aliases too.  This would be the simplest workaround.
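
Roughly like this (sketch only, adapting the hunk quoted above; not tested):

	/* arch/x86/entry/syscall_x32.c: hoist the aliases above the first include */
	#define __x32_sys_execve	__x64_sys_execve
	#define __x32_sys_execveat	__x64_sys_execveat

	#define __SYSCALL_64(nr, sym)
	#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
	#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
	#include <asm/syscalls_64.h>
	#undef __SYSCALL_X32
	#undef __SYSCALL_COMMON

	/* the table entries for execve/execveat now also expand to the __x64_ stubs */
	#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
	#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,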

--
Brian Gerst


Re: [PATCH 2/6] exec: simplify the compat syscall handling

2020-06-15 Thread Brian Gerst
On Mon, Jun 15, 2020 at 10:13 AM Christoph Hellwig  wrote:
>
> On Mon, Jun 15, 2020 at 03:31:35PM +0200, Arnd Bergmann wrote:
> > >  #ifdef CONFIG_COMPAT
> > > -   if (unlikely(argv.is_compat)) {
> > > +   if (in_compat_syscall()) {
> > > +   const compat_uptr_t __user *compat_argv =
> > > +   compat_ptr((unsigned long)argv);
> > > compat_uptr_t compat;
> > >
> > > -   if (get_user(compat, argv.ptr.compat + nr))
> > > +   if (get_user(compat, compat_argv + nr))
> > > return ERR_PTR(-EFAULT);
> > >
> > > return compat_ptr(compat);
> > > }
> > >  #endif
> >
> > I would expect that the "#ifdef CONFIG_COMPAT" can be removed
> > now, since compat_ptr() and in_compat_syscall() are now defined
> > unconditionally. I have not tried that though.
>
> True, I'll give it a spin.
>
> > > +/*
> > > + * x32 syscalls are listed in the same table as x86_64 ones, so we need 
> > > to
> > > + * define compat syscalls that are exactly the same as the native 
> > > version for
> > > + * the syscall table machinery to work.  Sigh..
> > > + */
> > > +#ifdef CONFIG_X86_X32
> > >  COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
> > > -   const compat_uptr_t __user *, argv,
> > > -   const compat_uptr_t __user *, envp)
> > > +  const char __user *const __user *, argv,
> > > +  const char __user *const __user *, envp)
> > >  {
> > > -   return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 
> > > 0);
> > > +   return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, 
> > > NULL);
> > >  }
> >
> > Maybe move it to arch/x86/kernel/process_64.c or 
> > arch/x86/entry/syscall_x32.c
> > to keep it out of the common code if this is needed.
>
> I'd rather keep it in common code as that allows all the low-level
> exec stuff to be marked static, and avoid us growing new pointless
> compat variants through copy and paste.
> smart compiler to d
>
> > I don't really understand
> > the comment, why can't this just use this?
>
> That errors out with:
>
> ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> `__x32_sys_execve'
> ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> `__x32_sys_execveat'
> make: *** [Makefile:1139: vmlinux] Error 1

I think I have a fix for this, by modifying the syscall wrappers to
add an alias for the __x32 variant to the native __x64_sys_foo().
I'll get back to you with a patch.
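
Something along these lines (hypothetical sketch, not the actual patch;
execve is just the example):

	struct pt_regs;

	/* native stub, as emitted today by SYSCALL_DEFINEx() */
	long __x64_sys_execve(const struct pt_regs *regs)
	{
		return 0;	/* placeholder body for the sketch */
	}

	/* let the __x32_ name used by the x32 table resolve to the same code */
	long __x32_sys_execve(const struct pt_regs *regs)
		__attribute__((alias("__x64_sys_execve")));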

--
Brian Gerst


Re: [PATCH] syscalls: fix offset type of ksys_ftruncate

2020-06-10 Thread Brian Gerst
On Wed, Jun 10, 2020 at 7:48 AM Jiri Slaby  wrote:
>
> After the commit below, truncate on i586 uses ksys_ftruncate. But
> ksys_ftruncate truncates the offset to unsigned long. So switch the type
> of offset to loff_t which is what the lower do_sys_ftruncate expects.
>
> Signed-off-by: Jiri Slaby 
> Fixes: 121b32a58a3a (x86/entry/32: Use IA32-specific wrappers for syscalls 
> taking 64-bit arguments)
> Cc: Brian Gerst 
> Cc: Thomas Gleixner 
> Cc: Dominik Brodowski 
> ---
>  include/linux/syscalls.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 63ffa6dc9da3..e97ca179d0dc 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -1370,7 +1370,7 @@ static inline long ksys_lchown(const char __user 
> *filename, uid_t user,
>
>  extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
>
> -static inline long ksys_ftruncate(unsigned int fd, unsigned long length)
> +static inline long ksys_ftruncate(unsigned int fd, loff_t length)
>  {
> return do_sys_ftruncate(fd, length, 1);
>  }

Reviewed-by: Brian Gerst 


[PATCH] x86/stackprotector: Pre-initialize canary for secondary CPUs

2020-06-04 Thread Brian Gerst
The idle tasks created for each secondary CPU already have a random stack
canary generated by fork().  Copy the canary to the percpu variable before
starting the secondary CPU which removes the need to call
boot_init_stack_canary().

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/stackprotector.h | 12 
 arch/x86/kernel/smpboot.c | 12 +---
 arch/x86/xen/smp_pv.c |  2 --
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/stackprotector.h 
b/arch/x86/include/asm/stackprotector.h
index 9804a7957f4e..7fb482f0f25b 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -90,6 +90,15 @@ static __always_inline void boot_init_stack_canary(void)
 #endif
 }
 
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
+{
+#ifdef CONFIG_X86_64
+   per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
+#else
+   per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
+#endif
+}
+
 static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -119,6 +128,9 @@ static inline void load_stack_canary_segment(void)
 static inline void setup_stack_canary_segment(int cpu)
 { }
 
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
+{ }
+
 static inline void load_stack_canary_segment(void)
 {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2467f3dd35d3..dad7f9ca6478 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -259,21 +259,10 @@ static void notrace start_secondary(void *unused)
/* enable local interrupts */
local_irq_enable();
 
-   /* to prevent fake stack check failure in clock setup */
-   boot_init_stack_canary();
-
x86_cpuinit.setup_percpu_clockev();
 
wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
-
-   /*
-* Prevent tail call to cpu_startup_entry() because the stack protector
-* guard has been changed a couple of function calls up, in
-* boot_init_stack_canary() and must not be checked before tail calling
-* another function.
-*/
-   prevent_tail_call_optimization();
 }
 
 /**
@@ -1011,6 +1000,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct 
*idle)
alternatives_enable_smp();
 
per_cpu(current_task, cpu) = idle;
+   cpu_init_stack_canary(cpu, idle);
 
/* Initialize the interrupt stack(s) */
ret = irq_init_percpu_irqstack(cpu);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index ae4d0f283df3..e9f5d6ec30a6 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -92,9 +92,7 @@ static void cpu_bringup(void)
 asmlinkage __visible void cpu_bringup_and_idle(void)
 {
cpu_bringup();
-   boot_init_stack_canary();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
-   prevent_tail_call_optimization();
 }
 
 void xen_smp_intr_free_pv(unsigned int cpu)

base-commit: cc7a4a02564c6cc8dc981fb0a37313830ee8c2d4
-- 
2.25.4



Re: [PATCH v3 35/75] x86/head/64: Build k/head64.c with -fno-stack-protector

2020-06-03 Thread Brian Gerst
On Wed, Jun 3, 2020 at 11:18 AM Joerg Roedel  wrote:
>
> On Tue, May 19, 2020 at 09:58:18AM -0400, Brian Gerst wrote:
> > On Tue, Apr 28, 2020 at 11:28 AM Joerg Roedel  wrote:
>
> > The proper fix would be to initialize MSR_GS_BASE earlier.
>
> That would mean initializing it twice during boot, as the first C
> function with stack-protection is called before the kernel switches to
> its high addresses (early_idt_setup call-path). But okay, I can do that.

Good point.  Since this is boot code which isn't subject to stack
smashing attacks, disabling stack protector is probably the simpler
option.

--
Brian Gerst


Re: [PATCH v2 09/10] x86/percpu: Clean up percpu_stable_op()

2020-06-02 Thread Brian Gerst
On Mon, Jun 1, 2020 at 4:43 PM Nick Desaulniers  wrote:
>
> On Sat, May 30, 2020 at 3:11 PM Brian Gerst  wrote:
> >
> > Use __pcpu_size_call_return() to simplify this_cpu_read_stable().
>
> Clever! As in this_cpu_read() in include/linux/percpu-defs.h.  Could
> be its own patch before this, but it's fine.
> Reviewed-by: Nick Desaulniers 
>
> > Also remove __bad_percpu_size() which is now unused.
> >
> > Signed-off-by: Brian Gerst 
> > ---
> >  arch/x86/include/asm/percpu.h | 41 ++-
> >  1 file changed, 12 insertions(+), 29 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
> > index 7efc0b5c4ff0..cf2b9c2a241e 100644
> > --- a/arch/x86/include/asm/percpu.h
> > +++ b/arch/x86/include/asm/percpu.h
> > @@ -85,7 +85,6 @@
> >
> >  /* For arch-specific code, we can use direct single-insn ops (they
> >   * don't give an lvalue though). */
> > -extern void __bad_percpu_size(void);
> >
> >  #define __pcpu_type_1 u8
> >  #define __pcpu_type_2 u16
> > @@ -167,33 +166,13 @@ do {  
> > \
> > (typeof(_var))(unsigned long) pfo_val__;\
> >  })
> >
> > -#define percpu_stable_op(op, var)  \
> > -({ \
> > -   typeof(var) pfo_ret__;  \
> > -   switch (sizeof(var)) {  \
> > -   case 1: \
> > -   asm(op "b "__percpu_arg(P1)",%0"\
>
> What does the `P` do here?
> https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html#Simple-Constraints
> says can be machine dependent integral literal in a certain range.
> https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
> doesn't document `P` for x86 though...

https://gcc.gnu.org/onlinedocs/gcc-10.1.0/gcc/Extended-Asm.html#x86-Operand-Modifiers

Removing the 'P' modifier results in this:
movq %gs:$current_task, %rdx#, pfo_val__

This is because the 'p' constraint treats a memory address as a
constant.  I tried replacing it with __this_cpu_read(), which since
commit 0b9ccc0a should have similar non-volatile semantics.  But the
compiler still reloaded it on every use, so I left the asm template
as-is for now until that can be resolved.
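
For anyone following along, here is a small userspace toy of my own (not
the kernel macro; some_var and read_stable are invented names, and it
assumes gcc with AT&T syntax and a non-PIE build, e.g. -O2 -fno-pie -c)
showing what the 'P' modifier changes when the operand comes in through
the "p" constraint:

unsigned long some_var = 42;

unsigned long read_stable(void)
{
	unsigned long val;

	/*
	 * "p" hands the asm the constant address of some_var; "%P1"
	 * prints it as a bare symbol, so this assembles to
	 *     movq some_var, %rax
	 * With plain "%1" gcc prints the immediate "$some_var", which
	 * is the "$current_task" form in the output quoted above.
	 */
	asm ("movq %P1, %0" : "=r" (val) : "p" (&some_var));
	return val;
}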

--
Brian Gerst


[PATCH v2 02/10] x86/percpu: Clean up percpu_to_op()

2020-05-30 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Reviewed-by: Nick Desaulniers 
---
 arch/x86/include/asm/percpu.h | 90 ++-
 1 file changed, 35 insertions(+), 55 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 19838e4f7a8f..fb280fba94c5 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -117,37 +117,17 @@ extern void __bad_percpu_size(void);
 #define __pcpu_reg_imm_4(x) "ri" (x)
 #define __pcpu_reg_imm_8(x) "re" (x)
 
-#define percpu_to_op(qual, op, var, val)   \
-do {   \
-   typedef typeof(var) pto_T__;\
-   if (0) {\
-   pto_T__ pto_tmp__;  \
-   pto_tmp__ = (val);  \
-   (void)pto_tmp__;\
-   }   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual (op "b %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "qi" ((pto_T__)(val)));   \
-   break;  \
-   case 2: \
-   asm qual (op "w %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "ri" ((pto_T__)(val)));   \
-   break;  \
-   case 4: \
-   asm qual (op "l %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "ri" ((pto_T__)(val)));   \
-   break;  \
-   case 8: \
-   asm qual (op "q %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "re" ((pto_T__)(val)));   \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
+#define percpu_to_op(size, qual, op, _var, _val)   \
+do {   \
+   __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);\
+   if (0) {\
+   typeof(_var) pto_tmp__; \
+   pto_tmp__ = (_val); \
+   (void)pto_tmp__;\
+   }   \
+   asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))   \
+   : [var] "+m" (_var) \
+   : [val] __pcpu_reg_imm_##size(pto_val__));  \
 } while (0)
 
 /*
@@ -425,18 +405,18 @@ do {  
\
 #define raw_cpu_read_2(pcp)percpu_from_op(, "mov", pcp)
 #define raw_cpu_read_4(pcp)percpu_from_op(, "mov", pcp)
 
-#define raw_cpu_write_1(pcp, val)  percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val)  percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val)  percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_1(pcp, val)  percpu_to_op(1, , "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val)  percpu_to_op(2, , "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val)  percpu_to_op(4, , "mov", (pcp), val)
 #define raw_cpu_add_1(pcp, val)percpu_add_op(, (pcp), val)
 #define raw_cpu_add_2(pcp, val)percpu_add_op(, (pcp), val)
 #define raw_cpu_add_4(pcp, val)percpu_add_op(, (pcp), val)
-#define raw_cpu_and_1(pcp, val)percpu_to_op(, "and", (pcp), 
val)
-#define raw_cpu_and_2(pcp, val)percpu_to_op(, "and", (pcp), 
val)
-#define raw_cpu_and_4(pcp, val)percpu_to_op(, "and", (pcp), 
val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or"

[PATCH v2 08/10] x86/percpu: Clean up percpu_cmpxchg_op()

2020-05-30 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Reviewed-by: Nick Desaulniers 
---
 arch/x86/include/asm/percpu.h | 58 +++
 1 file changed, 18 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ac6d7e76c0d4..7efc0b5c4ff0 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -236,39 +236,17 @@ do {  
\
  * cmpxchg has no such implied lock semantics as a result it is much
  * more efficient for cpu local operations.
  */
-#define percpu_cmpxchg_op(qual, var, oval, nval)   \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval)  \
 ({ \
-   typeof(var) pco_ret__;  \
-   typeof(var) pco_old__ = (oval); \
-   typeof(var) pco_new__ = (nval); \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("cmpxchgb %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "q" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 2: \
-   asm qual ("cmpxchgw %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 4: \
-   asm qual ("cmpxchgl %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 8: \
-   asm qual ("cmpxchgq %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pco_ret__;  \
+   __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval);   \
+   __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval);   \
+   asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",   \
+   __percpu_arg([var]))\
+ : [oval] "+a" (pco_old__),\
+   [var] "+m" (_var)   \
+ : [nval] __pcpu_reg_##size(, pco_new__)   \
+ : "memory");  \
+   (typeof(_var))(unsigned long) pco_old__;\
 })
 
 /*
@@ -336,16 +314,16 @@ do {  
\
 #define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, 
val)
 #define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, 
val)
 #define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, 
val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cm

[PATCH v2 10/10] x86/percpu: Remove unused PER_CPU() macro

2020-05-30 Thread Brian Gerst
Also remove now unused __percpu_mov_op.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 18 --
 1 file changed, 18 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cf2b9c2a241e..a3c33b79fb86 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,33 +4,15 @@
 
 #ifdef CONFIG_X86_64
 #define __percpu_seg   gs
-#define __percpu_mov_opmovq
 #else
 #define __percpu_seg   fs
-#define __percpu_mov_opmovl
 #endif
 
 #ifdef __ASSEMBLY__
 
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *var - variable name
- *reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- *PER_CPU(cpu_gdt_descr, %ebx)
- */
 #ifdef CONFIG_SMP
-#define PER_CPU(var, reg)  \
-   __percpu_mov_op %__percpu_seg:this_cpu_off, reg;\
-   lea var(reg), reg
 #define PER_CPU_VAR(var)   %__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU(var, reg)  __percpu_mov_op $var, reg
 #define PER_CPU_VAR(var)   var
 #endif /* SMP */
 
-- 
2.25.4



[PATCH v2 07/10] x86/percpu: Clean up percpu_xchg_op()

2020-05-30 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Reviewed-by: Nick Desaulniers 
---
 arch/x86/include/asm/percpu.h | 61 +++
 1 file changed, 18 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0776a11e7e11..ac6d7e76c0d4 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -215,46 +215,21 @@ do {  
\
  * expensive due to the implied lock prefix.  The processor cannot prefetch
  * cachelines if xchg is used.
  */
-#define percpu_xchg_op(qual, var, nval)
\
+#define percpu_xchg_op(size, qual, _var, _nval)
\
 ({ \
-   typeof(var) pxo_ret__;  \
-   typeof(var) pxo_new__ = (nval); \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%al"  \
-   "\n1:\tcmpxchgb %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "q" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 2: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%ax"  \
-   "\n1:\tcmpxchgw %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 4: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
-   "\n1:\tcmpxchgl %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 8: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
-   "\n1:\tcmpxchgq %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pxo_ret__;  \
+   __pcpu_type_##size pxo_old__;   \
+   __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval);   \
+   asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \
+   "%[oval]")  \
+ "\n1:\t"  \
+ __pcpu_op2_##size("cmpxchg", "%[nval]",   \
+   __percpu_arg([var]))\
+ "\n\tjnz 1b"  \
+ : [oval] "=" (pxo_old__),   \
+   [var] "+m" (_var

[PATCH v2 09/10] x86/percpu: Clean up percpu_stable_op()

2020-05-30 Thread Brian Gerst
Use __pcpu_size_call_return() to simplify this_cpu_read_stable().
Also remove __bad_percpu_size() which is now unused.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 41 ++-
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7efc0b5c4ff0..cf2b9c2a241e 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -85,7 +85,6 @@
 
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
 
 #define __pcpu_type_1 u8
 #define __pcpu_type_2 u16
@@ -167,33 +166,13 @@ do {  
\
(typeof(_var))(unsigned long) pfo_val__;\
 })
 
-#define percpu_stable_op(op, var)  \
-({ \
-   typeof(var) pfo_ret__;  \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm(op "b "__percpu_arg(P1)",%0"\
-   : "=q" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   case 2: \
-   asm(op "w "__percpu_arg(P1)",%0"\
-   : "=r" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   case 4: \
-   asm(op "l "__percpu_arg(P1)",%0"\
-   : "=r" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   case 8: \
-   asm(op "q "__percpu_arg(P1)",%0"\
-   : "=r" (pfo_ret__)  \
-   : "p" (&(var)));\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pfo_ret__;  \
+#define percpu_stable_op(size, op, _var)   \
+({ \
+   __pcpu_type_##size pfo_val__;   \
+   asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")   \
+   : [val] __pcpu_reg_##size("=", pfo_val__)   \
+   : [var] "p" (&(_var))); \
+   (typeof(_var))(unsigned long) pfo_val__;\
 })
 
 /*
@@ -258,7 +237,11 @@ do {   
\
  * per-thread variables implemented as per-cpu variables and thus
  * stable for the duration of the respective task.
  */
-#define this_cpu_read_stable(var)  percpu_stable_op("mov", var)
+#define this_cpu_read_stable_1(pcp)percpu_stable_op(1, "mov", pcp)
+#define this_cpu_read_stable_2(pcp)percpu_stable_op(2, "mov", pcp)
+#define this_cpu_read_stable_4(pcp)percpu_stable_op(4, "mov", pcp)
+#define this_cpu_read_stable_8(pcp)percpu_stable_op(8, "mov", pcp)
+#define this_cpu_read_stable(pcp)  
__pcpu_size_call_return(this_cpu_read_stable_, pcp)
 
 #define raw_cpu_read_1(pcp)percpu_from_op(1, , "mov", pcp)
 #define raw_cpu_read_2(pcp)percpu_from_op(2, , "mov", pcp)
-- 
2.25.4



[PATCH v2 05/10] x86/percpu: Remove "e" constraint from XADD

2020-05-30 Thread Brian Gerst
The "e" constraint represents a constant, but the XADD instruction doesn't
accept immediate operands.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2a24f3c795eb..9bb5440d98d3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -220,7 +220,7 @@ do {
\
break;  \
case 8: \
asm qual ("xaddq %0, "__percpu_arg(1)   \
-   : "+re" (paro_ret__), "+m" (var)\
+   : "+r" (paro_ret__), "+m" (var) \
: : "memory");  \
break;  \
default: __bad_percpu_size();   \
-- 
2.25.4



[PATCH v2 03/10] x86/percpu: Clean up percpu_from_op()

2020-05-30 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
Reviewed-by: Nick Desaulniers 
---
 arch/x86/include/asm/percpu.h | 50 +++
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index fb280fba94c5..a40d2e055f58 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -190,33 +190,13 @@ do {  
\
}   \
 } while (0)
 
-#define percpu_from_op(qual, op, var)  \
-({ \
-   typeof(var) pfo_ret__;  \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual (op "b "__percpu_arg(1)",%0"   \
-   : "=q" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 2: \
-   asm qual (op "w "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 4: \
-   asm qual (op "l "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 8: \
-   asm qual (op "q "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pfo_ret__;  \
+#define percpu_from_op(size, qual, op, _var)   \
+({ \
+   __pcpu_type_##size pfo_val__;   \
+   asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")  \
+   : [val] __pcpu_reg_##size("=", pfo_val__)   \
+   : [var] "m" (_var));\
+   (typeof(_var))(unsigned long) pfo_val__;\
 })
 
 #define percpu_stable_op(op, var)  \
@@ -401,9 +381,9 @@ do {
\
  */
 #define this_cpu_read_stable(var)  percpu_stable_op("mov", var)
 
-#define raw_cpu_read_1(pcp)percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_2(pcp)percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_4(pcp)percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_1(pcp)percpu_from_op(1, , "mov", pcp)
+#define raw_cpu_read_2(pcp)percpu_from_op(2, , "mov", pcp)
+#define raw_cpu_read_4(pcp)percpu_from_op(4, , "mov", pcp)
 
 #define raw_cpu_write_1(pcp, val)  percpu_to_op(1, , "mov", (pcp), val)
 #define raw_cpu_write_2(pcp, val)  percpu_to_op(2, , "mov", (pcp), val)
@@ -433,9 +413,9 @@ do {
\
 #define raw_cpu_xchg_2(pcp, val)   raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val)   raw_percpu_xchg_op(pcp, val)
 
-#define this_cpu_read_1(pcp)   percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_2(pcp)   percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_4(pcp)   percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_1(pcp)   percpu_from_op(1, volatile, "mov", pcp)
+#define this_cpu_read_2(pcp)   percpu_from_op(2, volatile, "mov", pcp)
+#define this_cpu_read_4(pcp)   percpu_from_op(4, volatile, "mov", pcp)
 #define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), 
val)
 #define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), 
val)
 #define this_cpu_write_4(pcp, val) pe

[PATCH v2 00/10] x86: Clean up percpu operations

2020-05-30 Thread Brian Gerst
The core percpu operations already have a switch on the width of the
data type, which resulted in extra dead code being generated because the
x86 operations had another switch of their own.  This patch set rewrites
the x86 ops to remove the redundant switch.  Additional cleanups are to
use named assembly operands, and to cast variables to the width used in
the assembly to make Clang happy.
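
To illustrate the shape of the change, here is a simplified sketch of my
own (the my_inc* names are invented; this is not the actual kernel macro
set).  The core dispatch keeps its compile-time switch on sizeof(), but
each branch now pastes the size straight into a single per-width asm
template instead of expanding a second four-way switch of its own:

/* Per-width helpers, selected purely by token pasting. */
#define my_inc_1(var) asm ("incb %0" : "+m" (var))
#define my_inc_2(var) asm ("incw %0" : "+m" (var))
#define my_inc_4(var) asm ("incl %0" : "+m" (var))
#define my_inc_8(var) asm ("incq %0" : "+m" (var))

#define my_inc_op(size, var) my_inc_##size(var)

/* The one remaining switch, mirroring the core percpu dispatch. */
#define my_inc(var)						\
do {								\
	switch (sizeof(var)) {					\
	case 1: my_inc_op(1, var); break;			\
	case 2: my_inc_op(2, var); break;			\
	case 4: my_inc_op(4, var); break;			\
	case 8: my_inc_op(8, var); break;			\
	}							\
} while (0)

static unsigned long counter;

void bump(void)
{
	my_inc(counter);	/* resolves to the "incq" template */
}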

Changes from v1:
- Add separate patch for XADD constraint fix
- Fixed sparse truncation warning
- Add cleanup of percpu_stable_op()
- Add patch to Remove PER_CPU()

Brian Gerst (10):
  x86/percpu: Introduce size abstraction macros
  x86/percpu: Clean up percpu_to_op()
  x86/percpu: Clean up percpu_from_op()
  x86/percpu: Clean up percpu_add_op()
  x86/percpu: Remove "e" constraint from XADD
  x86/percpu: Clean up percpu_add_return_op()
  x86/percpu: Clean up percpu_xchg_op()
  x86/percpu: Clean up percpu_cmpxchg_op()
  x86/percpu: Clean up percpu_stable_op()
  x86/percpu: Remove unused PER_CPU() macro

 arch/x86/include/asm/percpu.h | 510 --
 1 file changed, 172 insertions(+), 338 deletions(-)


base-commit: 229aaa8c059f2c908e0561453509f996f2b2d5c4
-- 
2.25.4



[PATCH v2 04/10] x86/percpu: Clean up percpu_add_op()

2020-05-30 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 99 ---
 1 file changed, 22 insertions(+), 77 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a40d2e055f58..2a24f3c795eb 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -130,64 +130,32 @@ do {  
\
: [val] __pcpu_reg_imm_##size(pto_val__));  \
 } while (0)
 
+#define percpu_unary_op(size, qual, op, _var)  \
+({ \
+   asm qual (__pcpu_op1_##size(op, __percpu_arg([var]))\
+   : [var] "+m" (_var));   \
+})
+
 /*
  * Generate a percpu add to memory instruction and optimize code
  * if one is added or subtracted.
  */
-#define percpu_add_op(qual, var, val)  \
+#define percpu_add_op(size, qual, var, val)\
 do {   \
-   typedef typeof(var) pao_T__;\
const int pao_ID__ = (__builtin_constant_p(val) &&  \
  ((val) == 1 || (val) == -1)) ?\
(int)(val) : 0; \
if (0) {\
-   pao_T__ pao_tmp__;  \
+   typeof(var) pao_tmp__;  \
pao_tmp__ = (val);  \
(void)pao_tmp__;\
}   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addb %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "qi" ((pao_T__)(val)));   \
-   break;  \
-   case 2: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addw %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "ri" ((pao_T__)(val)));   \
-   break;  \
-   case 4: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addl %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "ri" ((pao_T__)(val)));   \
-   break;  \
-   case 8: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-  

[PATCH v2 06/10] x86/percpu: Clean up percpu_add_return_op()

2020-05-30 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 51 +++
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 9bb5440d98d3..0776a11e7e11 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -199,34 +199,15 @@ do {  
\
 /*
  * Add return operation
  */
-#define percpu_add_return_op(qual, var, val)   \
+#define percpu_add_return_op(size, qual, _var, _val)   \
 ({ \
-   typeof(var) paro_ret__ = val;   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("xaddb %0, "__percpu_arg(1)   \
-   : "+q" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 2: \
-   asm qual ("xaddw %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 4: \
-   asm qual ("xaddl %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 8: \
-   asm qual ("xaddq %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   paro_ret__ += val;  \
-   paro_ret__; \
+   __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val);   \
+   asm qual (__pcpu_op2_##size("xadd", "%[tmp]",   \
+__percpu_arg([var]))   \
+ : [tmp] __pcpu_reg_##size("+", paro_tmp__),   \
+   [var] "+m" (_var)   \
+ : : "memory");\
+   (typeof(_var))(unsigned long) (paro_tmp__ + _val);  \
 })
 
 /*
@@ -377,16 +358,16 @@ do {  
\
 #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
 #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
 
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, 
val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, 
val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, 
val)
 #define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
 #define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
 #define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
 
-#define this_cpu_add_return_1(pcp, val)
percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_2(pcp, val)
percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_4(pcp, val)
percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_1(pcp, val)percpu_add_return_op(1, 
volat

[PATCH v2 01/10] x86/percpu: Introduce size abstraction macros

2020-05-30 Thread Brian Gerst
In preparation for cleaning up the percpu operations, define macros for
abstraction based on the width of the operation.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2278797c769d..19838e4f7a8f 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -87,6 +87,36 @@
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
 
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
+
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0x))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0x))
+#define __pcpu_cast_8(val) ((u64)(val))
+
+#define __pcpu_op1_1(op, dst) op "b " dst
+#define __pcpu_op1_2(op, dst) op "w " dst
+#define __pcpu_op1_4(op, dst) op "l " dst
+#define __pcpu_op1_8(op, dst) op "q " dst
+
+#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
+#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
+#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
+#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
+
 #define percpu_to_op(qual, op, var, val)   \
 do {   \
typedef typeof(var) pto_T__;\
-- 
2.25.4



Re: [PATCH v2 2/3] x86/boot/compressed: force hidden visibility for all symbol references

2020-05-27 Thread Brian Gerst
On Wed, May 27, 2020 at 2:08 PM Arvind Sankar  wrote:
>
> On Sat, May 23, 2020 at 02:00:20PM +0200, Ard Biesheuvel wrote:
> > Eliminate all GOT entries in the decompressor binary, by forcing hidden
> > visibility for all symbol references, which informs the compiler that
> > such references will be resolved at link time without the need for
> > allocating GOT entries.
> >
> > To ensure that no GOT entries will creep back in, add an assertion to
> > the decompressor linker script that will fire if the .got section has
> > a non-zero size.
> >
> > Signed-off-by: Ard Biesheuvel 
> > ---
> >  arch/x86/boot/compressed/Makefile  |  1 +
> >  arch/x86/boot/compressed/hidden.h  | 19 +++
> >  arch/x86/boot/compressed/vmlinux.lds.S |  1 +
> >  3 files changed, 21 insertions(+)
> >
> > diff --git a/arch/x86/boot/compressed/Makefile 
> > b/arch/x86/boot/compressed/Makefile
> > index 5f7c262bcc99..aa9ed814e5fa 100644
> > --- a/arch/x86/boot/compressed/Makefile
> > +++ b/arch/x86/boot/compressed/Makefile
> > @@ -40,6 +40,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
> >  KBUILD_CFLAGS += -Wno-pointer-sign
> >  KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
> >  KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
> > +KBUILD_CFLAGS += -include hidden.h
> >
>
> Ard, from the other thread [1] in case you missed it -- the plain
> hidden.h fails to build in-tree. We need something like
> KBUILD_CFLAGS += -include $(srctree)/$(src)/hidden.h
> instead.
>
> [1] https://lore.kernel.org/lkml/20200526153104.gc2190...@rani.riverdale.lan/

How about using -fvisibility=hidden instead of including this header?
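
For reference, a minimal sketch of my own (foo_default, foo_hidden,
get_default and get_hidden are invented names) showing what hidden
visibility buys when compiling position-independent code; compare the
output of something like gcc -O2 -fPIC -S:

extern int foo_default;
extern int foo_hidden __attribute__((visibility("hidden")));

int get_default(void)
{
	/* goes through foo_default@GOTPCREL(%rip), i.e. needs a GOT slot */
	return foo_default;
}

int get_hidden(void)
{
	/* resolved at link time: a plain foo_hidden(%rip) access, no GOT */
	return foo_hidden;
}

As far as I can tell, either approach - the -include'd header or a global
-fvisibility=hidden - should end up forcing every reference into the
second form, which is what keeps .got empty.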

--
Brian Gerst


Re: [PATCH 1/4] x86/boot: Add .text.startup to setup.ld

2020-05-24 Thread Brian Gerst
On Sun, May 24, 2020 at 5:32 PM Arvind Sankar  wrote:
>
> gcc puts the main function into .text.startup when compiled with -Os (or
> -O2). This results in arch/x86/boot/main.c having a .text.startup
> section which is currently not included explicitly in the linker script
> setup.ld in the same directory.

If the compiler is making assumptions based on the function name
"main" wouldn't it be simpler just to rename the function?

--
Brian Gerst


Re: [PATCH 2/7] x86/percpu: Clean up percpu_to_op()

2020-05-21 Thread Brian Gerst
On Wed, May 20, 2020 at 1:26 PM Nick Desaulniers
 wrote:
>
> On Mon, May 18, 2020 at 8:38 PM Brian Gerst  wrote:
> >
> > On Mon, May 18, 2020 at 5:15 PM Nick Desaulniers
> >  wrote:
> > >
> > > On Sun, May 17, 2020 at 8:29 AM Brian Gerst  wrote:
> > > >
> > > > The core percpu macros already have a switch on the data size, so the 
> > > > switch
> > > > in the x86 code is redundant and produces more dead code.
> > > >
> > > > Also use appropriate types for the width of the instructions.  This 
> > > > avoids
> > > > errors when compiling with Clang.
> > > >
> > > > Signed-off-by: Brian Gerst 
> > > > ---
> > > >  arch/x86/include/asm/percpu.h | 90 ++-
> > > >  1 file changed, 35 insertions(+), 55 deletions(-)
> > > >
> > > > diff --git a/arch/x86/include/asm/percpu.h 
> > > > b/arch/x86/include/asm/percpu.h
> > > > index 89f918a3e99b..233c7a78d1a6 100644
> > > > --- a/arch/x86/include/asm/percpu.h
> > > > +++ b/arch/x86/include/asm/percpu.h
> > > > @@ -117,37 +117,17 @@ extern void __bad_percpu_size(void);
> > > >  #define __pcpu_reg_imm_4(x) "ri" (x)
> > > >  #define __pcpu_reg_imm_8(x) "re" (x)
> > > >
> > > > -#define percpu_to_op(qual, op, var, val)   \
> > > > -do {   \
> > > > -   typedef typeof(var) pto_T__;\
> > > > -   if (0) {\
> > > > -   pto_T__ pto_tmp__;  \
> > > > -   pto_tmp__ = (val);  \
> > > > -   (void)pto_tmp__;\
> > > > -   }   \
> > > > -   switch (sizeof(var)) {  \
> > > > -   case 1: \
> > > > -   asm qual (op "b %1,"__percpu_arg(0) \
> > > > -   : "+m" (var)\
> > > > -   : "qi" ((pto_T__)(val)));   \
> > > > -   break;  \
> > > > -   case 2: \
> > > > -   asm qual (op "w %1,"__percpu_arg(0) \
> > > > -   : "+m" (var)\
> > > > -   : "ri" ((pto_T__)(val)));   \
> > > > -   break;  \
> > > > -   case 4: \
> > > > -   asm qual (op "l %1,"__percpu_arg(0) \
> > > > -   : "+m" (var)\
> > > > -   : "ri" ((pto_T__)(val)));   \
> > > > -   break;  \
> > > > -   case 8: \
> > > > -   asm qual (op "q %1,"__percpu_arg(0) \
> > > > -   : "+m" (var)\
> > > > -   : "re" ((pto_T__)(val)));   \
> > > > -   break;  \
> > > > -   default: __bad_percpu_size();   \
> > > > -   }   \
> > > > +#define percpu_to_op(size, qual, op, _var, _val)   
> > > > \
> > > > +do {   
> > > > \
> > > > +   __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);
> > > > \
> > > > +   if (0) {
> > > > \
> > > > +   typeof(_var) pto_tmp__; 
> > > > \
> > > > +   pto_tmp__ = (_val); 
> > > > \
> > > > +   (void)pto_tmp__;
> > > > \
> > > > +   }   
> > > > \
> > &g

Re: [PATCH v3 35/75] x86/head/64: Build k/head64.c with -fno-stack-protector

2020-05-19 Thread Brian Gerst
On Tue, Apr 28, 2020 at 11:28 AM Joerg Roedel  wrote:
>
> From: Joerg Roedel 
>
> The code inserted by the stack protector does not work in the early
> boot environment because it uses the GS segment, at least with memory
> encryption enabled. Make sure the early code is compiled without this
> feature enabled.
>
> Signed-off-by: Joerg Roedel 
> ---
>  arch/x86/kernel/Makefile | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index ba89cabe5fcf..1192de38fa56 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -35,6 +35,10 @@ ifdef CONFIG_FRAME_POINTER
>  OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
>  endif
>
> +# make sure head64.c is built without stack protector
> +nostackp := $(call cc-option, -fno-stack-protector)
> +CFLAGS_head64.o:= $(nostackp)
> +
>  # If instrumentation of this dir is enabled, boot hangs during first second.
>  # Probably could be more selective here, but note that files related to irqs,
>  # boot, dumpstack/stacktrace, etc are either non-interesting or can lead to

The proper fix would be to initialize MSR_GS_BASE earlier.

--
Brian Gerst


Re: [PATCH 2/7] x86/percpu: Clean up percpu_to_op()

2020-05-18 Thread Brian Gerst
On Mon, May 18, 2020 at 5:15 PM Nick Desaulniers
 wrote:
>
> On Sun, May 17, 2020 at 8:29 AM Brian Gerst  wrote:
> >
> > The core percpu macros already have a switch on the data size, so the switch
> > in the x86 code is redundant and produces more dead code.
> >
> > Also use appropriate types for the width of the instructions.  This avoids
> > errors when compiling with Clang.
> >
> > Signed-off-by: Brian Gerst 
> > ---
> >  arch/x86/include/asm/percpu.h | 90 ++-
> >  1 file changed, 35 insertions(+), 55 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
> > index 89f918a3e99b..233c7a78d1a6 100644
> > --- a/arch/x86/include/asm/percpu.h
> > +++ b/arch/x86/include/asm/percpu.h
> > @@ -117,37 +117,17 @@ extern void __bad_percpu_size(void);
> >  #define __pcpu_reg_imm_4(x) "ri" (x)
> >  #define __pcpu_reg_imm_8(x) "re" (x)
> >
> > -#define percpu_to_op(qual, op, var, val)   \
> > -do {   \
> > -   typedef typeof(var) pto_T__;\
> > -   if (0) {\
> > -   pto_T__ pto_tmp__;  \
> > -   pto_tmp__ = (val);  \
> > -   (void)pto_tmp__;\
> > -   }   \
> > -   switch (sizeof(var)) {  \
> > -   case 1: \
> > -   asm qual (op "b %1,"__percpu_arg(0) \
> > -   : "+m" (var)\
> > -   : "qi" ((pto_T__)(val)));   \
> > -   break;  \
> > -   case 2: \
> > -   asm qual (op "w %1,"__percpu_arg(0) \
> > -   : "+m" (var)\
> > -   : "ri" ((pto_T__)(val)));   \
> > -   break;  \
> > -   case 4: \
> > -   asm qual (op "l %1,"__percpu_arg(0) \
> > -   : "+m" (var)\
> > -   : "ri" ((pto_T__)(val)));   \
> > -   break;  \
> > -   case 8: \
> > -   asm qual (op "q %1,"__percpu_arg(0) \
> > -   : "+m" (var)\
> > -   : "re" ((pto_T__)(val)));   \
> > -   break;  \
> > -   default: __bad_percpu_size();   \
> > -   }   \
> > +#define percpu_to_op(size, qual, op, _var, _val)   \
> > +do {   \
> > +   __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);\
> > +   if (0) {\
> > +   typeof(_var) pto_tmp__; \
> > +   pto_tmp__ = (_val); \
> > +   (void)pto_tmp__;    \
> > +   }   \
>
> Please replace the whole `if (0)` block with:
> ```c
> __same_type(_var, _val);
> ```
> from include/linux/compiler.h.

The problem with __builtin_types_compatible_p() is that it considers
unsigned long and u64 (aka unsigned long long) as different types even
though they are the same width on x86-64.  While this may be a good
cleanup to look at in the future, it's not a simple drop-in
replacement.
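
The mismatch is easy to reproduce with a standalone test program (my own
sketch, not kernel code; the u64 typedef mirrors what the kernel uses on
x86-64):

#include <stdio.h>

typedef unsigned long long u64;

int main(void)
{
	unsigned long a = 0;
	u64 b = 0;

	printf("sizeof: %zu vs %zu\n", sizeof(a), sizeof(b));

	/* Prints 0: not "compatible" despite the identical width. */
	printf("compatible: %d\n",
	       __builtin_types_compatible_p(typeof(a), typeof(b)));

	/* The if (0) assignment trick only needs assignability, so
	 * mixing the two types is still accepted there. */
	if (0) {
		unsigned long tmp;
		tmp = b;
		(void)tmp;
	}
	return 0;
}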

--
Brian Gerst


Re: [PATCH 5/7] x86/percpu: Clean up percpu_add_return_op()

2020-05-18 Thread Brian Gerst
On Mon, May 18, 2020 at 6:46 PM Nick Desaulniers
 wrote:
>
> On Sun, May 17, 2020 at 8:29 AM Brian Gerst  wrote:
> >
> > The core percpu macros already have a switch on the data size, so the switch
> > in the x86 code is redundant and produces more dead code.
> >
> > Also use appropriate types for the width of the instructions.  This avoids
> > errors when compiling with Clang.
> >
> > Signed-off-by: Brian Gerst 
> > ---
> >  arch/x86/include/asm/percpu.h | 51 +++
> >  1 file changed, 16 insertions(+), 35 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
> > index 21c5013a681a..ac8c391a190e 100644
> > --- a/arch/x86/include/asm/percpu.h
> > +++ b/arch/x86/include/asm/percpu.h
> > @@ -199,34 +199,15 @@ do {  
> > \
> >  /*
> >   * Add return operation
> >   */
> > -#define percpu_add_return_op(qual, var, val)   \
> > +#define percpu_add_return_op(size, qual, _var, _val)   \
> >  ({ \
> > -   typeof(var) paro_ret__ = val;   \
> > -   switch (sizeof(var)) {  \
> > -   case 1: \
> > -   asm qual ("xaddb %0, "__percpu_arg(1)   \
> > -   : "+q" (paro_ret__), "+m" (var) \
> > -   : : "memory");  \
> > -   break;  \
> > -   case 2: \
> > -   asm qual ("xaddw %0, "__percpu_arg(1)   \
> > -   : "+r" (paro_ret__), "+m" (var) \
> > -   : : "memory");  \
> > -   break;  \
> > -   case 4: \
> > -   asm qual ("xaddl %0, "__percpu_arg(1)   \
> > -   : "+r" (paro_ret__), "+m" (var) \
> > -   : : "memory");  \
> > -   break;  \
> > -   case 8: \
> > -   asm qual ("xaddq %0, "__percpu_arg(1)   \
> > -   : "+re" (paro_ret__), "+m" (var)\
>
> ^ before we use the "+re" constraint for 8B input.
>
> > -   : : "memory");  \
> > -   break;  \
> > -   default: __bad_percpu_size();   \
>
> Comment on the series as a whole.  After applying the series, the
> final reference to __bad_percpu_size and switch statement in
> arch/x86/include/asm/percpu.h is in the definition of the
> percpu_stable_op() macro.  If you clean that up, too, then the rest of
> this file feels more consistent with your series, even if it's not a
> blocker for Clang i386 support. Then you can get rid of
> __bad_percpu_size, too!

I haven't yet figured out what to do with percpu_stable_op().  It's
x86-specific, so there isn't another switch in the core code.  I think
it is supposed to be similar to READ_ONCE() but for percpu variables,
but I'm not 100% sure.

> > -   }   \
> > -   paro_ret__ += val;  \
> > -   paro_ret__; \
> > +   __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val);   \
> > +   asm qual (__pcpu_op2_##size("xadd", "%[tmp]",   \
> > +__percpu_arg([var]))   \
> > + : [tmp] __pcpu_reg_##size("+", paro_tmp__),   \
>
> ^ after, for `size == 8`, we use "+r". [0] says for "e":
>
> 32-bit signed integer constant, or a symbolic reference known to fit
> that range (for immediate operands in sign-extending x86-6

[PATCH 7/7] x86/percpu: Clean up percpu_cmpxchg_op()

2020-05-17 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 58 +++
 1 file changed, 18 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3c95ab3c99cd..b61d4fc5568e 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -236,39 +236,17 @@ do {  
\
  * cmpxchg has no such implied lock semantics as a result it is much
  * more efficient for cpu local operations.
  */
-#define percpu_cmpxchg_op(qual, var, oval, nval)   \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval)  \
 ({ \
-   typeof(var) pco_ret__;  \
-   typeof(var) pco_old__ = (oval); \
-   typeof(var) pco_new__ = (nval); \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("cmpxchgb %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "q" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 2: \
-   asm qual ("cmpxchgw %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 4: \
-   asm qual ("cmpxchgl %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   case 8: \
-   asm qual ("cmpxchgq %2, "__percpu_arg(1)\
-   : "=a" (pco_ret__), "+m" (var)  \
-   : "r" (pco_new__), "0" (pco_old__)  \
-   : "memory");\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pco_ret__;  \
+   __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval);   \
+   __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval);   \
+   asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",   \
+   __percpu_arg([var]))\
+ : [oval] "+a" (pco_old__),\
+   [var] "+m" (_var)   \
+ : [nval] __pcpu_reg_##size(, pco_new__)   \
+ : "memory");  \
+   (typeof(_var))(unsigned long) pco_old__;\
 })
 
 /*
@@ -336,16 +314,16 @@ do {  
\
 #define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, 
val)
 #define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, 
val)
 #define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, 
val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, 
oval, nval)

[PATCH 6/7] x86/percpu: Clean up percpu_xchg_op()

2020-05-17 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 61 +++
 1 file changed, 18 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ac8c391a190e..3c95ab3c99cd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -215,46 +215,21 @@ do {  
\
  * expensive due to the implied lock prefix.  The processor cannot prefetch
  * cachelines if xchg is used.
  */
-#define percpu_xchg_op(qual, var, nval)
\
+#define percpu_xchg_op(size, qual, _var, _nval)
\
 ({ \
-   typeof(var) pxo_ret__;  \
-   typeof(var) pxo_new__ = (nval); \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%al"  \
-   "\n1:\tcmpxchgb %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "q" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 2: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%ax"  \
-   "\n1:\tcmpxchgw %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 4: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
-   "\n1:\tcmpxchgl %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   case 8: \
-   asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
-   "\n1:\tcmpxchgq %2, "__percpu_arg(1)\
-   "\n\tjnz 1b"\
-   : "=" (pxo_ret__), "+m" (var) \
-   : "r" (pxo_new__)   \
-   : "memory");\
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pxo_ret__;  \
+   __pcpu_type_##size pxo_old__;   \
+   __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval);   \
+   asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \
+   "%[oval]")  \
+ "\n1:\t"  \
+ __pcpu_op2_##size("cmpxchg", "%[nval]",   \
+   __percpu_arg([var]))\
+ "\n\tjnz 1b"  \
+ : [oval] "=" (pxo_old__),   \
+   [var] "+m" (_var)   \
+  

[PATCH 4/7] x86/percpu: Clean up percpu_add_op()

2020-05-17 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 99 ---
 1 file changed, 22 insertions(+), 77 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 93f33202492d..21c5013a681a 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -130,64 +130,32 @@ do {  
\
: [val] __pcpu_reg_imm_##size(pto_val__));  \
 } while (0)
 
+#define percpu_unary_op(size, qual, op, _var)  \
+({ \
+   asm qual (__pcpu_op1_##size(op, __percpu_arg([var]))\
+   : [var] "+m" (_var));   \
+})
+
 /*
  * Generate a percpu add to memory instruction and optimize code
  * if one is added or subtracted.
  */
-#define percpu_add_op(qual, var, val)  \
+#define percpu_add_op(size, qual, var, val)\
 do {   \
-   typedef typeof(var) pao_T__;\
const int pao_ID__ = (__builtin_constant_p(val) &&  \
  ((val) == 1 || (val) == -1)) ?\
(int)(val) : 0; \
if (0) {\
-   pao_T__ pao_tmp__;  \
+   typeof(var) pao_tmp__;  \
pao_tmp__ = (val);  \
(void)pao_tmp__;\
}   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addb %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "qi" ((pao_T__)(val)));   \
-   break;  \
-   case 2: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addw %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "ri" ((pao_T__)(val)));   \
-   break;  \
-   case 4: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-   asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
-   else\
-   asm qual ("addl %1, "__percpu_arg(0)\
-   : "+m" (var)\
-   : "ri" ((pao_T__)(val)));   \
-   break;  \
-   case 8: \
-   if (pao_ID__ == 1)  \
-   asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
-   else if (pao_ID__ == -1)\
-  

[PATCH 3/7] x86/percpu: Clean up percpu_from_op()

2020-05-17 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 50 +++
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 233c7a78d1a6..93f33202492d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -190,33 +190,13 @@ do {  
\
}   \
 } while (0)
 
-#define percpu_from_op(qual, op, var)  \
-({ \
-   typeof(var) pfo_ret__;  \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual (op "b "__percpu_arg(1)",%0"   \
-   : "=q" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 2: \
-   asm qual (op "w "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 4: \
-   asm qual (op "l "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   case 8: \
-   asm qual (op "q "__percpu_arg(1)",%0"   \
-   : "=r" (pfo_ret__)  \
-   : "m" (var));   \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   pfo_ret__;  \
+#define percpu_from_op(size, qual, op, _var)   \
+({ \
+   __pcpu_type_##size pfo_val__;   \
+   asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")  \
+   : [val] __pcpu_reg_##size("=", pfo_val__)   \
+   : [var] "m" (_var));\
+   (typeof(_var))(unsigned long) pfo_val__;\
 })
 
 #define percpu_stable_op(op, var)  \
@@ -401,9 +381,9 @@ do {
\
  */
 #define this_cpu_read_stable(var)  percpu_stable_op("mov", var)
 
-#define raw_cpu_read_1(pcp)percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_2(pcp)percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_4(pcp)percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_1(pcp)percpu_from_op(1, , "mov", pcp)
+#define raw_cpu_read_2(pcp)percpu_from_op(2, , "mov", pcp)
+#define raw_cpu_read_4(pcp)percpu_from_op(4, , "mov", pcp)
 
 #define raw_cpu_write_1(pcp, val)  percpu_to_op(1, , "mov", (pcp), val)
 #define raw_cpu_write_2(pcp, val)  percpu_to_op(2, , "mov", (pcp), val)
@@ -433,9 +413,9 @@ do {
\
 #define raw_cpu_xchg_2(pcp, val)   raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val)   raw_percpu_xchg_op(pcp, val)
 
-#define this_cpu_read_1(pcp)   percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_2(pcp)   percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_4(pcp)   percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_1(pcp)   percpu_from_op(1, volatile, "mov", pcp)
+#define this_cpu_read_2(pcp)   percpu_from_op(2, volatile, "mov", pcp)
+#define this_cpu_read_4(pcp)   percpu_from_op(4, volatile, "mov", pcp)
 #define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), 
val)
 #define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), 
val)
 #define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, 

[PATCH 2/7] x86/percpu: Clean up percpu_to_op()

2020-05-17 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 90 ++-
 1 file changed, 35 insertions(+), 55 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 89f918a3e99b..233c7a78d1a6 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -117,37 +117,17 @@ extern void __bad_percpu_size(void);
 #define __pcpu_reg_imm_4(x) "ri" (x)
 #define __pcpu_reg_imm_8(x) "re" (x)
 
-#define percpu_to_op(qual, op, var, val)   \
-do {   \
-   typedef typeof(var) pto_T__;\
-   if (0) {\
-   pto_T__ pto_tmp__;  \
-   pto_tmp__ = (val);  \
-   (void)pto_tmp__;\
-   }   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual (op "b %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "qi" ((pto_T__)(val)));   \
-   break;  \
-   case 2: \
-   asm qual (op "w %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "ri" ((pto_T__)(val)));   \
-   break;  \
-   case 4: \
-   asm qual (op "l %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "ri" ((pto_T__)(val)));   \
-   break;  \
-   case 8: \
-   asm qual (op "q %1,"__percpu_arg(0) \
-   : "+m" (var)\
-   : "re" ((pto_T__)(val)));   \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
+#define percpu_to_op(size, qual, op, _var, _val)   \
+do {   \
+   __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);\
+   if (0) {\
+   typeof(_var) pto_tmp__; \
+   pto_tmp__ = (_val); \
+   (void)pto_tmp__;\
+   }   \
+   asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))   \
+   : [var] "+m" (_var) \
+   : [val] __pcpu_reg_imm_##size(pto_val__));  \
 } while (0)
 
 /*
@@ -425,18 +405,18 @@ do {  
\
 #define raw_cpu_read_2(pcp)percpu_from_op(, "mov", pcp)
 #define raw_cpu_read_4(pcp)percpu_from_op(, "mov", pcp)
 
-#define raw_cpu_write_1(pcp, val)  percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val)  percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val)  percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_1(pcp, val)  percpu_to_op(1, , "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val)  percpu_to_op(2, , "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val)  percpu_to_op(4, , "mov", (pcp), val)
 #define raw_cpu_add_1(pcp, val)percpu_add_op(, (pcp), val)
 #define raw_cpu_add_2(pcp, val)percpu_add_op(, (pcp), val)
 #define raw_cpu_add_4(pcp, val)percpu_add_op(, (pcp), val)
-#define raw_cpu_and_1(pcp, val)percpu_to_op(, "and", (pcp), 
val)
-#define raw_cpu_and_2(pcp, val)percpu_to_op(, "and", (pcp), 
val)
-#define raw_cpu_and_4(pcp, val)percpu_to_op(, "and", (pcp), 
val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) 

[PATCH 5/7] x86/percpu: Clean up percpu_add_return_op()

2020-05-17 Thread Brian Gerst
The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions.  This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 51 +++
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 21c5013a681a..ac8c391a190e 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -199,34 +199,15 @@ do {  
\
 /*
  * Add return operation
  */
-#define percpu_add_return_op(qual, var, val)   \
+#define percpu_add_return_op(size, qual, _var, _val)   \
 ({ \
-   typeof(var) paro_ret__ = val;   \
-   switch (sizeof(var)) {  \
-   case 1: \
-   asm qual ("xaddb %0, "__percpu_arg(1)   \
-   : "+q" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 2: \
-   asm qual ("xaddw %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 4: \
-   asm qual ("xaddl %0, "__percpu_arg(1)   \
-   : "+r" (paro_ret__), "+m" (var) \
-   : : "memory");  \
-   break;  \
-   case 8: \
-   asm qual ("xaddq %0, "__percpu_arg(1)   \
-   : "+re" (paro_ret__), "+m" (var)\
-   : : "memory");  \
-   break;  \
-   default: __bad_percpu_size();   \
-   }   \
-   paro_ret__ += val;  \
-   paro_ret__; \
+   __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val);   \
+   asm qual (__pcpu_op2_##size("xadd", "%[tmp]",   \
+__percpu_arg([var]))   \
+ : [tmp] __pcpu_reg_##size("+", paro_tmp__),   \
+   [var] "+m" (_var)   \
+ : : "memory");\
+   (typeof(_var))(unsigned long) (paro_tmp__ + _val);  \
 })
 
 /*
@@ -377,16 +358,16 @@ do {  
\
 #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
 #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
 
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, 
val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, 
val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, 
val)
 #define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
 #define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
 #define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, 
nval)
 
-#define this_cpu_add_return_1(pcp, val)
percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_2(pcp, val)
percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_4(pcp, val)
percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_1(pcp, val)percpu_add_return_op(1, 
volat

[PATCH 0/7] x86: Clean up percpu operations

2020-05-17 Thread Brian Gerst
The core percpu operations already have a switch on the width of the
data type, which resulted in extra dead code being generated because the
x86 operations had another switch of their own.  This patch set rewrites
the x86 ops to remove the redundant switch.  Additional cleanups are to
use named assembly operands, and to cast variables to the width used in
the assembly to make Clang happy.

Brian Gerst (7):
  x86/percpu: Introduce size abstraction macros
  x86/percpu: Clean up percpu_to_op()
  x86/percpu: Clean up percpu_from_op()
  x86/percpu: Clean up percpu_add_op()
  x86/percpu: Clean up percpu_add_return_op()
  x86/percpu: Clean up percpu_xchg_op()
  x86/percpu: Clean up percpu_cmpxchg_op()

 arch/x86/include/asm/percpu.h | 447 --
 1 file changed, 158 insertions(+), 289 deletions(-)

-- 
2.25.4



[PATCH 1/7] x86/percpu: Introduce size abstraction macros

2020-05-17 Thread Brian Gerst
In preparation for cleaning up the percpu operations, define macros for
abstraction based on the width of the operation.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/percpu.h | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2278797c769d..89f918a3e99b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -87,6 +87,36 @@
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
 
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
+
+#define __pcpu_cast_1(val) ((u8)((unsigned long) val))
+#define __pcpu_cast_2(val) ((u16)((unsigned long) val))
+#define __pcpu_cast_4(val) ((u32)((unsigned long) val))
+#define __pcpu_cast_8(val) ((u64)(val))
+
+#define __pcpu_op1_1(op, dst) op "b " dst
+#define __pcpu_op1_2(op, dst) op "w " dst
+#define __pcpu_op1_4(op, dst) op "l " dst
+#define __pcpu_op1_8(op, dst) op "q " dst
+
+#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
+#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
+#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
+#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+
+#define __pcpu_reg_1(out, x) out "q" (x)
+#define __pcpu_reg_2(out, x) out "r" (x)
+#define __pcpu_reg_4(out, x) out "r" (x)
+#define __pcpu_reg_8(out, x) out "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
+
 #define percpu_to_op(qual, op, var, val)   \
 do {   \
typedef typeof(var) pto_T__;\
-- 
2.25.4
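
For illustration, a rough sketch of how these helpers are meant to
compose in the later patches of this series (a sketch only, under the
assumption that the follow-up patches use them like this; the exact
rewritten macros are in patches 2-7):

#define percpu_to_op_sketch(size, qual, _var, _val)			\
do {									\
	__pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);	\
	asm qual (__pcpu_op2_##size("mov", "%[val]",			\
				    __percpu_arg([var]))		\
		  : [var] "+m" (_var)					\
		  : [val] __pcpu_reg_imm_##size(pto_val__));		\
} while (0)

so that percpu_to_op_sketch(1, volatile, var, x) expands to a single
"movb %[val], %%fs:%[var]" (%gs: on 64-bit) with a "qi" input
constraint, and no switch on sizeof() is left in the x86 layer.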



Re: [PATCH] x86: support i386 with Clang

2020-05-11 Thread Brian Gerst
On Mon, May 11, 2020 at 3:34 PM Brian Gerst  wrote:
>
> On Mon, May 11, 2020 at 2:46 PM Nick Desaulniers
>  wrote:
> >
> > On Mon, May 11, 2020 at 11:09 AM Brian Gerst  wrote:
> > > This looks like the same issue that we just discussed for bitops.h.
> > > Add the "b" operand size modifier to force it to use the 8-bit
> > > register names (and probably also needs the "w" modifier in the 16-bit
> > > case).
> >
> > While it does feel familiar, it is slightly different.
> > https://godbolt.org/z/Rme4Zg
> > That case was both compilers validating the inline asm, yet generating
> > assembly that the assembler would choke on.  This case is validation
> > in the front end failing.
>
> > long long ret;
> > switch (sizeof(ret)) {
> > case 1:
> > asm ("movb $5, %0" : "=q" (ret));
> > break;
> > case 8:;
> > }
>
> So if the issue here is that the output variable type is long long,
> what code is using a 64-bit percpu variable on a 32-bit kernel?  Can
> you give a specific file that fails to build with Clang?  If Clang is
> choking on it it may be silently miscompiling on GCC.

On further investigation, 64-bit percpu operations fall back to the
generic code on x86-32, so there is no problem with miscompiling here.

On a side note from looking at the preprocessed output of the percpu
macros: they generate a ton of extra dead code because the core macros
also have a switch on data size.  I will take a stab at cleaning that
up.
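
(For illustration, the pattern in question, heavily simplified; these
are not the literal kernel macros.  The core layer dispatches on
sizeof():

	switch (sizeof(pcp)) {
	case 1: raw_cpu_add_1(pcp, val); break;
	case 2: raw_cpu_add_2(pcp, val); break;
	case 4: raw_cpu_add_4(pcp, val); break;
	case 8: raw_cpu_add_8(pcp, val); break;
	}

but each x86 raw_cpu_add_N() expands to percpu_add_op(), which carries
its own switch (sizeof(pcp)) over all four widths, so every use drags
in three unreachable inline-asm arms that the optimizer then has to
throw away.)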

--
Brian Gerst


Re: [PATCH] x86: support i386 with Clang

2020-05-11 Thread Brian Gerst
On Mon, May 11, 2020 at 2:46 PM Nick Desaulniers
 wrote:
>
> On Mon, May 11, 2020 at 11:09 AM Brian Gerst  wrote:
> > This looks like the same issue that we just discussed for bitops.h.
> > Add the "b" operand size modifier to force it to use the 8-bit
> > register names (and probably also needs the "w" modifier in the 16-bit
> > case).
>
> While it does feel familiar, it is slightly different.
> https://godbolt.org/z/Rme4Zg
> That case was both compilers validating the inline asm, yet generating
> assembly that the assembler would choke on.  This case is validation
> in the front end failing.

> long long ret;
> switch (sizeof(ret)) {
> case 1:
> asm ("movb $5, %0" : "=q" (ret));
> break;
> case 8:;
> }

So if the issue here is that the output variable type is long long,
what code is using a 64-bit percpu variable on a 32-bit kernel?  Can
you give a specific file that fails to build with Clang?  If Clang is
choking on it it may be silently miscompiling on GCC.

--
Brian Gerst


Re: [PATCH v5] x86: bitops: fix build regression

2020-05-11 Thread Brian Gerst
On Fri, May 8, 2020 at 2:32 PM Nick Desaulniers  wrote:
>
> This is easily reproducible via CC=clang+CONFIG_STAGING=y+CONFIG_VT6656=m.
>
> It turns out that if your config tickles __builtin_constant_p via
> differences in choices to inline or not, these statements produce
> invalid assembly:
>
> $ cat foo.c
> long a(long b, long c) {
>   asm("orb\t%1, %0" : "+q"(c): "r"(b));
>   return c;
> }
> $ gcc foo.c
> foo.c: Assembler messages:
> foo.c:2: Error: `%rax' not allowed with `orb'
>
> Use the `%b` "x86 Operand Modifier" to instead force register allocation
> to select a lower-8-bit GPR operand.
>
> The "q" constraint only has meaning on -m32 otherwise is treated as
> "r". Not all GPRs have low-8-bit aliases for -m32.
>
> Cc: Jesse Brandeburg 
> Link: https://github.com/ClangBuiltLinux/linux/issues/961
> Link: https://lore.kernel.org/lkml/20200504193524.ga221...@google.com/
> Link: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#x86Operandmodifiers
> Fixes: 1651e700664b4 ("x86: Fix bitops.h warning with a moved cast")
> Reported-by: Sedat Dilek 
> Reported-by: kernelci.org bot 
> Suggested-by: Andy Shevchenko 
> Suggested-by: Brian Gerst 
> Suggested-by: H. Peter Anvin 
> Suggested-by: Ilie Halip 
> Signed-off-by: Nick Desaulniers 

Reviewed-By: Brian Gerst 


Re: [PATCH] x86: support i386 with Clang

2020-05-11 Thread Brian Gerst
\
> > asm qual ("addb %1, "__percpu_arg(0)\
> > : "+m" (var)\
> > -   : "qi" ((pao_T__)(val)));   \
> > +   : "qi" ((unsigned char)(unsigned long)(val))); \
> > break;  \
> > case 2: \
> > if (pao_ID__ == 1)  \
> > @@ -182,12 +182,14 @@ do {  
> > \
> >
> >  #define percpu_from_op(qual, op, var)  \
> >  ({ \
> > +   unsigned char pfo_u8__; \
> > typeof(var) pfo_ret__;  \
> > switch (sizeof(var)) {  \
> > case 1: \
> > asm qual (op "b "__percpu_arg(1)",%0"   \
> > -   : "=q" (pfo_ret__)  \
> > +   : "=q" (pfo_u8__)   \
> > : "m" (var));   \
> > +   pfo_ret__ = (typeof(var))(unsigned long)pfo_u8__;   \
> > break;  \
> > case 2: \
> > asm qual (op "w "__percpu_arg(1)",%0"   \
> > @@ -211,12 +213,14 @@ do {  
> > \
> >
> >  #define percpu_stable_op(op, var)  \
> >  ({ \
> > +   unsigned char pfo_u8__; \
> > typeof(var) pfo_ret__;  \
> > switch (sizeof(var)) {  \
> > case 1: \
> > asm(op "b "__percpu_arg(P1)",%0"\
> > -   : "=q" (pfo_ret__)  \
> > +   : "=q" (pfo_u8__)   \
> > : "p" (&(var)));\
> > +   pfo_ret__ = (typeof(var))(unsigned long)pfo_u8__;   \
> > break;  \
> > case 2: \
> > asm(op "w "__percpu_arg(P1)",%0"\
> > diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> > index d8f283b9a569..cf8483cd80e1 100644
> > --- a/arch/x86/include/asm/uaccess.h
> > +++ b/arch/x86/include/asm/uaccess.h
> > @@ -314,11 +314,13 @@ do {  
> > \
> >
> >  #define __get_user_size(x, ptr, size, retval)  \
> >  do {       \
> > +   unsigned char x_u8__;   \
> > retval = 0; \
> > __chk_user_ptr(ptr);\
> > switch (size) { \
> > case 1: \
> > -   __get_user_asm(x, ptr, retval, "b", "=q");  \
> > +   __get_user_asm(x_u8__, ptr, retval, "b", "=q"); \
> > +   (x) = x_u8__;   \
> > break;  \
> > case 2: \
> > __get_user_asm(x, ptr, retval, "w", "=r");  \
> > --
> > 2.26.2.526.g744177e7f7-goog
> >
>
>
> --
> Thanks,
> ~Nick Desaulniers

This looks like the same issue that we just discussed for bitops.h.
Add the "b" operand size modifier to force it to use the 8-bit
register names (and probably also needs the "w" modifier in the 16-bit
case).

--
Brian Gerst


Re: [PATCH v5] x86: bitops: fix build regression

2020-05-09 Thread Brian Gerst
On Sat, May 9, 2020 at 8:20 AM Andy Shevchenko
 wrote:
>
> On Fri, May 8, 2020 at 9:35 PM Nick Desaulniers  
> wrote:
> >
> > This is easily reproducible via CC=clang+CONFIG_STAGING=y+CONFIG_VT6656=m.
> >
> > It turns out that if your config tickles __builtin_constant_p via
> > differences in choices to inline or not, these statements produce
> > invalid assembly:
> >
> > $ cat foo.c
> > long a(long b, long c) {
> >   asm("orb\t%1, %0" : "+q"(c): "r"(b));
> >   return c;
> > }
> > $ gcc foo.c
> > foo.c: Assembler messages:
> > foo.c:2: Error: `%rax' not allowed with `orb'
> >
> > Use the `%b` "x86 Operand Modifier" to instead force register allocation
> > to select a lower-8-bit GPR operand.
> >
> > The "q" constraint only has meaning on -m32 otherwise is treated as
> > "r". Not all GPRs have low-8-bit aliases for -m32.
> >
>
> Looks very good!
> One question though, does it work with minimum supported version of gcc?

Yes.  The operand width modifiers have been around a long time but not
well documented until more recently.
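
As a standalone example of the modifiers in question (per GCC's "x86
Operand Modifiers" table; the function and names here are only for
illustration):

static inline void width_modifier_demo(unsigned long v, unsigned char *p)
{
	/*
	 * %b1 prints the 8-bit alias of v's register (e.g. %al), %w1 the
	 * 16-bit alias (%ax), %k1 the 32-bit alias (%eax), %q1 the 64-bit
	 * alias (%rax).  Here %b1 keeps "movb" happy even though v is an
	 * unsigned long.
	 */
	asm("movb %b1, %0" : "=m" (*p) : "r" (v));
}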

--
Brian Gerst


Re: [PATCH v3] x86: bitops: fix build regression

2020-05-08 Thread Brian Gerst
On Fri, May 8, 2020 at 2:06 PM Nick Desaulniers  wrote:
>
> This is easily reproducible via CC=clang+CONFIG_STAGING=y+CONFIG_VT6656=m.
>
> It turns out that if your config tickles __builtin_constant_p via
> differences in choices to inline or not, these statements produce
> invalid assembly:
>
> $ cat foo.c
> long a(long b, long c) {
>   asm("orb\t%1, %0" : "+q"(c): "r"(b));
>   return c;
> }
> $ gcc foo.c
> foo.c: Assembler messages:
> foo.c:2: Error: `%rax' not allowed with `orb'
>
> Use the `%b` "x86 Operand Modifier" to instead force register allocation
> to select a lower-8-bit GPR operand.
>
> The "q" constraint only has meaning on -m32 otherwise is treated as
> "r". Not all GPRs have low-8-bit aliases for -m32.
>
> Cc: Jesse Brandeburg 
> Link: https://github.com/ClangBuiltLinux/linux/issues/961
> Link: https://lore.kernel.org/lkml/20200504193524.ga221...@google.com/
> Link: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#x86Operandmodifiers
> Fixes: 1651e700664b4 ("x86: Fix bitops.h warning with a moved cast")
> Reported-by: Sedat Dilek 
> Reported-by: kernelci.org bot 
> Suggested-by: Andy Shevchenko 
> Suggested-by: Brian Gerst 
> Suggested-by: H. Peter Anvin 
> Suggested-by: Ilie Halip 
> Signed-off-by: Nick Desaulniers 
> ---
> Changes V2 -> V3:
> * use `%b` "x86 Operand Modifier" instead of bitwise op then cast.
> * reword commit message.
> * add Brian and HPA suggested by tags
> * drop Nathan & Sedat Tested by/reviewed by tags (new patch is different
>   enough).
>
> Changes V1 -> V2:
> * change authorship/signed-off-by to Ilie
> * add Nathan's Tested by/reviewed by
> * update commit message slightly with info sent to HPA.
>
>  arch/x86/include/asm/bitops.h | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index b392571c1f1d..03e24286e4eb 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -52,9 +52,9 @@ static __always_inline void
>  arch_set_bit(long nr, volatile unsigned long *addr)
>  {
> if (__builtin_constant_p(nr)) {
> -   asm volatile(LOCK_PREFIX "orb %1,%0"
> +   asm volatile(LOCK_PREFIX "orb %b1,%0"
> : CONST_MASK_ADDR(nr, addr)
> -   : "iq" (CONST_MASK(nr) & 0xff)
> +   : "iq" (CONST_MASK(nr))
> : "memory");
> } else {
> asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
> @@ -72,9 +72,9 @@ static __always_inline void
>  arch_clear_bit(long nr, volatile unsigned long *addr)
>  {
> if (__builtin_constant_p(nr)) {
> -   asm volatile(LOCK_PREFIX "andb %1,%0"
> +   asm volatile(LOCK_PREFIX "andb %b1,%0"
>         : CONST_MASK_ADDR(nr, addr)
> -   : "iq" (CONST_MASK(nr) ^ 0xff));
> +   : "iq" (~CONST_MASK(nr)));
> } else {
> asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
> : : RLONG_ADDR(addr), "Ir" (nr) : "memory");

arch_change_bit() should also be changed, but otherwise looks good.

--
Brian Gerst


Re: [PATCH] x86: bitops: fix build regression

2020-05-07 Thread Brian Gerst
On Thu, May 7, 2020 at 6:29 PM Nick Desaulniers  wrote:
>
> On Thu, May 7, 2020 at 12:19 PM Nick Desaulniers
>  wrote:
> >
> > On Thu, May 7, 2020 at 7:00 AM Brian Gerst  wrote:
> > >
> > > This change will make sparse happy and allow these cleanups:
> > > #define CONST_MASK(nr) ((u8)1 << ((nr) & 7))
> >
> > yep, this is more elegant, IMO.  Will send a v3 later with this
> > change.  Looking at the uses of CONST_MASK, I noticed
> > arch_change_bit() currently has the (u8) cast from commit
> > 838e8bb71dc0c ("x86: Implement change_bit with immediate operand as
> > "lock xorb""), so that instance can get cleaned up with the above
> > suggestion.
>
> Oh, we need the cast to be the final operation.  The binary AND and
> XOR in 2 of the 3 uses of CONST_MASK implicitly promote the operands
> of the binary operand to int, so the type of the evaluated
> subexpression is int.
> https://wiki.sei.cmu.edu/confluence/display/c/EXP14-C.+Beware+of+integer+promotion+when+performing+bitwise+operations+on+integer+types+smaller+than+int
> So I think this version (v2) is most precise fix, and would be better
> than defining more macros or (worse) using metaprogramming.

One last suggestion.  Add the "b" modifier to the mask operand: "orb
%b1, %0".  That forces the compiler to use the 8-bit register name
instead of trying to deduce the width from the input.
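
A minimal standalone illustration of the difference (not kernel code;
the wide mask type mimics what the CONST_MASK expression promotes to):

static inline void or_byte(unsigned char *dst, unsigned long mask)
{
	/*
	 * mask is wider than a byte, so without a modifier the compiler
	 * prints the full-width register name and "orb %rax, (%rdi)" is
	 * rejected by the assembler; "%b1" forces the %al form.
	 */
	asm volatile("orb %b1, %0" : "+m" (*dst) : "q" (mask) : "memory");
}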

--
Brian Gerst


Re: [PATCH] x86: bitops: fix build regression

2020-05-07 Thread Brian Gerst
On Thu, May 7, 2020 at 7:38 AM Peter Zijlstra  wrote:
>
> On Tue, May 05, 2020 at 11:07:24AM -0700, h...@zytor.com wrote:
> > On May 5, 2020 10:44:22 AM PDT, Nick Desaulniers  
> > wrote:
>
> > >@@ -54,7 +54,7 @@ arch_set_bit(long nr, volatile unsigned long *addr)
> > > if (__builtin_constant_p(nr)) {
> > > asm volatile(LOCK_PREFIX "orb %1,%0"
> > > : CONST_MASK_ADDR(nr, addr)
> > >-: "iq" (CONST_MASK(nr) & 0xff)
> > >+: "iq" ((u8)(CONST_MASK(nr) & 0xff))
> > > : "memory");
> > > } else {
> > > asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
> > >@@ -74,7 +74,7 @@ arch_clear_bit(long nr, volatile unsigned long *addr)
> > > if (__builtin_constant_p(nr)) {
> > > asm volatile(LOCK_PREFIX "andb %1,%0"
> > > : CONST_MASK_ADDR(nr, addr)
> > >-: "iq" (CONST_MASK(nr) ^ 0xff));
> > >+: "iq" ((u8)(CONST_MASK(nr) ^ 0xff)));
> > > } else {
> > > asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
> > > : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
> >
> > Drop & 0xff and change ^ 0xff to ~.
>
> But then we're back to sparse being unhappy, no? The thing with ~ is
> that it will set high bits which will be truncated, which makes sparse
> sad.

This change will make sparse happy and allow these cleanups:
#define CONST_MASK(nr) ((u8)1 << ((nr) & 7))

Tested with GCC 9.3.1.

--
Brian Gerst


Re: [PATCH] x86: bitops: fix build regression

2020-05-07 Thread Brian Gerst
On Thu, May 7, 2020 at 3:02 AM  wrote:
>
> On May 6, 2020 11:18:09 PM PDT, Brian Gerst  wrote:
> >On Tue, May 5, 2020 at 1:47 PM Nick Desaulniers
> > wrote:
> >>
> >> From: Sedat Dilek 
> >>
> >> It turns out that if your config tickles __builtin_constant_p via
> >> differences in choices to inline or not, this now produces invalid
> >> assembly:
> >>
> >> $ cat foo.c
> >> long a(long b, long c) {
> >>   asm("orb\t%1, %0" : "+q"(c): "r"(b));
> >>   return c;
> >> }
> >> $ gcc foo.c
> >> foo.c: Assembler messages:
> >> foo.c:2: Error: `%rax' not allowed with `orb'
> >>
> >> The "q" constraint only has meanting on -m32 otherwise is treated as
> >> "r".
> >>
> >> This is easily reproducible via
> >Clang+CONFIG_STAGING=y+CONFIG_VT6656=m,
> >> or Clang+allyesconfig.
> >>
> >> Keep the masking operation to appease sparse (`make C=1`), add back
> >the
> >> cast in order to properly select the proper 8b register alias.
> >>
> >>  [Nick: reworded]
> >>
> >> Cc: sta...@vger.kernel.org
> >> Cc: Jesse Brandeburg 
> >> Link: https://github.com/ClangBuiltLinux/linux/issues/961
> >> Link:
> >https://lore.kernel.org/lkml/20200504193524.ga221...@google.com/
> >> Fixes: 1651e700664b4 ("x86: Fix bitops.h warning with a moved cast")
> >> Reported-by: Sedat Dilek 
> >> Reported-by: kernelci.org bot 
> >> Suggested-by: Andy Shevchenko 
> >> Suggested-by: Ilie Halip 
> >> Tested-by: Sedat Dilek 
> >> Signed-off-by: Sedat Dilek 
> >> Signed-off-by: Nick Desaulniers 
> >> ---
> >>  arch/x86/include/asm/bitops.h | 4 ++--
> >>  1 file changed, 2 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/bitops.h
> >b/arch/x86/include/asm/bitops.h
> >> index b392571c1f1d..139122e5b25b 100644
> >> --- a/arch/x86/include/asm/bitops.h
> >> +++ b/arch/x86/include/asm/bitops.h
> >> @@ -54,7 +54,7 @@ arch_set_bit(long nr, volatile unsigned long *addr)
> >> if (__builtin_constant_p(nr)) {
> >>     asm volatile(LOCK_PREFIX "orb %1,%0"
> >> : CONST_MASK_ADDR(nr, addr)
> >> -   : "iq" (CONST_MASK(nr) & 0xff)
> >> +   : "iq" ((u8)(CONST_MASK(nr) & 0xff))
> >
> >I think a better fix would be to make CONST_MASK() return a u8 value
> >rather than have to cast on every use.
> >
> >Also I question the need for the "q" constraint.  It was added in
> >commit 437a0a54 as a workaround for GCC 3.4.4.  Now that the minimum
> >GCC version is 4.6, is this still necessary?
> >
> >--
> >Brian Gerst
>
> Yes, "q" is needed on i386.

I think the bug this worked around was that the compiler didn't detect
that CONST_MASK(nr) was also constant and didn't need to be put into
a register.  The question is whether that bug still exists on the
compiler versions we care about.

--
Brian Gerst


Re: [PATCH] x86: bitops: fix build regression

2020-05-07 Thread Brian Gerst
On Tue, May 5, 2020 at 1:47 PM Nick Desaulniers  wrote:
>
> From: Sedat Dilek 
>
> It turns out that if your config tickles __builtin_constant_p via
> differences in choices to inline or not, this now produces invalid
> assembly:
>
> $ cat foo.c
> long a(long b, long c) {
>   asm("orb\t%1, %0" : "+q"(c): "r"(b));
>   return c;
> }
> $ gcc foo.c
> foo.c: Assembler messages:
> foo.c:2: Error: `%rax' not allowed with `orb'
>
> The "q" constraint only has meanting on -m32 otherwise is treated as
> "r".
>
> This is easily reproducible via Clang+CONFIG_STAGING=y+CONFIG_VT6656=m,
> or Clang+allyesconfig.
>
> Keep the masking operation to appease sparse (`make C=1`), add back the
> cast in order to properly select the proper 8b register alias.
>
>  [Nick: reworded]
>
> Cc: sta...@vger.kernel.org
> Cc: Jesse Brandeburg 
> Link: https://github.com/ClangBuiltLinux/linux/issues/961
> Link: https://lore.kernel.org/lkml/20200504193524.ga221...@google.com/
> Fixes: 1651e700664b4 ("x86: Fix bitops.h warning with a moved cast")
> Reported-by: Sedat Dilek 
> Reported-by: kernelci.org bot 
> Suggested-by: Andy Shevchenko 
> Suggested-by: Ilie Halip 
> Tested-by: Sedat Dilek 
> Signed-off-by: Sedat Dilek 
> Signed-off-by: Nick Desaulniers 
> ---
>  arch/x86/include/asm/bitops.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index b392571c1f1d..139122e5b25b 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -54,7 +54,7 @@ arch_set_bit(long nr, volatile unsigned long *addr)
> if (__builtin_constant_p(nr)) {
> asm volatile(LOCK_PREFIX "orb %1,%0"
> : CONST_MASK_ADDR(nr, addr)
> -   : "iq" (CONST_MASK(nr) & 0xff)
> +   : "iq" ((u8)(CONST_MASK(nr) & 0xff))

I think a better fix would be to make CONST_MASK() return a u8 value
rather than have to cast on every use.

Also I question the need for the "q" constraint.  It was added in
commit 437a0a54 as a workaround for GCC 3.4.4.  Now that the minimum
GCC version is 4.6, is this still necessary?

--
Brian Gerst


Re: [patch V4 part 1 19/36] x86/entry: Exclude low level entry code from sanitizing

2020-05-05 Thread Brian Gerst
On Tue, May 5, 2020 at 10:13 AM Thomas Gleixner  wrote:
>
> The sanitizers are not really applicable to the fragile low level entry
> code. Entry code needs to carefully setup a normal 'runtime'
> environment.
>
> Signed-off-by: Peter Zijlstra (Intel) 
> Signed-off-by: Thomas Gleixner 
> ---
>  arch/x86/entry/Makefile |8 
>  1 file changed, 8 insertions(+)
>
> --- a/arch/x86/entry/Makefile
> +++ b/arch/x86/entry/Makefile
> @@ -3,6 +3,14 @@
>  # Makefile for the x86 low level entry code
>  #
>
> +KASAN_SANITIZE := n
> +UBSAN_SANITIZE := n
> +KCOV_INSTRUMENT := n
> +
> +CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) -fstack-protector 
> -fstack-protector-strong
> +CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) -fstack-protector 
> -fstack-protector-strong
> +CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) -fstack-protector 
> -fstack-protector-strong

Is this necessary for syscall_*.o?  They just contain the syscall
tables (ie. data).

--
Brian Gerst


Re: [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives

2020-04-29 Thread Brian Gerst
On Wed, Apr 29, 2020 at 6:18 AM Peter Zijlstra  wrote:
>
> On Wed, Apr 29, 2020 at 10:30:53AM +0200, Peter Zijlstra wrote:
> > > POPF is an expensive instruction that should be avoided if possible.
> > > A better solution would be to have the alternative jump over the
> > > push/pop when SMAP is disabled.
> >
> > Yeah. I think I had that, but then confused myself again. I don't think
> > it matters much if you look at where it's used though.
> >
> > Still, let me try the jmp thing again..
>
> Here goes..
>
> ---
> Subject: x86,smap: Fix smap_{save,restore}() alternatives
> From: Peter Zijlstra 
> Date: Tue Apr 28 19:57:59 CEST 2020
>
> As reported by objtool:
>
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative 
> modifies stack
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative 
> modifies stack
>
> the smap_{save,restore}() alternatives violate (the newly enforced)
> rule on stack invariance. That is, due to there only being a single
> ORC table it must be valid to any alternative. These alternatives
> violate this with the direct result that unwinds will not be correct
> when it hits between the PUSH and POP instructions.
>
> Rewrite the functions to only have a conditional jump.
>
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
>  arch/x86/include/asm/smap.h |   11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> --- a/arch/x86/include/asm/smap.h
> +++ b/arch/x86/include/asm/smap.h
> @@ -57,8 +57,10 @@ static __always_inline unsigned long sma
>  {
> unsigned long flags;
>
> -   asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
> - X86_FEATURE_SMAP)
> +   asm volatile ("# smap_save\n\t"
> + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
> + "pushf; pop %0; " __ASM_CLAC "\n\t"
> + "1:"
>   : "=rm" (flags) : : "memory", "cc");
>
> return flags;
> @@ -66,7 +68,10 @@ static __always_inline unsigned long sma
>
>  static __always_inline void smap_restore(unsigned long flags)
>  {
> -   asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
> +   asm volatile ("# smap_restore\n\t"
> + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
> + "push %0; popf\n\t"
> + "1:"
>   : : "g" (flags) : "memory", "cc");
>  }
>

Looks good.  Alternatively, you could use static_cpu_has(X86_FEATURE_SMAP).
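
Something along these lines, as a rough sketch only (untested; assumes
the usual clac() and static_cpu_has() helpers from <asm/smap.h> and
<asm/cpufeature.h>):

static __always_inline unsigned long smap_save(void)
{
	unsigned long flags = 0;

	if (static_cpu_has(X86_FEATURE_SMAP)) {
		asm volatile ("pushf; pop %0"
			      : "=rm" (flags) : : "memory", "cc");
		clac();
	}

	return flags;
}

static __always_inline void smap_restore(unsigned long flags)
{
	if (static_cpu_has(X86_FEATURE_SMAP))
		asm volatile ("push %0; popf"
			      : : "g" (flags) : "memory", "cc");
}

The push/pop stays balanced inside a single asm either way, so the
stack layout the unwinder sees should be invariant on SMAP and !SMAP
parts alike.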

--
Brian Gerst


Re: [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives

2020-04-28 Thread Brian Gerst
On Tue, Apr 28, 2020 at 3:21 PM Peter Zijlstra  wrote:
>
> As reported by objtool:
>
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative 
> modifies stack
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative 
> modifies stack
>
> the smap_{save,restore}() alternatives violate (the newly enforced)
> rule on stack invariance. That is, due to there only being a single
> ORC table it must be valid to any alternative. These alternatives
> violate this with the direct result that unwinds will not be correct
> in between these calls.
>
> [ In specific, since we force SMAP on for objtool, running on !SMAP
> hardware will observe a different stack-layout and the ORC unwinder
> will stumble. ]
>
> So rewrite the functions to unconditionally save/restore the flags,
> which gives an invariant stack layout irrespective of the SMAP state.
>
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
>  arch/x86/include/asm/smap.h |   11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/include/asm/smap.h
> +++ b/arch/x86/include/asm/smap.h
> @@ -57,16 +57,19 @@ static __always_inline unsigned long sma
>  {
> unsigned long flags;
>
> -   asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
> - X86_FEATURE_SMAP)
> - : "=rm" (flags) : : "memory", "cc");
> +   asm volatile ("# smap_save\n\t"
> + "pushf; pop %0"
> + : "=rm" (flags) : : "memory");
> +
> +   clac();
>
> return flags;
>  }
>
>  static __always_inline void smap_restore(unsigned long flags)
>  {
> -   asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
> +   asm volatile ("# smap_restore\n\t"
> +     "push %0; popf"
>   : : "g" (flags) : "memory", "cc");
>  }

POPF is an expensive instruction that should be avoided if possible.
A better solution would be to have the alternative jump over the
push/pop when SMAP is disabled.

--
Brian Gerst


Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch

2019-02-15 Thread Brian Gerst
On Thu, Feb 14, 2019 at 2:34 PM Peter Zijlstra  wrote:
>
> On Thu, Feb 14, 2019 at 11:18:01AM -0500, Brian Gerst wrote:
>
> > > --- a/arch/x86/include/asm/switch_to.h
> > > +++ b/arch/x86/include/asm/switch_to.h
> > > @@ -40,6 +40,7 @@ asmlinkage void ret_from_fork(void);
> > >   * order of the fields must match the code in __switch_to_asm().
> > >   */
> > >  struct inactive_task_frame {
> > > +   unsigned long flags;
> > >  #ifdef CONFIG_X86_64
> > > unsigned long r15;
> > > unsigned long r14;
> >
> > flags should be initialized in copy_thread_tls().  I think the new
> > stack is zeroed already, but it would be better to explicitly set it.
>
> Ah indeed. I somehow misread that code and thought we got initialized
> with a copy of current.
>
> Something like the below, right?
>
> ---
>
> --- a/arch/x86/kernel/process_32.c
> +++ b/arch/x86/kernel/process_32.c
> @@ -127,6 +127,7 @@ int copy_thread_tls(unsigned long clone_
> struct task_struct *tsk;
> int err;
>
> +   frame->flags = 0;
> frame->bp = 0;
> frame->ret_addr = (unsigned long) ret_from_fork;
> p->thread.sp = (unsigned long) fork_frame;
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -392,6 +392,7 @@ int copy_thread_tls(unsigned long clone_
> childregs = task_pt_regs(p);
> fork_frame = container_of(childregs, struct fork_frame, regs);
> frame = _frame->frame;
> +   frame->flags = 0;
> frame->bp = 0;
> frame->ret_addr = (unsigned long) ret_from_fork;
> p->thread.sp = (unsigned long) fork_frame;

Yes, this looks good.

--
Brian Gerst


Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch

2019-02-14 Thread Brian Gerst
On Thu, Feb 14, 2019 at 5:14 AM Peter Zijlstra  wrote:
>
> On Wed, Feb 13, 2019 at 02:49:47PM -0800, Andy Lutomirski wrote:
>
> > Do we need to backport this thing?
>
> Possibly, just to be safe.
>
> > The problem can’t be too widespread or we would have heard of it before.
>
> Yes, so far we've been lucky.
>
> ---
> Subject: sched/x86: Save [ER]FLAGS on context switch
> From: Peter Zijlstra 
> Date: Thu Feb 14 10:30:52 CET 2019
>
> Effectively revert commit:
>
>   2c7577a75837 ("sched/x86_64: Don't save flags on context switch")
>
> Specifically because SMAP uses FLAGS.AC which invalidates the claim
> that the kernel has clean flags.
>
> In particular; while preemption from interrupt return is fine (the
> IRET frame on the exception stack contains FLAGS) it breaks any code
> that does synchonous scheduling, including preempt_enable().
>
> This has become a significant issue ever since commit:
>
>   5b24a7a2aa20 ("Add 'unsafe' user access functions for batched accesses")
>
> provided for means of having 'normal' C code between STAC / CLAC,
> exposing the FLAGS.AC state. So far this hasn't led to trouble,
> however fix it before it comes apart.
>
> Fixes: 5b24a7a2aa20 ("Add 'unsafe' user access functions for batched 
> accesses")
> Acked-by: Andy Lutomirski 
> Reported-by: Julien Thierry 
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
>  arch/x86/entry/entry_32.S|2 ++
>  arch/x86/entry/entry_64.S|2 ++
>  arch/x86/include/asm/switch_to.h |1 +
>  3 files changed, 5 insertions(+)
>
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -650,6 +650,7 @@ ENTRY(__switch_to_asm)
> pushl   %ebx
> pushl   %edi
> pushl   %esi
> +   pushfl
>
> /* switch stack */
> movl%esp, TASK_threadsp(%eax)
> @@ -672,6 +673,7 @@ ENTRY(__switch_to_asm)
>  #endif
>
> /* restore callee-saved registers */
> +   popfl
> popl%esi
> popl%edi
> popl%ebx
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -291,6 +291,7 @@ ENTRY(__switch_to_asm)
> pushq   %r13
> pushq   %r14
> pushq   %r15
> +   pushfq
>
> /* switch stack */
> movq%rsp, TASK_threadsp(%rdi)
> @@ -313,6 +314,7 @@ ENTRY(__switch_to_asm)
>  #endif
>
> /* restore callee-saved registers */
> +   popfq
> popq%r15
> popq%r14
> popq%r13
> --- a/arch/x86/include/asm/switch_to.h
> +++ b/arch/x86/include/asm/switch_to.h
> @@ -40,6 +40,7 @@ asmlinkage void ret_from_fork(void);
>   * order of the fields must match the code in __switch_to_asm().
>   */
>  struct inactive_task_frame {
> +   unsigned long flags;
>  #ifdef CONFIG_X86_64
> unsigned long r15;
> unsigned long r14;

flags should be initialized in copy_thread_tls().  I think the new
stack is zeroed already, but it would be better to explicitly set it.

--
Brian Gerst


Re: [PATCH] Reduce boot header size with 1 byte

2018-09-27 Thread Brian Gerst
On Thu, Sep 27, 2018 at 9:42 AM  wrote:
>
> From: Alexander F. Rødseth 
>
> Only ah needs to be set to 0 before calling interrupt 0x16 for waiting
> for a keypress.
>
> This patch changes the line that uses xor so that it only zeroes "ah" instead 
> of "ax".
> This saves a byte.
>
> Signed-off-by: Alexander F. Rødseth 
> ---
>  arch/x86/boot/header.S | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
> index 850b8762e889..905cb96f43d4 100644
> --- a/arch/x86/boot/header.S
> +++ b/arch/x86/boot/header.S
> @@ -71,14 +71,15 @@ msg_loop:
> jmp msg_loop
>
>  bs_die:
> -   # Allow the user to press a key, then reboot
> -   xorw%ax, %ax
> +   # Allow the user to press a key
> +   xorb%ah, %ah

Because this is code running in 16-bit real mode, xorw does not need a
16-bit prefix and only uses 2 bytes.  You save nothing by using xorb.
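
For reference, the encodings as I remember them in 16-bit real mode
(worth confirming against an assembler listing):

	.code16
	xorw	%ax, %ax	# 31 c0, two bytes, no 0x66 prefix needed
	xorb	%ah, %ah	# 30 e4, also two bytes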

--
Brian Gerst


Re: [PATCH] x86: kvm: Restrict X86_FEATURE_VMMCALL to x86_64 platform

2018-08-01 Thread Brian Gerst
On Tue, Jul 31, 2018 at 9:00 AM Paolo Bonzini  wrote:
>
> On 31/07/2018 14:57, tedheadster wrote:
> >>
> >> This shouldn't be necessary; for systems that don't have virtualization
> >> extensions, the comment explains why setting X86_FEATURE_VMMCALL is safe.
> >>
> >> But it is also wrong, because you can run a 32-bit kernel as a guest on
> >> a 64-bit processor, and then it should set X86_FEATURE_VMMCALL because
> >> the processor has the vmmcall instruction and not Intel's vmcall.
> >>
> >
> > Paolo,
> >   I'm running this on a bare metal machine (no virtualization) with a
> > 32-bit AMD i486 class cpu. Should the feature be showing up in
> > /proc/cpuinfo under the 'flags' line? It does on my machine, and it
> > looked wrong to me.
>
> It's a bit silly, but it's not particularly wrong.

Why is there even a specific feature flag for VMMCALL?  Isn't
X86_FEATURE_SVM sufficient to differentiate which opcode to use?

--
Brian Gerst


Re: [PATCH 07/39] x86/entry/32: Enter the kernel via trampoline stack

2018-07-18 Thread Brian Gerst
On Wed, Jul 18, 2018 at 5:41 AM Joerg Roedel  wrote:
>
> From: Joerg Roedel 
>
> Use the entry-stack as a trampoline to enter the kernel. The
> entry-stack is already in the cpu_entry_area and will be
> mapped to userspace when PTI is enabled.
>
> Signed-off-by: Joerg Roedel 
> ---
>  arch/x86/entry/entry_32.S| 119 
> ---
>  arch/x86/include/asm/switch_to.h |  14 -
>  arch/x86/kernel/asm-offsets.c|   1 +
>  arch/x86/kernel/cpu/common.c |   5 +-
>  arch/x86/kernel/process.c|   2 -
>  arch/x86/kernel/process_32.c |   2 -
>  6 files changed, 115 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index 7251c4f..fea49ec 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -154,7 +154,7 @@
>
>  #endif /* CONFIG_X86_32_LAZY_GS */
>
> -.macro SAVE_ALL pt_regs_ax=%eax
> +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
> cld
> PUSH_GS
> pushl   %fs
> @@ -173,6 +173,12 @@
> movl$(__KERNEL_PERCPU), %edx
> movl%edx, %fs
> SET_KERNEL_GS %edx
> +
> +   /* Switch to kernel stack if necessary */
> +.if \switch_stacks > 0
> +   SWITCH_TO_KERNEL_STACK
> +.endif
> +
>  .endm
>
>  /*
> @@ -269,6 +275,73 @@
>  .Lend_\@:
>  #endif /* CONFIG_X86_ESPFIX32 */
>  .endm
> +
> +
> +/*
> + * Called with pt_regs fully populated and kernel segments loaded,
> + * so we can access PER_CPU and use the integer registers.
> + *
> + * We need to be very careful here with the %esp switch, because an NMI
> + * can happen everywhere. If the NMI handler finds itself on the
> + * entry-stack, it will overwrite the task-stack and everything we
> + * copied there. So allocate the stack-frame on the task-stack and
> + * switch to it before we do any copying.
> + */
> +.macro SWITCH_TO_KERNEL_STACK
> +
> +   ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
> +
> +   /* Are we on the entry stack? Bail out if not! */
> +   movlPER_CPU_VAR(cpu_entry_area), %ecx
> +   addl$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
> +   subl%esp, %ecx  /* ecx = (end of entry_stack) - esp */
> +   cmpl$SIZEOF_entry_stack, %ecx
> +   jae .Lend_\@
> +
> +   /* Load stack pointer into %esi and %edi */
> +   movl%esp, %esi
> +   movl%esi, %edi
> +
> +   /* Move %edi to the top of the entry stack */
> +   andl$(MASK_entry_stack), %edi
> +   addl$(SIZEOF_entry_stack), %edi
> +
> +   /* Load top of task-stack into %edi */
> +   movlTSS_entry2task_stack(%edi), %edi
> +
> +   /* Bytes to copy */
> +   movl$PTREGS_SIZE, %ecx
> +
> +#ifdef CONFIG_VM86
> +   testl   $X86_EFLAGS_VM, PT_EFLAGS(%esi)
> +   jz  .Lcopy_pt_regs_\@
> +
> +   /*
> +* Stack-frame contains 4 additional segment registers when
> +* coming from VM86 mode
> +*/
> +   addl$(4 * 4), %ecx
> +
> +.Lcopy_pt_regs_\@:
> +#endif
> +
> +   /* Allocate frame on task-stack */
> +   subl%ecx, %edi
> +
> +   /* Switch to task-stack */
> +   movl%edi, %esp
> +
> +   /*
> +* We are now on the task-stack and can safely copy over the
> +* stack-frame
> +*/
> +   shrl$2, %ecx

This shift can be removed if you divide the constants by 4 above.
Ditto on the exit path in the next patch.

--
Brian Gerst


  1   2   3   4   5   6   7   8   9   10   >