Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-03-01 Thread Peter Zijlstra
On Mon, Feb 25, 2019 at 02:21:03PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 25, 2019 at 12:47:00AM -0800, h...@zytor.com wrote:
> > It doesn't have to understand the contents of the memop, but it seems
> > that the presence of a modrm with mode ≠ 3 should be plenty. It needs
> > to know that much in order to know the length of instructions anyway.
> > For extra credit, ignore LEA or hinting instructions.
> 
> A little something like so then?


$ ./objtool check --no-fp --backtrace 
../../defconfig-build/arch/x86/lib/usercopy_64.o
../../defconfig-build/arch/x86/lib/usercopy_64.o: warning: objtool: 
.altinstr_replacement+0x3: UACCESS disable without MEMOPs: __clear_user()
../../defconfig-build/arch/x86/lib/usercopy_64.o: warning: objtool: 
__clear_user()+0x3a: (alt)
../../defconfig-build/arch/x86/lib/usercopy_64.o: warning: objtool: 
__clear_user()+0x2e: (branch)
../../defconfig-build/arch/x86/lib/usercopy_64.o: warning: objtool: 
__clear_user()+0x18: (branch)
../../defconfig-build/arch/x86/lib/usercopy_64.o: warning: objtool: 
.altinstr_replacement+0x: (branch)
../../defconfig-build/arch/x86/lib/usercopy_64.o: warning: objtool: 
__clear_user()+0x5: (alt)
../../defconfig-build/arch/x86/lib/usercopy_64.o: warning: objtool: 
__clear_user()+0x0: <=== (func)


 <__clear_user>:
   0:   e8 00 00 00 00  callq  5 <__clear_user+0x5>
   1: R_X86_64_PLT32   __fentry__-0x4
   5:   90  nop
   6:   90  nop
   7:   90  nop
   8:   48 89 f0mov%rsi,%rax
   b:   48 c1 ee 03 shr$0x3,%rsi
   f:   83 e0 07and$0x7,%eax
  12:   48 89 f1mov%rsi,%rcx
  15:   48 85 c9test   %rcx,%rcx
  18:   74 0f   je 29 <__clear_user+0x29>
  1a:   48 c7 07 00 00 00 00movq   $0x0,(%rdi)
  21:   48 83 c7 08 add$0x8,%rdi
  25:   ff c9   dec%ecx
  27:   75 f1   jne1a <__clear_user+0x1a>
  29:   48 89 c1mov%rax,%rcx
  2c:   85 c9   test   %ecx,%ecx
  2e:   74 0a   je 3a <__clear_user+0x3a>
  30:   c6 07 00movb   $0x0,(%rdi)
  33:   48 ff c7inc%rdi
  36:   ff c9   dec%ecx
  38:   75 f6   jne30 <__clear_user+0x30>
  3a:   90  nop
  3b:   90  nop
  3c:   90  nop
  3d:   48 89 c8mov%rcx,%rax
  40:   c3  retq


Seems correct. Not sure you want to go fix that though. Let me know if
you want more output.


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread Andy Lutomirski



> On Feb 25, 2019, at 3:53 AM, Peter Zijlstra  wrote:
> 
>> On Mon, Feb 25, 2019 at 11:51:44AM +0100, Peter Zijlstra wrote:
>>> On Fri, Feb 22, 2019 at 03:55:25PM -0800, Andy Lutomirski wrote:
>>> I'm wondering if we can just change the code that does getreg() and
>>> load_gs_index() so it doesn't do it with AC set.  Also, what about
>>> paravirt kernels?  They'll call into PV code for load_gs_index() with
>>> AC set.
>> 
>> Paravirt can go bugger off. There's no sane way to fix that.
> 
>> I don't fully understand that code at all; I also have no clue why GS
>> has paravirt bits on but the other segments do not. 
> 
> *sigh* SWAPGS
> 
>> *thought*... we could delay the actual set_user_seg() thing until after
>> the get_user_catch(), would that work?
> 
> 
> How horrible / broken is this?
> 
> ---
> 
> diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
> index 321fe5f5d0e9..67c866943102 100644
> --- a/arch/x86/ia32/ia32_signal.c
> +++ b/arch/x86/ia32/ia32_signal.c
> @@ -60,17 +60,21 @@
>regs->seg = GET_SEG(seg) | 3;\
> } while (0)
> 
> -#define RELOAD_SEG(seg){\
> -unsigned int pre = GET_SEG(seg);\
> -unsigned int cur = get_user_seg(seg);\
> -pre |= 3;\
> -if (pre != cur)\
> -set_user_seg(seg, pre);\
> +#define LOAD_SEG(seg){\
> +pre_##seg = 3 | GET_SEG(seg);\
> +cur_##seg = get_user_seg(seg);\
> +}
> +
> +#define RELOAD_SEG(seg){\
> +if (pre_##seg != cur_##seg)\
> +set_user_seg(seg, pre_##seg);\
> }
> 
> static int ia32_restore_sigcontext(struct pt_regs *regs,
>   struct sigcontext_32 __user *sc)
> {
> +u16 pre_gs, pre_fs, pre_ds, pre_es;
> +u16 cur_gs, cur_fs, cur_ds, cur_es;
>unsigned int tmpflags, err = 0;
>void __user *buf;
>u32 tmp;
> @@ -85,10 +89,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
> * the handler, but does not clobber them at least in the
> * normal case.
> */
> -RELOAD_SEG(gs);
> -RELOAD_SEG(fs);
> -RELOAD_SEG(ds);
> -RELOAD_SEG(es);
> +LOAD_SEG(gs);
> +LOAD_SEG(fs);
> +LOAD_SEG(ds);
> +LOAD_SEG(es);
> 
>COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
>COPY(dx); COPY(cx); COPY(ip); COPY(ax);
> @@ -106,6 +110,11 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
>buf = compat_ptr(tmp);
>} get_user_catch(err);
> 
> +RELOAD_SEG(gs);
> +RELOAD_SEG(fs);
> +RELOAD_SEG(ds);
> +RELOAD_SEG(es);
> +
>err |= fpu__restore_sig(buf, 1);
> 
>force_iret();

I would call this pretty horrible. How about we do it without macros? :)

But yes, deferring the segment load until after the read seems fine to me. 
Frankly, we could also just copy_from_user the whole thing up front — thus code 
is not really a serious fast path.

Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread Peter Zijlstra
On Mon, Feb 25, 2019 at 12:47:00AM -0800, h...@zytor.com wrote:
> It doesn't have to understand the contents of the memop, but it seems
> that the presence of a modrm with mode ≠ 3 should be plenty. It needs
> to know that much in order to know the length of instructions anyway.
> For extra credit, ignore LEA or hinting instructions.

A little something like so then?

arch/x86/kernel/fpu/signal.o: warning: objtool: .altinstr_replacement+0x9c: 
UACCESS disable without MEMOPs: copy_fpstate_to_sigframe()

023c  00020002 R_X86_64_PC32  .text + 604
0240  00070002 R_X86_64_PC32  
.altinstr_replacement + 99
0249  00020002 R_X86_64_PC32  .text + 610
024d  00070002 R_X86_64_PC32  
.altinstr_replacement + 9c

.text

604:   90  nop
605:   90  nop
606:   90  nop
607:   83 ce 03or $0x3,%esi
60a:   89 b3 00 02 00 00   mov%esi,0x200(%rbx)
610:   90  nop
611:   90  nop
612:   90  nop

.altinstr_replacement

99:   0f 01 cbstac
9c:   0f 01 caclac

Which looks like the tool failed to recognise that MOV as a memop.



---
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -37,7 +37,8 @@
 #define INSN_CLAC  12
 #define INSN_STD   13
 #define INSN_CLD   14
-#define INSN_OTHER 15
+#define INSN_MEMOP 15
+#define INSN_OTHER 16
 #define INSN_LAST  INSN_OTHER
 
 enum op_dest_type {
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -123,6 +123,9 @@ int arch_decode_instruction(struct elf *
modrm_mod = X86_MODRM_MOD(modrm);
modrm_reg = X86_MODRM_REG(modrm);
modrm_rm = X86_MODRM_RM(modrm);
+
+   if (modrm_mod != 3)
+   *type = INSN_MEMOP;
}
 
if (insn.sib.nbytes)
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2047,6 +2047,7 @@ static int validate_branch(struct objtoo
WARN_FUNC("recursive UACCESS enable", sec, 
insn->offset);
 
state.uaccess = true;
+   state.memop = false;
break;
 
case INSN_CLAC:
@@ -2058,6 +2059,9 @@ static int validate_branch(struct objtoo
break;
}
 
+   if (!state.memop && insn->func)
+   WARN_FUNC("UACCESS disable without MEMOPs: 
%s()", sec, insn->offset, insn->func->name);
+
state.uaccess = false;
break;
 
@@ -2075,6 +2079,10 @@ static int validate_branch(struct objtoo
state.df = false;
break;
 
+   case INSN_MEMOP:
+   state.memop = true;
+   break;
+
default:
break;
}
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -31,7 +31,7 @@ struct insn_state {
int stack_size;
unsigned char type;
bool bp_scratch;
-   bool drap, end, uaccess, uaccess_safe, df;
+   bool drap, end, uaccess, uaccess_safe, df, memop;
int drap_reg, drap_offset;
struct cfi_reg vals[CFI_NUM_REGS];
 };


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread Peter Zijlstra
On Mon, Feb 25, 2019 at 08:33:35AM +, Julien Thierry wrote:
> > It has an AC_SAFE(func) annotation which allows marking specific
> > functions as safe to call. The patch includes 2 instances which were
> > required to make arch/x86 'build':

> I haven't looked at all the details. But could the annotation be called
> UACCESS_SAFE() (and corresponding naming in the objtool checks)? Since
> this is not an x86 only issue and the AC flags only exists for x86.

Sure that works. Lemme sed the patches.


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread Peter Zijlstra
On Mon, Feb 25, 2019 at 11:51:44AM +0100, Peter Zijlstra wrote:
> On Fri, Feb 22, 2019 at 03:55:25PM -0800, Andy Lutomirski wrote:
> > I'm wondering if we can just change the code that does getreg() and
> > load_gs_index() so it doesn't do it with AC set.  Also, what about
> > paravirt kernels?  They'll call into PV code for load_gs_index() with
> > AC set.
> 
> Paravirt can go bugger off. There's no sane way to fix that.

> I don't fully understand that code at all; I also have no clue why GS
> has paravirt bits on but the other segments do not. 

*sigh* SWAPGS

> *thought*... we could delay the actual set_user_seg() thing until after
> the get_user_catch(), would that work?


How horrible / broken is this?

---

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 321fe5f5d0e9..67c866943102 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -60,17 +60,21 @@
regs->seg = GET_SEG(seg) | 3;   \
 } while (0)
 
-#define RELOAD_SEG(seg){   \
-   unsigned int pre = GET_SEG(seg);\
-   unsigned int cur = get_user_seg(seg);   \
-   pre |= 3;   \
-   if (pre != cur) \
-   set_user_seg(seg, pre); \
+#define LOAD_SEG(seg)  {   \
+   pre_##seg = 3 | GET_SEG(seg);   \
+   cur_##seg = get_user_seg(seg);  \
+}
+
+#define RELOAD_SEG(seg){   \
+   if (pre_##seg != cur_##seg) \
+   set_user_seg(seg, pre_##seg);   \
 }
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
   struct sigcontext_32 __user *sc)
 {
+   u16 pre_gs, pre_fs, pre_ds, pre_es;
+   u16 cur_gs, cur_fs, cur_ds, cur_es;
unsigned int tmpflags, err = 0;
void __user *buf;
u32 tmp;
@@ -85,10 +89,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 * the handler, but does not clobber them at least in the
 * normal case.
 */
-   RELOAD_SEG(gs);
-   RELOAD_SEG(fs);
-   RELOAD_SEG(ds);
-   RELOAD_SEG(es);
+   LOAD_SEG(gs);
+   LOAD_SEG(fs);
+   LOAD_SEG(ds);
+   LOAD_SEG(es);
 
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
@@ -106,6 +110,11 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
buf = compat_ptr(tmp);
} get_user_catch(err);
 
+   RELOAD_SEG(gs);
+   RELOAD_SEG(fs);
+   RELOAD_SEG(ds);
+   RELOAD_SEG(es);
+
err |= fpu__restore_sig(buf, 1);
 
force_iret();


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread Peter Zijlstra
On Fri, Feb 22, 2019 at 03:55:25PM -0800, Andy Lutomirski wrote:
> I'm wondering if we can just change the code that does getreg() and
> load_gs_index() so it doesn't do it with AC set.  Also, what about
> paravirt kernels?  They'll call into PV code for load_gs_index() with
> AC set.

Paravirt can go bugger off. There's no sane way to fix that.

Luckily the load_gs_index() thing is part of the paravirt me harder crap
and so nobody sane should ever hit that.

I don't fully understand that code at all; I also have no clue why GS
has paravirt bits on but the other segments do not. But it looks like we
want to do that RELOAD_SEG() crap under SMAP because of the GET_SEG() ->
get_user_ex() thing.

Anyway, I only see 3 options here:

 1) live with the paravirt me harder builds complaining
 2) exclude the AC validation from the paravirt me harder builds
 3) rewrite this code to no need that stupid call in the first place


2 seems like an exceptionally bad ideal, 3 would need someone that
understands this, so for now I'll pick 1 :-)


*thought*... we could delay the actual set_user_seg() thing until after
the get_user_catch(), would that work?




Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread hpa
On February 23, 2019 12:39:42 AM PST, Peter Zijlstra  
wrote:
>On Fri, Feb 22, 2019 at 03:39:48PM -0800, h...@zytor.com wrote:
>> Objtool could also detect CLAC-STAC or STAC-CLAC sequences without
>> memory operations and remove them; don't know how often that happens,
>> but I know it *does* happen.
>
>Objtool doesn't know about memops; that'd be a lot of work. Also,
>objtool doesn't actually rewrite the text, at best it could warn about
>such occurences.

It doesn't even have to change the text per se, just nullify the altinstr.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread hpa
On February 23, 2019 12:39:42 AM PST, Peter Zijlstra  
wrote:
>On Fri, Feb 22, 2019 at 03:39:48PM -0800, h...@zytor.com wrote:
>> Objtool could also detect CLAC-STAC or STAC-CLAC sequences without
>> memory operations and remove them; don't know how often that happens,
>> but I know it *does* happen.
>
>Objtool doesn't know about memops; that'd be a lot of work. Also,
>objtool doesn't actually rewrite the text, at best it could warn about
>such occurences.

It doesn't have to understand the contents of the memop, but it seems that the 
presence of a modrm with mode ≠ 3 should be plenty. It needs to know that much 
in order to know the length of instructions anyway. For extra credit, ignore 
LEA or hinting instructions.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-25 Thread Julien Thierry



On 22/02/2019 22:26, Peter Zijlstra wrote:
> On Fri, Feb 22, 2019 at 07:10:34PM +0100, Thomas Gleixner wrote:
> 
>> But correct :)
> 
>> I agree, that a function which is doing the actual copy should be callable,
>> but random other functions? NO!
> 
> So find the below patch -- which spotted fail in ptrace.c
> 
> It has an AC_SAFE(func) annotation which allows marking specific
> functions as safe to call. The patch includes 2 instances which were
> required to make arch/x86 'build':
> 
>   arch/x86/ia32/ia32_signal.o: warning: objtool: 
> ia32_restore_sigcontext()+0x3d: call to native_load_gs_index() with AC set
>   arch/x86/kernel/ptrace.o: warning: objtool: genregs_get()+0x8e: call to 
> getreg() with AC set
> 
> It also screams (provided one has CONFIG_FUNCTION_TRACE=y) about the
> lack of notrace annotations on functions marked AC_SAFE():
> 
>   arch/x86/kernel/ptrace.o: warning: objtool: getreg()+0x0: call to 
> __fentry__() with AC set
> 
> It builds arch/x86 relatively clean; it only complains about some
> redundant CLACs in entry_64.S because it doesn't understand interrupts
> and I've not bothered creating an annotation for them yet.
> 
>   arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0x4d: 
> redundant CLAC
>   arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0x5a: 
> redundant CLAC
>   ...
>   arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0xb1: 
> redundant CLAC
> 
> Also, I realized we don't need special annotations for preempt_count;
> preempt_disable() emits a CALL instruction which should readily trigger
> the warnings added here.
> 
> The VDSO thing is a bit of a hack, but I couldn't quickly find anything
> better.
> 
> Comments?

I haven't looked at all the details. But could the annotation be called
UACCESS_SAFE() (and corresponding naming in the objtool checks)? Since
this is not an x86 only issue and the AC flags only exists for x86.

Cheers,

Julien

> 
> ---
>  arch/x86/include/asm/special_insns.h |  2 ++
>  arch/x86/kernel/ptrace.c |  3 +-
>  include/linux/frame.h| 23 ++
>  tools/objtool/arch.h |  4 ++-
>  tools/objtool/arch/x86/decode.c  | 14 -
>  tools/objtool/check.c| 59 
> 
>  tools/objtool/check.h|  3 +-
>  tools/objtool/elf.h  |  1 +
>  8 files changed, 105 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/special_insns.h 
> b/arch/x86/include/asm/special_insns.h
> index 43c029cdc3fe..cd31e4433f4c 100644
> --- a/arch/x86/include/asm/special_insns.h
> +++ b/arch/x86/include/asm/special_insns.h
> @@ -5,6 +5,7 @@
>  
>  #ifdef __KERNEL__
>  
> +#include 
>  #include 
>  
>  /*
> @@ -135,6 +136,7 @@ static inline void native_wbinvd(void)
>  }
>  
>  extern asmlinkage void native_load_gs_index(unsigned);
> +AC_SAFE(native_load_gs_index);
>  
>  static inline unsigned long __read_cr4(void)
>  {
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index 4b8ee05dd6ad..e278b4055a8b 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -420,7 +420,7 @@ static int putreg(struct task_struct *child,
>   return 0;
>  }
>  
> -static unsigned long getreg(struct task_struct *task, unsigned long offset)
> +static notrace unsigned long getreg(struct task_struct *task, unsigned long 
> offset)
>  {
>   switch (offset) {
>   case offsetof(struct user_regs_struct, cs):
> @@ -444,6 +444,7 @@ static unsigned long getreg(struct task_struct *task, 
> unsigned long offset)
>  
>   return *pt_regs_access(task_pt_regs(task), offset);
>  }
> +AC_SAFE(getreg);
>  
>  static int genregs_get(struct task_struct *target,
>  const struct user_regset *regset,
> diff --git a/include/linux/frame.h b/include/linux/frame.h
> index 02d3ca2d9598..5d354cf42a56 100644
> --- a/include/linux/frame.h
> +++ b/include/linux/frame.h
> @@ -21,4 +21,27 @@
>  
>  #endif /* CONFIG_STACK_VALIDATION */
>  
> +#if defined(CONFIG_STACK_VALIDATION) && !defined(BUILD_VDSO) && 
> !defined(BUILD_VDSO32)
> +/*
> + * This macro marks functions as AC-safe, that is, it is safe to call from an
> + * EFLAGS.AC enabled region (typically user_access_begin() /
> + * user_access_end()).
> + *
> + * These functions in turn will only call AC-safe functions themselves (which
> + * precludes tracing, including __fentry__ and scheduling, including
> + * preempt_enable).
> + *
> + * AC-safe functions will obviously also not change EFLAGS.AC themselves.
> + *
> + * Since STAC/CLAC are OPL-0 only, this is all irrelevant for VDSO builds
> + * (and the generated symbol reference will in fact cause link failures).
> + */
> +#define AC_SAFE(func) \
> + static void __used __section(.discard.ac_safe) \
> + *__func_ac_safe_##func = func
> +
> +#else
> +#define AC_SAFE(func)
> +#endif
> +
>  #endif /* _LINUX_FRAME_H */
> diff --git 

Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-23 Thread Peter Zijlstra
On Sat, Feb 23, 2019 at 09:37:06AM +0100, Peter Zijlstra wrote:
> On Fri, Feb 22, 2019 at 03:55:25PM -0800, Andy Lutomirski wrote:
> > On Fri, Feb 22, 2019 at 2:26 PM Peter Zijlstra  wrote:
> 
> > >   arch/x86/entry/entry_64.o: warning: objtool: 
> > > .altinstr_replacement+0xb1: redundant CLAC
> > 
> > Does objtool understand altinstr?
> 
> Yep, otherwise it would've never found any of the STAC/CLAC instructions
> to begin with, they're all alternatives. The emitted code is all NOPs.
> 
> > If it understands that this is an
> > altinstr associated with a not-actually-a-fuction (i.e. END not
> > ENDPROC) piece of code, it could suppress this warning.
> 
> Using readelf on entry_64.o the symbol type appears to be STT_NOTYPE for
> these 'functions', so yes, I can try and ignore this warning for those.
> 
> > > +#define AC_SAFE(func) \
> > > +   static void __used __section(.discard.ac_safe) \
> > > +   *__func_ac_safe_##func = func
> > 
> > Are we okay with annotating function *declarations* in a way that
> > their addresses get taken whenever the declaration is processed?  It
> > would be nice if we could avoid this.
> > 
> > I'm wondering if we can just change the code that does getreg() and
> > load_gs_index() so it doesn't do it with AC set.  Also, what about
> > paravirt kernels?  They'll call into PV code for load_gs_index() with
> > AC set.
> 
> Fixing that code would be fine; but we need that annotation regardless,
> read Linus' email a little further back, he wants things like
> copy_{to,from}_user_unsafe().
> 
> Those really would need to be marked AC-safe, there's no inlining that.
> 
> That said, yes these annotations _suck_. But it's what we had and works,
> I'm just copying it (from STACK_FRAME_NON_STANDARD).
> 
> That said; maybe we can do something like:
> 
> #define AC_SAFE(func) \
>   asm (".pushsection .discard.ac_safe_sym\n\t"\
>"999: .ascii \"" #func "\"\n\t"\
>".popsection\n\t"  \
>".pushsection .discard.ac_safe\n\t"\
>_ASM_PTR " 999b\n\t"   \
>".popsection")
> 
> I just don't have a clue on how to read that in objtool; weak elf foo.
> I'll see if I can make it work.

Fixed all that. Should probably also convert that NON_STANDARD stuff to
this new shiny thing.

Retained the ptrace muck because it's a nice test case, your patch is
obviously a better fix there.

Haven't bothered looking at alternatives yet, this is a
defconfig+tracing build.

Weekend now.

---
 arch/x86/include/asm/special_insns.h |   2 +
 arch/x86/kernel/ptrace.c |   3 +-
 include/linux/frame.h|  38 
 tools/objtool/Makefile   |   2 +-
 tools/objtool/arch.h |   6 +-
 tools/objtool/arch/x86/decode.c  |  22 ++-
 tools/objtool/check.c| 117 ++-
 tools/objtool/check.h|   2 +-
 tools/objtool/elf.h  |   1 +
 9 files changed, 187 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/special_insns.h 
b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..cd31e4433f4c 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -5,6 +5,7 @@
 
 #ifdef __KERNEL__
 
+#include 
 #include 
 
 /*
@@ -135,6 +136,7 @@ static inline void native_wbinvd(void)
 }
 
 extern asmlinkage void native_load_gs_index(unsigned);
+AC_SAFE(native_load_gs_index);
 
 static inline unsigned long __read_cr4(void)
 {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 4b8ee05dd6ad..e278b4055a8b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -420,7 +420,7 @@ static int putreg(struct task_struct *child,
return 0;
 }
 
-static unsigned long getreg(struct task_struct *task, unsigned long offset)
+static notrace unsigned long getreg(struct task_struct *task, unsigned long 
offset)
 {
switch (offset) {
case offsetof(struct user_regs_struct, cs):
@@ -444,6 +444,7 @@ static unsigned long getreg(struct task_struct *task, 
unsigned long offset)
 
return *pt_regs_access(task_pt_regs(task), offset);
 }
+AC_SAFE(getreg);
 
 static int genregs_get(struct task_struct *target,
   const struct user_regset *regset,
diff --git a/include/linux/frame.h b/include/linux/frame.h
index 02d3ca2d9598..870a894bc586 100644
--- a/include/linux/frame.h
+++ b/include/linux/frame.h
@@ -21,4 +21,42 @@
 
 #endif /* CONFIG_STACK_VALIDATION */
 
+#if defined(CONFIG_STACK_VALIDATION)
+/*
+ * This macro marks functions as AC-safe, that is, it is safe to call from an
+ * EFLAGS.AC enabled region (typically user_access_begin() /
+ * user_access_end()).
+ *
+ * These functions in turn will only call AC-safe functions themselves (which
+ * precludes tracing, including __fentry__ 

Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-23 Thread Peter Zijlstra
On Fri, Feb 22, 2019 at 03:34:00PM -0800, Linus Torvalds wrote:
> Can you make it do DF too while at it?

Sure.


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-23 Thread Peter Zijlstra
On Fri, Feb 22, 2019 at 03:39:48PM -0800, h...@zytor.com wrote:
> Objtool could also detect CLAC-STAC or STAC-CLAC sequences without
> memory operations and remove them; don't know how often that happens,
> but I know it *does* happen.

Objtool doesn't know about memops; that'd be a lot of work. Also,
objtool doesn't actually rewrite the text, at best it could warn about
such occurences.


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-23 Thread Peter Zijlstra
On Fri, Feb 22, 2019 at 03:55:25PM -0800, Andy Lutomirski wrote:
> On Fri, Feb 22, 2019 at 2:26 PM Peter Zijlstra  wrote:

> >   arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0xb1: 
> > redundant CLAC
> 
> Does objtool understand altinstr?

Yep, otherwise it would've never found any of the STAC/CLAC instructions
to begin with, they're all alternatives. The emitted code is all NOPs.

> If it understands that this is an
> altinstr associated with a not-actually-a-fuction (i.e. END not
> ENDPROC) piece of code, it could suppress this warning.

Using readelf on entry_64.o the symbol type appears to be STT_NOTYPE for
these 'functions', so yes, I can try and ignore this warning for those.

> > +#define AC_SAFE(func) \
> > +   static void __used __section(.discard.ac_safe) \
> > +   *__func_ac_safe_##func = func
> 
> Are we okay with annotating function *declarations* in a way that
> their addresses get taken whenever the declaration is processed?  It
> would be nice if we could avoid this.
> 
> I'm wondering if we can just change the code that does getreg() and
> load_gs_index() so it doesn't do it with AC set.  Also, what about
> paravirt kernels?  They'll call into PV code for load_gs_index() with
> AC set.

Fixing that code would be fine; but we need that annotation regardless,
read Linus' email a little further back, he wants things like
copy_{to,from}_user_unsafe().

Those really would need to be marked AC-safe, there's no inlining that.

That said, yes these annotations _suck_. But it's what we had and works,
I'm just copying it (from STACK_FRAME_NON_STANDARD).

That said; maybe we can do something like:

#define AC_SAFE(func)   \
asm (".pushsection .discard.ac_safe_sym\n\t"\
 "999: .ascii \"" #func "\"\n\t"\
 ".popsection\n\t"  \
 ".pushsection .discard.ac_safe\n\t"\
 _ASM_PTR " 999b\n\t"   \
 ".popsection")

I just don't have a clue on how to read that in objtool; weak elf foo.
I'll see if I can make it work.


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-22 Thread Andy Lutomirski
On Fri, Feb 22, 2019 at 2:26 PM Peter Zijlstra  wrote:
>
> On Fri, Feb 22, 2019 at 07:10:34PM +0100, Thomas Gleixner wrote:
>
> > But correct :)
>
> > I agree, that a function which is doing the actual copy should be callable,
> > but random other functions? NO!
>
> So find the below patch -- which spotted fail in ptrace.c
>
> It has an AC_SAFE(func) annotation which allows marking specific
> functions as safe to call. The patch includes 2 instances which were
> required to make arch/x86 'build':
>
>   arch/x86/ia32/ia32_signal.o: warning: objtool: 
> ia32_restore_sigcontext()+0x3d: call to native_load_gs_index() with AC set
>   arch/x86/kernel/ptrace.o: warning: objtool: genregs_get()+0x8e: call to 
> getreg() with AC set
>
> It also screams (provided one has CONFIG_FUNCTION_TRACE=y) about the
> lack of notrace annotations on functions marked AC_SAFE():
>
>   arch/x86/kernel/ptrace.o: warning: objtool: getreg()+0x0: call to 
> __fentry__() with AC set
>
> It builds arch/x86 relatively clean; it only complains about some
> redundant CLACs in entry_64.S because it doesn't understand interrupts
> and I've not bothered creating an annotation for them yet.
>
>   arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0x4d: 
> redundant CLAC
>   arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0x5a: 
> redundant CLAC
>   ...
>   arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0xb1: 
> redundant CLAC

Does objtool understand altinstr?  If it understands that this is an
altinstr associated with a not-actually-a-fuction (i.e. END not
ENDPROC) piece of code, it could suppress this warning.

>
> -static unsigned long getreg(struct task_struct *task, unsigned long offset)
> +static notrace unsigned long getreg(struct task_struct *task, unsigned long 
> offset)
>  {
> switch (offset) {
> case offsetof(struct user_regs_struct, cs):
> @@ -444,6 +444,7 @@ static unsigned long getreg(struct task_struct *task, 
> unsigned long offset)
>
> return *pt_regs_access(task_pt_regs(task), offset);
>  }
> +AC_SAFE(getreg);

This takes the address and could plausibly suppress some optimizations.

>
> +#if defined(CONFIG_STACK_VALIDATION) && !defined(BUILD_VDSO) && 
> !defined(BUILD_VDSO32)
> +/*
> + * This macro marks functions as AC-safe, that is, it is safe to call from an
> + * EFLAGS.AC enabled region (typically user_access_begin() /
> + * user_access_end()).
> + *
> + * These functions in turn will only call AC-safe functions themselves (which
> + * precludes tracing, including __fentry__ and scheduling, including
> + * preempt_enable).
> + *
> + * AC-safe functions will obviously also not change EFLAGS.AC themselves.
> + *
> + * Since STAC/CLAC are OPL-0 only, this is all irrelevant for VDSO builds
> + * (and the generated symbol reference will in fact cause link failures).
> + */
> +#define AC_SAFE(func) \
> +   static void __used __section(.discard.ac_safe) \
> +   *__func_ac_safe_##func = func

Are we okay with annotating function *declarations* in a way that
their addresses get taken whenever the declaration is processed?  It
would be nice if we could avoid this.

I'm wondering if we can just change the code that does getreg() and
load_gs_index() so it doesn't do it with AC set.  Also, what about
paravirt kernels?  They'll call into PV code for load_gs_index() with
AC set.

--Andy


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-22 Thread Linus Torvalds
On Fri, Feb 22, 2019 at 2:26 PM Peter Zijlstra  wrote:
>
> So find the below patch -- which spotted fail in ptrace.c
>
> It has an AC_SAFE(func) annotation which allows marking specific
> functions as safe to call. The patch includes 2 instances which were
> required to make arch/x86 'build':

Looks sane enough to me.

Can you make it do DF too while at it? I really think AC and DF are
the same in this context. If you call an arbitrary function with DF
set, things will very quickly go sideways (even if it might work in
practice as long as the function just doesn't do a memcpy or similar)

  Linus


Re: [RFC][PATCH] objtool: STAC/CLAC validation

2019-02-22 Thread hpa
On February 22, 2019 2:26:35 PM PST, Peter Zijlstra  
wrote:
>On Fri, Feb 22, 2019 at 07:10:34PM +0100, Thomas Gleixner wrote:
>
>> But correct :)
>
>> I agree, that a function which is doing the actual copy should be
>callable,
>> but random other functions? NO!
>
>So find the below patch -- which spotted fail in ptrace.c
>
>It has an AC_SAFE(func) annotation which allows marking specific
>functions as safe to call. The patch includes 2 instances which were
>required to make arch/x86 'build':
>
>arch/x86/ia32/ia32_signal.o: warning: objtool:
>ia32_restore_sigcontext()+0x3d: call to native_load_gs_index() with AC
>set
>arch/x86/kernel/ptrace.o: warning: objtool: genregs_get()+0x8e: call to
>getreg() with AC set
>
>It also screams (provided one has CONFIG_FUNCTION_TRACE=y) about the
>lack of notrace annotations on functions marked AC_SAFE():
>
>arch/x86/kernel/ptrace.o: warning: objtool: getreg()+0x0: call to
>__fentry__() with AC set
>
>It builds arch/x86 relatively clean; it only complains about some
>redundant CLACs in entry_64.S because it doesn't understand interrupts
>and I've not bothered creating an annotation for them yet.
>
>arch/x86/entry/entry_64.o: warning: objtool:
>.altinstr_replacement+0x4d: redundant CLAC
>arch/x86/entry/entry_64.o: warning: objtool:
>.altinstr_replacement+0x5a: redundant CLAC
>  ...
>arch/x86/entry/entry_64.o: warning: objtool:
>.altinstr_replacement+0xb1: redundant CLAC
>
>Also, I realized we don't need special annotations for preempt_count;
>preempt_disable() emits a CALL instruction which should readily trigger
>the warnings added here.
>
>The VDSO thing is a bit of a hack, but I couldn't quickly find anything
>better.
>
>Comments?
>
>---
> arch/x86/include/asm/special_insns.h |  2 ++
> arch/x86/kernel/ptrace.c |  3 +-
> include/linux/frame.h| 23 ++
> tools/objtool/arch.h |  4 ++-
> tools/objtool/arch/x86/decode.c  | 14 -
>tools/objtool/check.c| 59
>
> tools/objtool/check.h|  3 +-
> tools/objtool/elf.h  |  1 +
> 8 files changed, 105 insertions(+), 4 deletions(-)
>
>diff --git a/arch/x86/include/asm/special_insns.h
>b/arch/x86/include/asm/special_insns.h
>index 43c029cdc3fe..cd31e4433f4c 100644
>--- a/arch/x86/include/asm/special_insns.h
>+++ b/arch/x86/include/asm/special_insns.h
>@@ -5,6 +5,7 @@
> 
> #ifdef __KERNEL__
> 
>+#include 
> #include 
> 
> /*
>@@ -135,6 +136,7 @@ static inline void native_wbinvd(void)
> }
> 
> extern asmlinkage void native_load_gs_index(unsigned);
>+AC_SAFE(native_load_gs_index);
> 
> static inline unsigned long __read_cr4(void)
> {
>diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
>index 4b8ee05dd6ad..e278b4055a8b 100644
>--- a/arch/x86/kernel/ptrace.c
>+++ b/arch/x86/kernel/ptrace.c
>@@ -420,7 +420,7 @@ static int putreg(struct task_struct *child,
>   return 0;
> }
> 
>-static unsigned long getreg(struct task_struct *task, unsigned long
>offset)
>+static notrace unsigned long getreg(struct task_struct *task, unsigned
>long offset)
> {
>   switch (offset) {
>   case offsetof(struct user_regs_struct, cs):
>@@ -444,6 +444,7 @@ static unsigned long getreg(struct task_struct
>*task, unsigned long offset)
> 
>   return *pt_regs_access(task_pt_regs(task), offset);
> }
>+AC_SAFE(getreg);
> 
> static int genregs_get(struct task_struct *target,
>  const struct user_regset *regset,
>diff --git a/include/linux/frame.h b/include/linux/frame.h
>index 02d3ca2d9598..5d354cf42a56 100644
>--- a/include/linux/frame.h
>+++ b/include/linux/frame.h
>@@ -21,4 +21,27 @@
> 
> #endif /* CONFIG_STACK_VALIDATION */
> 
>+#if defined(CONFIG_STACK_VALIDATION) && !defined(BUILD_VDSO) &&
>!defined(BUILD_VDSO32)
>+/*
>+ * This macro marks functions as AC-safe, that is, it is safe to call
>from an
>+ * EFLAGS.AC enabled region (typically user_access_begin() /
>+ * user_access_end()).
>+ *
>+ * These functions in turn will only call AC-safe functions themselves
>(which
>+ * precludes tracing, including __fentry__ and scheduling, including
>+ * preempt_enable).
>+ *
>+ * AC-safe functions will obviously also not change EFLAGS.AC
>themselves.
>+ *
>+ * Since STAC/CLAC are OPL-0 only, this is all irrelevant for VDSO
>builds
>+ * (and the generated symbol reference will in fact cause link
>failures).
>+ */
>+#define AC_SAFE(func) \
>+  static void __used __section(.discard.ac_safe) \
>+  *__func_ac_safe_##func = func
>+
>+#else
>+#define AC_SAFE(func)
>+#endif
>+
> #endif /* _LINUX_FRAME_H */
>diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
>index b0d7dc3d71b5..48327099466d 100644
>--- a/tools/objtool/arch.h
>+++ b/tools/objtool/arch.h
>@@ -33,7 +33,9 @@
> #define INSN_STACK8
> #define INSN_BUG  9
> #define INSN_NOP  10
>-#define INSN_OTHER11
>+#define INSN_STAC 11
>+#define INSN_CLAC