On Mon, Jun 19, 2017 at 09:22:57PM -0400, Todd Mortimer wrote:
> Hello tech,
> 
> I have attached a patch that converts NOP padding from the assembler
> into INT3 padding on amd64. The idea is to remove potentially convenient
> NOP sleds from programs and libraries, which makes it harder for an
> attacker to hit any ROP gadgets or other instructions after a NOP sled. 
> 
> NOP sleds are used for text alignment in order to get jump targets onto
> 16 byte boundaries. They can appear both in the middle of a function
> and at the end. The trapsleds implemented in this diff convert NOP sleds
> longer than 2 bytes from a series of 0x66666690 instructions to a 2 byte
> short JMP over a series of INT3 instructions that fill the rest of the
> gap. Programs that would have normally just slid through the NOP sled
> will now jump over. An attacker trying to hit the NOP sled will now get
> a core dump.
> 
> I have been running this on my system for over a week without any
> apparent ill effects. Specifically, there don't appear to be any
> performance penalties associated with doing this. A full base build
> on a system completely converted over to this took slightly less time to
> complete than the same build on a normal system, and my synthetic
> testing shows trapsleds perform similarly to nopsleds (performance
> difference was <1%, which is within error over multiple runs).
> 
> If people like this, I can do up the equivalent diff for clang.
> 
> Things that could be improved:
> 
> 1. For padding inserted at the end of a function, the JMP is
> unnecessary, and could also be a 0xCCCC. I am going to have a go at gcc
> to see if I can coerce it into distinguishing end-of-function padding
> from padding that is intended to be executed. If some kind soul with gcc
> experience knows where I should look, any pointers would be welcome - my
> previous attempt was not fruitful.
> 
> 2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
> the NOP sleds between 3 and 7 bytes if there are no objections.
> 
> Comments and suggestions are welcome. Thanks to Theo for suggesting it
> in the hallway track at BSDCan. 
> 
> Todd
> 

Nice, well done! I had this on my to do list for a while now and I'm happy
to see someone beat me to it.

-ml

> Index: gas/config/tc-i386.c
> ===================================================================
> RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v
> retrieving revision 1.7
> diff -u -p -u -p -r1.7 tc-i386.c
> --- gas/config/tc-i386.c      4 Jun 2017 20:26:18 -0000       1.7
> +++ gas/config/tc-i386.c      20 Jun 2017 00:36:27 -0000
> @@ -538,8 +538,8 @@ i386_align_code (fragP, count)
>      {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,     /* leal 0L(%esi,1),%esi */
>       0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};    /* leal 0L(%edi,1),%edi */
>    static const char f32_15[] =
> -    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,     /* jmp .+15; lotsa nops */
> -     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
> +    {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC,     /* jmp .+15; lotsa int3 */
> +     0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC};
>    static const char f16_3[] =
>      {0x8d,0x74,0x00};                                /* lea 0(%esi),%esi     */
>    static const char f16_4[] =
> @@ -556,6 +556,8 @@ i386_align_code (fragP, count)
>    static const char f16_8[] =
>      {0x8d,0xb4,0x00,0x00,                    /* lea 0w(%si),%si      */
>       0x8d,0xbd,0x00,0x00};                   /* lea 0w(%di),%di      */
> +  static const char f64_2[] =
> +    {0x66,0x90};        /* data16, nop*/
>    static const char *const f32_patt[] = {
>      f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
>      f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
> @@ -564,32 +566,21 @@ i386_align_code (fragP, count)
>      f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
>      f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
>    };
> +  static const char *const f64_patt[] = {
> +    f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
> +    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
> +  };
>  
>    if (count <= 0 || count > 15)
>      return;
>  
> -  /* The recommended way to pad 64bit code is to use NOPs preceded by
> -     maximally four 0x66 prefixes.  Balance the size of nops.  */
>    if (flag_code == CODE_64BIT)
>      {
> -      int i;
> -      int nnops = (count + 3) / 4;
> -      int len = count / nnops;
> -      int remains = count - nnops * len;
> -      int pos = 0;
> -
> -      for (i = 0; i < remains; i++)
> -     {
> -       memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len);
> -       fragP->fr_literal[fragP->fr_fix + pos + len] = 0x90;
> -       pos += len + 1;
> -     }
> -      for (; i < nnops; i++)
> -     {
> -       memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len - 1);
> -       fragP->fr_literal[fragP->fr_fix + pos + len - 1] = 0x90;
> -       pos += len;
> -     }
> +      memcpy(fragP->fr_literal + fragP->fr_fix,
> +          f64_patt[count -1], count);
> +        if (count > 2)
> +          /* Adjust jump offset */
> +          fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
>      }
>    else
>      if (flag_code == CODE_16BIT)

Reply via email to