Hello tech,

I have attached a patch that converts NOP padding from the assembler
into INT3 padding on amd64. The idea is to remove potentially convenient
NOP sleds from programs and libraries, which makes it harder for an
attacker to hit any ROP gadgets or other instructions after a NOP sled. 

NOP sleds are used for text alignment in order to get jump targets onto
16 byte boundaries. They can appear both in the middle of a function
and at the end. The trapsleds implemented in this diff convert NOP sleds
longer than 2 bytes from a series of 0x66666690 instructions to a 2 byte
short JMP over a series of INT3 instructions that fill the rest of the
gap. Programs that would have normally just slid through the NOP sled
will now jump over. An attacker trying to hit the NOP sled will now get
a core dump.

I have been running this on my system for over a week without any
apparent ill effects. Specifically, there don't appear to be any
performance penalties associated with doing this. A full base build
on a system completely converted over to this took slightly less time to
complete than the same build on a normal system, and my synthetic
testing shows trapsleds perform similarly to nopsleds (performance
difference was <1%, which is within error over multiple runs).

If people like this, I can do up the equivalent diff for clang.

Things that could be improved:

1. For padding inserted at the end of a function, the JMP is
unnecessary, and its two bytes could instead be INT3s as well (0xCCCC).
I am going to have a go at gcc
to see if I can coerce it into distinguishing end-of-function padding
from padding that is intended to be executed. If some kind soul with gcc
experience knows where I should look, any pointers would be welcome - my
previous attempt was not fruitful.

2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
the NOP sleds between 3 and 7 bytes if there are no objections.

Comments and suggestions are welcome. Thanks to Theo for suggesting it
in the hallway track at BSDCan. 

Todd

Index: gas/config/tc-i386.c
===================================================================
RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v
retrieving revision 1.7
diff -u -p -u -p -r1.7 tc-i386.c
--- gas/config/tc-i386.c        4 Jun 2017 20:26:18 -0000       1.7
+++ gas/config/tc-i386.c        20 Jun 2017 00:36:27 -0000
@@ -538,8 +538,8 @@ i386_align_code (fragP, count)
     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,       /* leal 0L(%esi,1),%esi */
      0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};      /* leal 0L(%edi,1),%edi */
   static const char f32_15[] =
-    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,       /* jmp .+15; lotsa nops */
-     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
+    {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC,       /* jmp .+15; lotsa int3 */
+     0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC};
   static const char f16_3[] =
     {0x8d,0x74,0x00};                          /* lea 0(%esi),%esi     */
   static const char f16_4[] =
@@ -556,6 +556,8 @@ i386_align_code (fragP, count)
   static const char f16_8[] =
     {0x8d,0xb4,0x00,0x00,                      /* lea 0w(%si),%si      */
      0x8d,0xbd,0x00,0x00};                     /* lea 0w(%di),%di      */
+  static const char f64_2[] =
+    {0x66,0x90};        /* data16, nop*/
   static const char *const f32_patt[] = {
     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
@@ -564,32 +566,21 @@ i386_align_code (fragP, count)
     f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
     f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
   };
+  static const char *const f64_patt[] = {
+    f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
+    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
+  };
 
   if (count <= 0 || count > 15)
     return;
 
-  /* The recommended way to pad 64bit code is to use NOPs preceded by
-     maximally four 0x66 prefixes.  Balance the size of nops.  */
   if (flag_code == CODE_64BIT)
     {
-      int i;
-      int nnops = (count + 3) / 4;
-      int len = count / nnops;
-      int remains = count - nnops * len;
-      int pos = 0;
-
-      for (i = 0; i < remains; i++)
-       {
-         memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len);
-         fragP->fr_literal[fragP->fr_fix + pos + len] = 0x90;
-         pos += len + 1;
-       }
-      for (; i < nnops; i++)
-       {
-         memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len - 1);
-         fragP->fr_literal[fragP->fr_fix + pos + len - 1] = 0x90;
-         pos += len;
-       }
+      memcpy(fragP->fr_literal + fragP->fr_fix,
+          f64_patt[count -1], count);
+        if (count > 2)
+          /* Adjust jump offset */
+          fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
     }
   else
     if (flag_code == CODE_16BIT)

Reply via email to