> 2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
> the NOP sleds between 3 and 7 bytes if there are no objections.
The attached diff implements the same trapsled mechanism for i386 and amd64 for all padding sequences between 3 and 15 bytes. I have put this through a kernel and base build on i386 without apparent ill effect, and the amd64 parts are unchanged from the last diff.
Todd
Index: gas/config/tc-i386.c =================================================================== RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v retrieving revision 1.7 diff -u -p -u -p -r1.7 tc-i386.c --- gas/config/tc-i386.c 4 Jun 2017 20:26:18 -0000 1.7 +++ gas/config/tc-i386.c 21 Jun 2017 00:43:14 -0000 @@ -505,41 +505,9 @@ i386_align_code (fragP, count) {0x90}; /* nop */ static const char f32_2[] = {0x89,0xf6}; /* movl %esi,%esi */ - static const char f32_3[] = - {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */ - static const char f32_4[] = - {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ - static const char f32_5[] = - {0x90, /* nop */ - 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ - static const char f32_6[] = - {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ - static const char f32_7[] = - {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ - static const char f32_8[] = - {0x90, /* nop */ - 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ - static const char f32_9[] = - {0x89,0xf6, /* movl %esi,%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const char f32_10[] = - {0x8d,0x76,0x00, /* leal 0(%esi),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const char f32_11[] = - {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const char f32_12[] = - {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ - 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */ - static const char f32_13[] = - {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const char f32_14[] = - {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ static const char f32_15[] = - {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ - 
0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC, /* jmp .+15; lotsa int3 */ + 0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC}; static const char f16_3[] = {0x8d,0x74,0x00}; /* lea 0(%esi),%esi */ static const char f16_4[] = @@ -556,40 +524,31 @@ i386_align_code (fragP, count) static const char f16_8[] = {0x8d,0xb4,0x00,0x00, /* lea 0w(%si),%si */ 0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */ + static const char f64_2[] = + {0x66,0x90}; /* data16, nop*/ static const char *const f32_patt[] = { - f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, - f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15 + f32_1, f32_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, + f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15 }; static const char *const f16_patt[] = { f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15 }; + static const char *const f64_patt[] = { + f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, + f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15 + }; if (count <= 0 || count > 15) return; - /* The recommended way to pad 64bit code is to use NOPs preceded by - maximally four 0x66 prefixes. Balance the size of nops. 
*/ if (flag_code == CODE_64BIT) { - int i; - int nnops = (count + 3) / 4; - int len = count / nnops; - int remains = count - nnops * len; - int pos = 0; - - for (i = 0; i < remains; i++) - { - memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len); - fragP->fr_literal[fragP->fr_fix + pos + len] = 0x90; - pos += len + 1; - } - for (; i < nnops; i++) - { - memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len - 1); - fragP->fr_literal[fragP->fr_fix + pos + len - 1] = 0x90; - pos += len; - } + memcpy(fragP->fr_literal + fragP->fr_fix, + f64_patt[count -1], count); + if (count > 2) + /* Adjust jump offset */ + fragP->fr_literal[fragP->fr_fix + 1] = count - 2; } else if (flag_code == CODE_16BIT) @@ -601,8 +560,13 @@ i386_align_code (fragP, count) fragP->fr_literal[fragP->fr_fix + 1] = count - 2; } else + { memcpy (fragP->fr_literal + fragP->fr_fix, f32_patt[count - 1], count); + if (count > 2) + /* Adjust jump offset */ + fragP->fr_literal[fragP->fr_fix + 1] = count - 2; + } fragP->fr_var = count; }