Re: Trapsleds
On Tue, Jun 20, 2017 at 10:34:00PM -0400, Todd Mortimer wrote:
> > 2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
> > the NOP sleds between 3 and 7 bytes if there are no objections.
>
> The attached diff implements the same trapsled mechanism for i386 and
> amd64 for all padding sequences between 3 and 15 bytes.
>
> I have put this through a kernel and base build on i386 without apparent
> ill effect, and the amd64 parts are unchanged from the last diff.
>
> Todd
>

reads ok to me, thanks again for your work here.

-ml

> Index: gas/config/tc-i386.c
> ===
> RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v
> retrieving revision 1.7
> diff -u -p -u -p -r1.7 tc-i386.c
> --- gas/config/tc-i386.c    4 Jun 2017 20:26:18 -    1.7
> +++ gas/config/tc-i386.c    21 Jun 2017 00:43:14 -
> @@ -505,41 +505,9 @@ i386_align_code (fragP, count)
>      {0x90};                               /* nop                  */
>    static const char f32_2[] =
>      {0x89,0xf6};                          /* movl %esi,%esi       */
> -  static const char f32_3[] =
> -    {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
> -  static const char f32_4[] =
> -    {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
> -  static const char f32_5[] =
> -    {0x90,                                /* nop                  */
> -     0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
> -  static const char f32_6[] =
> -    {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
> -  static const char f32_7[] =
> -    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
> -  static const char f32_8[] =
> -    {0x90,                                /* nop                  */
> -     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
> -  static const char f32_9[] =
> -    {0x89,0xf6,                           /* movl %esi,%esi       */
> -     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
> -  static const char f32_10[] =
> -    {0x8d,0x76,0x00,                      /* leal 0(%esi),%esi    */
> -     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
> -  static const char f32_11[] =
> -    {0x8d,0x74,0x26,0x00,                 /* leal 0(%esi,1),%esi  */
> -     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
> -  static const char f32_12[] =
> -    {0x8d,0xb6,0x00,0x00,0x00,0x00,       /* leal 0L(%esi),%esi   */
> -     0x8d,0xbf,0x00,0x00,0x00,0x00};      /* leal 0L(%edi),%edi   */
> -  static const char f32_13[] =
> -    {0x8d,0xb6,0x00,0x00,0x00,0x00,       /* leal 0L(%esi),%esi   */
> -     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
> -  static const char f32_14[] =
> -    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,  /* leal 0L(%esi,1),%esi */
> -     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
>    static const char f32_15[] =
> -    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,  /* jmp .+15; lotsa nops */
> -     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
> +    {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC,  /* jmp .+15; lotsa int3 */
> +     0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC};
>    static const char f16_3[] =
>      {0x8d,0x74,0x00};                     /* lea 0(%esi),%esi     */
>    static const char f16_4[] =
> @@ -556,40 +524,31 @@ i386_align_code (fragP, count)
>    static const char f16_8[] =
>      {0x8d,0xb4,0x00,0x00,                 /* lea 0w(%si),%si      */
>       0x8d,0xbd,0x00,0x00};                /* lea 0w(%di),%di      */
> +  static const char f64_2[] =
> +    {0x66,0x90};                          /* data16, nop          */
>    static const char *const f32_patt[] = {
> -    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
> -    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
> +    f32_1, f32_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
> +    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
>    };
>    static const char *const f16_patt[] = {
>      f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
>      f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
>    };
> +  static const char *const f64_patt[] = {
> +    f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
> +    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
> +  };
>
>    if (count <= 0 || count > 15)
>      return;
>
> -  /* The recommended way to pad 64bit code is to use NOPs preceded by
> -     maximally four 0x66 prefixes.  Balance the size of nops.  */
>    if (flag_code == CODE_64BIT)
>      {
> -      int i;
> -      int nnops = (count + 3) / 4;
> -      int len = count / nnops;
> -      int remains = count - nnops * len;
> -      int pos = 0;
> -
> -      for (i = 0; i < remains; i++)
> -        {
> -          memset (fragP->fr_literal + fragP->fr_fix
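For illustration, here is what one of the pads rewritten by this diff looks
like in bytes. This is only a sketch derived from the tables above; the
array names are invented for the example.

/* 7-byte alignment pad as gas emitted it before the change:
 * a single long LEA no-op (the old f32_7 pattern). */
static const unsigned char old_pad7[] =
    { 0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00 };  /* leal 0L(%esi,1),%esi */

/* The same 7-byte pad as a trapsled: a 2-byte short JMP over five INT3s.
 * Execution that falls into the pad jumps straight to the next real
 * instruction; a jump into the middle of the pad hits an int3 and the
 * process dumps core. */
static const unsigned char new_pad7[] =
    { 0xeb, 0x05, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC };  /* jmp .+7; 5 x int3 */

/* 1- and 2-byte pads are unchanged (nop and data16 nop): too short to
 * hold a JMP plus a trap. */
static const unsigned char pad1[] = { 0x90 };
static const unsigned char pad2[] = { 0x66, 0x90 };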
Re: Trapsleds
> 2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
> the NOP sleds between 3 and 7 bytes if there are no objections.

The attached diff implements the same trapsled mechanism for i386 and
amd64 for all padding sequences between 3 and 15 bytes.

I have put this through a kernel and base build on i386 without apparent
ill effect, and the amd64 parts are unchanged from the last diff.

Todd

Index: gas/config/tc-i386.c
===
RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v
retrieving revision 1.7
diff -u -p -u -p -r1.7 tc-i386.c
--- gas/config/tc-i386.c    4 Jun 2017 20:26:18 -    1.7
+++ gas/config/tc-i386.c    21 Jun 2017 00:43:14 -
@@ -505,41 +505,9 @@ i386_align_code (fragP, count)
     {0x90};                               /* nop                  */
   static const char f32_2[] =
     {0x89,0xf6};                          /* movl %esi,%esi       */
-  static const char f32_3[] =
-    {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
-  static const char f32_4[] =
-    {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
-  static const char f32_5[] =
-    {0x90,                                /* nop                  */
-     0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
-  static const char f32_6[] =
-    {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
-  static const char f32_7[] =
-    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
-  static const char f32_8[] =
-    {0x90,                                /* nop                  */
-     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
-  static const char f32_9[] =
-    {0x89,0xf6,                           /* movl %esi,%esi       */
-     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
-  static const char f32_10[] =
-    {0x8d,0x76,0x00,                      /* leal 0(%esi),%esi    */
-     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
-  static const char f32_11[] =
-    {0x8d,0x74,0x26,0x00,                 /* leal 0(%esi,1),%esi  */
-     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
-  static const char f32_12[] =
-    {0x8d,0xb6,0x00,0x00,0x00,0x00,       /* leal 0L(%esi),%esi   */
-     0x8d,0xbf,0x00,0x00,0x00,0x00};      /* leal 0L(%edi),%edi   */
-  static const char f32_13[] =
-    {0x8d,0xb6,0x00,0x00,0x00,0x00,       /* leal 0L(%esi),%esi   */
-     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
-  static const char f32_14[] =
-    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,  /* leal 0L(%esi,1),%esi */
-     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
   static const char f32_15[] =
-    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,  /* jmp .+15; lotsa nops */
-     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
+    {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC,  /* jmp .+15; lotsa int3 */
+     0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC};
   static const char f16_3[] =
     {0x8d,0x74,0x00};                     /* lea 0(%esi),%esi     */
   static const char f16_4[] =
@@ -556,40 +524,31 @@ i386_align_code (fragP, count)
   static const char f16_8[] =
     {0x8d,0xb4,0x00,0x00,                 /* lea 0w(%si),%si      */
      0x8d,0xbd,0x00,0x00};                /* lea 0w(%di),%di      */
+  static const char f64_2[] =
+    {0x66,0x90};                          /* data16, nop          */
   static const char *const f32_patt[] = {
-    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
-    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
+    f32_1, f32_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
+    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
   };
   static const char *const f16_patt[] = {
     f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
     f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
   };
+  static const char *const f64_patt[] = {
+    f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
+    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
+  };

   if (count <= 0 || count > 15)
     return;

-  /* The recommended way to pad 64bit code is to use NOPs preceded by
-     maximally four 0x66 prefixes.  Balance the size of nops.  */
   if (flag_code == CODE_64BIT)
     {
-      int i;
-      int nnops = (count + 3) / 4;
-      int len = count / nnops;
-      int remains = count - nnops * len;
-      int pos = 0;
-
-      for (i = 0; i < remains; i++)
-        {
-          memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len);
-          fragP->fr_literal[fragP->fr_fix + pos + len] = 0x90;
-          pos += len + 1;
-        }
-      for (; i < nnops; i++)
-        {
-          memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len - 1);
-          fragP->fr_literal[fragP->fr_fix + pos +
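The diff reuses the single 15-byte f32_15 template for every pad length from
3 to 15 and then rewrites the JMP displacement. A short JMP's 8-bit
displacement is counted from the end of the 2-byte instruction, so a
count-byte pad needs a displacement of count - 2. Below is a minimal sketch
of that fix-up; the function name is invented, the visible hunk only shows
the amd64 branch doing the adjustment, and the i386 branch is assumed to do
the same in the portion of the diff cut off above.

#include <assert.h>
#include <string.h>

/* Trapsled template, as in the patched f32_15: jmp .+15, then int3s. */
static const unsigned char f32_15[15] = {
    0xeb, 0x0d, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
    0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
};

/* Build a trapsled of 'count' bytes (3..15) into 'out': copy the template,
 * then rewrite the displacement so the JMP lands exactly at the end of
 * the pad. */
static void make_trapsled(unsigned char *out, int count)
{
    assert(count >= 3 && count <= 15);
    memcpy(out, f32_15, count);
    out[1] = (unsigned char)(count - 2);  /* count == 5 -> eb 03 cc cc cc */
}

For count == 5 this yields eb 03 cc cc cc: a jump three bytes forward, over
the three int3s, to the first aligned instruction.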
Re: Trapsleds
On Mon, Jun 19, 2017 at 09:22:57PM -0400, Todd Mortimer wrote:
> Hello tech,
>
> I have attached a patch that converts NOP padding from the assembler
> into INT3 padding on amd64. The idea is to remove potentially convenient
> NOP sleds from programs and libraries, which makes it harder for an
> attacker to hit any ROP gadgets or other instructions after a NOP sled.
>
> NOP sleds are used for text alignment in order to get jump targets onto
> 16 byte boundaries. They can appear both in the middle of a function
> and at the end. The trapsleds implemented in this diff convert NOP sleds
> longer than 2 bytes from a series of 0x6690 instructions to a 2 byte
> short JMP over a series of INT3 instructions that fill the rest of the
> gap. Programs that would have normally just slid through the NOP sled
> will now jump over it. An attacker trying to hit the NOP sled will now
> get a core dump.
>
> I have been running this on my system for over a week without any
> apparent ill effects. Specifically, there don't appear to be any
> performance penalties associated with doing this. A full base build
> on a system completely converted over to this took slightly less time to
> complete than the same build on a normal system, and my synthetic
> testing shows trapsleds perform similarly to nopsleds (performance
> difference was <1%, which is within error over multiple runs).
>
> If people like this, I can do up the equivalent diff for clang.
>
> Things that could be improved:
>
> 1. For padding inserted at the end of a function, the JMP is
> unnecessary, and could also be a 0xCC. I am going to have a go at gcc
> to see if I can coerce it into distinguishing end-of-function padding
> from padding that is intended to be executed. If some kind soul with gcc
> experience knows where I should look, any pointers would be welcome - my
> previous attempt was not fruitful.
>
> 2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
> the NOP sleds between 3 and 7 bytes if there are no objections.
>
> Comments and suggestions are welcome. Thanks to Theo for suggesting it
> in the hallway track at BSDCan.
>
> Todd
>
> Index: gas/config/tc-i386.c
> ===
> RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v
> retrieving revision 1.7
> diff -u -p -u -p -r1.7 tc-i386.c
> --- gas/config/tc-i386.c    4 Jun 2017 20:26:18 -    1.7
> +++ gas/config/tc-i386.c    20 Jun 2017 00:36:27 -
> @@ -538,8 +538,8 @@ i386_align_code (fragP, count)
>      {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,  /* leal 0L(%esi,1),%esi */
>       0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
>    static const char f32_15[] =
> -    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,  /* jmp .+15; lotsa nops */
> -     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
> +    {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC,  /* jmp .+15; lotsa int3 */
> +     0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC};
>    static const char f16_3[] =
>      {0x8d,0x74,0x00};                     /* lea 0(%esi),%esi     */
>    static const char f16_4[] =
> @@ -556,6 +556,8 @@ i386_align_code (fragP, count)
>    static const char f16_8[] =
>      {0x8d,0xb4,0x00,0x00,                 /* lea 0w(%si),%si      */
>       0x8d,0xbd,0x00,0x00};                /* lea 0w(%di),%di      */
> +  static const char f64_2[] =
> +    {0x66,0x90};                          /* data16, nop          */
>    static const char *const f32_patt[] = {
>      f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
>      f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
> @@ -564,32 +566,21 @@ i386_align_code (fragP, count)
>      f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
>      f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
>    };
> +  static const char *const f64_patt[] = {
> +    f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
> +    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
> +  };
>
>    if (count <= 0 || count > 15)
>      return;
>
> -  /* The recommended way to pad 64bit code is to use NOPs preceded by
> -     maximally four 0x66 prefixes.  Balance the size of nops.  */
>    if (flag_code == CODE_64BIT)
>      {
> -      int i;
> -      int nnops = (count + 3) / 4;
> -      int len = count / nnops;
> -      int remains = count - nnops * len;
> -      int pos = 0;
> -
> -      for (i = 0; i < remains; i++)
> -        {
> -          memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len);
> -          fragP->fr_literal[fragP->fr_fix + pos + len] = 0x90;
> -          pos += len + 1;
> -        }
> -      for (; i < nnops;
Re: Trapsleds
On Mon, Jun 19, 2017 at 09:22:57PM -0400, Todd Mortimer wrote:
> Hello tech,
>
> I have attached a patch that converts NOP padding from the assembler
> into INT3 padding on amd64. The idea is to remove potentially convenient
> NOP sleds from programs and libraries, which makes it harder for an
> attacker to hit any ROP gadgets or other instructions after a NOP sled.
>
> NOP sleds are used for text alignment in order to get jump targets onto
> 16 byte boundaries. They can appear both in the middle of a function
> and at the end. The trapsleds implemented in this diff convert NOP sleds
> longer than 2 bytes from a series of 0x6690 instructions to a 2 byte
> short JMP over a series of INT3 instructions that fill the rest of the
> gap. Programs that would have normally just slid through the NOP sled
> will now jump over it. An attacker trying to hit the NOP sled will now
> get a core dump.
>
> I have been running this on my system for over a week without any
> apparent ill effects. Specifically, there don't appear to be any
> performance penalties associated with doing this. A full base build
> on a system completely converted over to this took slightly less time to
> complete than the same build on a normal system, and my synthetic
> testing shows trapsleds perform similarly to nopsleds (performance
> difference was <1%, which is within error over multiple runs).
>
> If people like this, I can do up the equivalent diff for clang.
>
> Things that could be improved:
>
> 1. For padding inserted at the end of a function, the JMP is
> unnecessary, and could also be a 0xCC. I am going to have a go at gcc
> to see if I can coerce it into distinguishing end-of-function padding
> from padding that is intended to be executed. If some kind soul with gcc
> experience knows where I should look, any pointers would be welcome - my
> previous attempt was not fruitful.
>
> 2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
> the NOP sleds between 3 and 7 bytes if there are no objections.
>
> Comments and suggestions are welcome. Thanks to Theo for suggesting it
> in the hallway track at BSDCan.
>
> Todd
>

Nice, well done! I had this on my to do list for a while now and I'm
happy to see someone beat me to it.

-ml

> Index: gas/config/tc-i386.c
> ===
> RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v
> retrieving revision 1.7
> diff -u -p -u -p -r1.7 tc-i386.c
> --- gas/config/tc-i386.c    4 Jun 2017 20:26:18 -    1.7
> +++ gas/config/tc-i386.c    20 Jun 2017 00:36:27 -
> @@ -538,8 +538,8 @@ i386_align_code (fragP, count)
>      {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,  /* leal 0L(%esi,1),%esi */
>       0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
>    static const char f32_15[] =
> -    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,  /* jmp .+15; lotsa nops */
> -     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
> +    {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC,  /* jmp .+15; lotsa int3 */
> +     0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC};
>    static const char f16_3[] =
>      {0x8d,0x74,0x00};                     /* lea 0(%esi),%esi     */
>    static const char f16_4[] =
> @@ -556,6 +556,8 @@ i386_align_code (fragP, count)
>    static const char f16_8[] =
>      {0x8d,0xb4,0x00,0x00,                 /* lea 0w(%si),%si      */
>       0x8d,0xbd,0x00,0x00};                /* lea 0w(%di),%di      */
> +  static const char f64_2[] =
> +    {0x66,0x90};                          /* data16, nop          */
>    static const char *const f32_patt[] = {
>      f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
>      f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
> @@ -564,32 +566,21 @@ i386_align_code (fragP, count)
>      f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
>      f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
>    };
> +  static const char *const f64_patt[] = {
> +    f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
> +    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
> +  };
>
>    if (count <= 0 || count > 15)
>      return;
>
> -  /* The recommended way to pad 64bit code is to use NOPs preceded by
> -     maximally four 0x66 prefixes.  Balance the size of nops.  */
>    if (flag_code == CODE_64BIT)
>      {
> -      int i;
> -      int nnops = (count + 3) / 4;
> -      int len = count / nnops;
> -      int remains = count - nnops * len;
> -      int pos = 0;
> -
> -      for (i = 0; i < remains; i++)
> -        {
> -          memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len);
> -          fragP->fr_lit
Trapsleds
Hello tech,

I have attached a patch that converts NOP padding from the assembler
into INT3 padding on amd64. The idea is to remove potentially convenient
NOP sleds from programs and libraries, which makes it harder for an
attacker to hit any ROP gadgets or other instructions after a NOP sled.

NOP sleds are used for text alignment in order to get jump targets onto
16 byte boundaries. They can appear both in the middle of a function
and at the end. The trapsleds implemented in this diff convert NOP sleds
longer than 2 bytes from a series of 0x6690 instructions to a 2 byte
short JMP over a series of INT3 instructions that fill the rest of the
gap. Programs that would have normally just slid through the NOP sled
will now jump over it. An attacker trying to hit the NOP sled will now
get a core dump.

I have been running this on my system for over a week without any
apparent ill effects. Specifically, there don't appear to be any
performance penalties associated with doing this. A full base build
on a system completely converted over to this took slightly less time to
complete than the same build on a normal system, and my synthetic
testing shows trapsleds perform similarly to nopsleds (performance
difference was <1%, which is within error over multiple runs).

If people like this, I can do up the equivalent diff for clang.

Things that could be improved:

1. For padding inserted at the end of a function, the JMP is
unnecessary, and could also be a 0xCC. I am going to have a go at gcc
to see if I can coerce it into distinguishing end-of-function padding
from padding that is intended to be executed. If some kind soul with gcc
experience knows where I should look, any pointers would be welcome - my
previous attempt was not fruitful.

2. This patch also hits NOP sleds > 8 bytes on i386. We could also hit
the NOP sleds between 3 and 7 bytes if there are no objections.

Comments and suggestions are welcome. Thanks to Theo for suggesting it
in the hallway track at BSDCan.

Todd

Index: gas/config/tc-i386.c
===
RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/config/tc-i386.c,v
retrieving revision 1.7
diff -u -p -u -p -r1.7 tc-i386.c
--- gas/config/tc-i386.c    4 Jun 2017 20:26:18 -    1.7
+++ gas/config/tc-i386.c    20 Jun 2017 00:36:27 -
@@ -538,8 +538,8 @@ i386_align_code (fragP, count)
     {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,  /* leal 0L(%esi,1),%esi */
      0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
   static const char f32_15[] =
-    {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,  /* jmp .+15; lotsa nops */
-     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
+    {0xeb,0x0d,0xCC,0xCC,0xCC,0xCC,0xCC,  /* jmp .+15; lotsa int3 */
+     0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC};
   static const char f16_3[] =
     {0x8d,0x74,0x00};                     /* lea 0(%esi),%esi     */
   static const char f16_4[] =
@@ -556,6 +556,8 @@ i386_align_code (fragP, count)
   static const char f16_8[] =
     {0x8d,0xb4,0x00,0x00,                 /* lea 0w(%si),%si      */
      0x8d,0xbd,0x00,0x00};                /* lea 0w(%di),%di      */
+  static const char f64_2[] =
+    {0x66,0x90};                          /* data16, nop          */
   static const char *const f32_patt[] = {
     f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
     f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
@@ -564,32 +566,21 @@ i386_align_code (fragP, count)
     f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
     f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
   };
+  static const char *const f64_patt[] = {
+    f32_1, f64_2, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
+    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
+  };

   if (count <= 0 || count > 15)
     return;

-  /* The recommended way to pad 64bit code is to use NOPs preceded by
-     maximally four 0x66 prefixes.  Balance the size of nops.  */
   if (flag_code == CODE_64BIT)
     {
-      int i;
-      int nnops = (count + 3) / 4;
-      int len = count / nnops;
-      int remains = count - nnops * len;
-      int pos = 0;
-
-      for (i = 0; i < remains; i++)
-        {
-          memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len);
-          fragP->fr_literal[fragP->fr_fix + pos + len] = 0x90;
-          pos += len + 1;
-        }
-      for (; i < nnops; i++)
-        {
-          memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len - 1);
-          fragP->fr_literal[fragP->fr_fix + pos + len - 1] = 0x90;
-          pos += len;
-        }
+      memcpy(fragP->fr_literal + fragP->fr_fix,
+              f64_patt[count -1], count);
+      if (count > 2)
+        /* Adjust jump offset */
+        fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
     }
   else if (flag_code == CODE_16BIT)
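To see the new CODE_64BIT branch in isolation, here is a small standalone
sketch that performs the same memcpy plus displacement adjustment for every
count from 1 to 15 and prints the resulting pad bytes. It is not the gas
code itself: the frag machinery (fragP, fr_literal, fr_fix) is replaced by a
local buffer.

#include <stdio.h>
#include <string.h>

/* The tables from the diff, reduced to what the 64-bit path uses. */
static const unsigned char f32_1[]  = { 0x90 };        /* nop */
static const unsigned char f64_2[]  = { 0x66, 0x90 };  /* data16 nop */
static const unsigned char f32_15[] =
    { 0xeb, 0x0d, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,        /* jmp .+15; int3s */
      0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC };

static const unsigned char *const f64_patt[15] = {
    f32_1,  f64_2,  f32_15, f32_15, f32_15, f32_15, f32_15, f32_15,
    f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
};

int main(void)
{
    unsigned char pad[15];

    for (int count = 1; count <= 15; count++) {
        /* What the patched CODE_64BIT branch does, with 'pad' standing in
         * for fragP->fr_literal + fragP->fr_fix. */
        memcpy(pad, f64_patt[count - 1], count);
        if (count > 2)
            pad[1] = (unsigned char)(count - 2);  /* adjust jump offset */

        printf("%2d:", count);
        for (int i = 0; i < count; i++)
            printf(" %02x", pad[i]);
        putchar('\n');
    }
    return 0;
}

For count 9 this prints "eb 07 cc cc cc cc cc cc cc": a two-byte jump over
seven int3 bytes. Counts 1 and 2 stay a plain nop and a data16 nop, matching
the note above that only pads longer than 2 bytes become trapsleds.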