Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
>>> On 12.02.18 at 12:23, wrote: > --- a/xen/arch/x86/alternative.c > +++ b/xen/arch/x86/alternative.c > @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct > alt_instr *start, > uint8_t *orig = ALT_ORIG_PTR(a); > uint8_t *repl = ALT_REPL_PTR(a); > uint8_t buf[MAX_PATCH_LEN]; > +unsigned int total_len = a->orig_len + a->pad_len; > > -BUG_ON(a->repl_len > a->orig_len); > -BUG_ON(a->orig_len > sizeof(buf)); > +BUG_ON(a->repl_len > total_len); > +BUG_ON(total_len > sizeof(buf)); > BUG_ON(a->cpuid >= NCAPINTS * 32); > > if ( !boot_cpu_has(a->cpuid) ) > +{ > +unsigned int i; > + > +/* No replacement to make, but try to optimise any padding. */ Better move the comment ahead of the declaration? > @@ -26,44 +27,64 @@ extern void apply_alternatives(const struct alt_instr > *start, > const struct alt_instr *end); > extern void alternative_instructions(void); > > -#define OLDINSTR(oldinstr) ".L%=_orig_s:\n\t" oldinstr > "\n.L%=_orig_e:\n" > - > #define repl_s(num) ".L%=_repl_s"#num > #define repl_e(num) ".L%=_repl_e"#num > > #define alt_orig_len"(.L%=_orig_e - .L%=_orig_s)" > +#define alt_pad_len "(.L%=_orig_p - .L%=_orig_e)" > +#define alt_total_len "(.L%=_orig_p - .L%=_orig_s)" > #define alt_repl_len(num) "(" repl_e(num) " - " repl_s(num) ")" > +#define gas_max(a, b) \ > +"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")" > + > +#define OLDINSTR_1(oldinstr, n1) \ > +".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \ > +".skip (-(("alt_repl_len(n1)"-"alt_orig_len") > 0) * "\ > + "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t" \ > +".L%=_orig_p:\n\t" > + > +#define ALT_PADDING_LEN(n1, n2) \ > +gas_max((alt_repl_len(n1), alt_repl_len(n2))"-"alt_orig_len > + > +#define OLDINSTR_2(oldinstr, n1, n2) \ > +".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \ > +".skip (-(("ALT_PADDING_LEN(n1, n2)") > 0) * "\ > + "("ALT_PADDING_LEN(n1, n2)")), 0x90\n\t" \ > +".L%=_orig_p:\n\t" > > #define ALTINSTR_ENTRY(feature, num)\ > " .long .L%=_orig_s - .\n"/* 
label */ \ > " .long " repl_s(num)" - .\n" /* new instruction */ \ > " .word " __stringify(feature) "\n" /* feature bit */ \ > " .byte " alt_orig_len "\n" /* source len */ \ > -" .byte " alt_repl_len(num) "\n" /* replacement len */ > +" .byte " alt_repl_len(num) "\n" /* replacement len */ \ > +" .byte " alt_pad_len "\n"/* padding len */ > > -#define DISCARD_ENTRY(num)/* repl <= orig */\ > -" .byte 0xff + (" alt_repl_len(num) ") - (" alt_orig_len ")\n" > +#define DISCARD_ENTRY(num)/* repl <= total */ \ > +" .byte 0xff + (" alt_repl_len(num) ") - (" alt_total_len ")\n" I don't think this is of much use anymore, now that you add the padding automatically (same for the respective part of the check in the assembler macro). Use ".byte " alt_total_len "\n" /* total_len <= 255 */ here instead (eliminating their explicit uses below)? Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On Tue, Feb 13, 2018 at 10:09:15AM +, Andrew Cooper wrote: > On 13/02/2018 09:45, Roger Pau Monné wrote: > > On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote: > >> .macro ALTERNATIVE oldinstr, newinstr, feature > >> .L\@_orig_s: > >> \oldinstr > >> .L\@_orig_e: > >> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), > >> 0x90 > > clang chokes on this expression, because of the negation at the > > beginning and I'm also failing to see why are you adding such > > negation. AFAICT using: > > > > .skip (((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 > > > > Is correct: it adds the right padding if the alternative code is > > bigger than the original one, while not adding anything is the > > original code is greater than the alternative one. > > > > The negation just turns the 1 to -1, thus converting the result of the > > whole expression into a negative value. > > /sigh so Clang and GAS have different ideas of true. > > The reason for this negation is stated in the commit message. "x > 0" > in GAS yields 0 or -1, rather than the expected 1. That's unfortunate. 
What about something along the lines of: ---8<--- diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk index aeae01cd97..db442a45b7 100644 --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -23,6 +23,7 @@ $(call as-insn-check,CFLAGS,CC,"rdseed %eax",-DHAVE_GAS_RDSEED) $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \ -U__OBJECT_LABEL__ -DHAVE_GAS_QUOTED_SYM \ '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@') +$(call as-insn-check,CFLAGS,CC,".skip (-(1 > 0))$$(comma)0x90",-DAS_NEGATIVE_TRUE) CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h index f7e37cb891..6ce6479e5b 100644 --- a/xen/include/asm-x86/alternative-asm.h +++ b/xen/include/asm-x86/alternative-asm.h @@ -25,11 +25,18 @@ #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr: #define gas_max(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b) +#ifdef AS_NEGATIVE_TRUE +#define as_true - +#else +#define as_true +#endif + .macro ALTERNATIVE oldinstr, newinstr, feature .L\@_orig_s: \oldinstr .L\@_orig_e: - .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 + .skip (as_true((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), \ + 0x90 .L\@_orig_p: .pushsection .altinstructions, "a", @progbits @@ -56,8 +63,8 @@ .L\@_orig_s: \oldinstr .L\@_orig_e: -.skip (-((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \ - (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90 +.skip (as_true((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \ + (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90 .L\@_orig_p: .pushsection .altinstructions, "a", @progbits diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h index 20dea2245a..ea76fa9f8d 100644 --- a/xen/include/asm-x86/alternative.h +++ b/xen/include/asm-x86/alternative.h @@ -37,19 +37,25 @@ extern void alternative_instructions(void); #define gas_max(a, b) \ "((" a ") ^ (((" a 
") ^ (" b ")) & -(-((" a ") < (" b ")" -#define OLDINSTR_1(oldinstr, n1) \ -".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \ -".skip (-(("alt_repl_len(n1)"-"alt_orig_len") > 0) * "\ - "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t" \ +#ifdef AS_NEGATIVE_TRUE +#define as_true - +#else +#define as_true +#endif + +#define OLDINSTR_1(oldinstr, n1)\ +".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \ +".skip ("as_true"(("alt_repl_len(n1)"-"alt_orig_len") > 0) * " \ + "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t" \ ".L%=_orig_p:\n\t" #define ALT_PADDING_LEN(n1, n2) \ gas_max((alt_repl_len(n1), alt_repl_len(n2))"-"alt_orig_len -#define OLDINSTR_2(oldinstr, n1, n2) \ -".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \ -".skip (-(("ALT_PADDING_LEN(n1, n2)") > 0) * "\ - "("ALT_PADDING_LEN(n1, n2)")), 0x90\n\t" \ +#define OLDINSTR_2(oldinstr, n1, n2)\ +".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \ +".skip ("as_true"(("ALT_PADDING_LEN(n1, n2)") > 0) * " \ + "("ALT_PADDING_LEN(n1, n2)")), 0x90\n\t" \ ".L%=_orig_p:\n\t" #define ALTINSTR_ENTRY(feature, num)\ ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On 13/02/2018 09:45, Roger Pau Monné wrote: > On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote: >> .macro ALTERNATIVE oldinstr, newinstr, feature >> .L\@_orig_s: >> \oldinstr >> .L\@_orig_e: >> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), >> 0x90 > clang chokes on this expression, because of the negation at the > beginning and I'm also failing to see why are you adding such > negation. AFAICT using: > > .skip (((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 > > Is correct: it adds the right padding if the alternative code is > bigger than the original one, while not adding anything is the > original code is greater than the alternative one. > > The negation just turns the 1 to -1, thus converting the result of the > whole expression into a negative value. /sigh so Clang and GAS have different ideas of true. The reason for this negation is stated in the commit message. "x > 0" in GAS yields 0 or -1, rather than the expected 1. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On Mon, Feb 12, 2018 at 11:23:05AM +0000, Andrew Cooper wrote: > .macro ALTERNATIVE oldinstr, newinstr, feature > .L\@_orig_s: > \oldinstr > .L\@_orig_e: > + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 clang chokes on this expression, because of the negation at the beginning, and I'm also failing to see why you are adding such a negation. AFAICT using: .skip (((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 is correct: it adds the right padding if the alternative code is bigger than the original one, while not adding anything if the original code is greater than the alternative one. The negation just turns the 1 to -1, thus converting the result of the whole expression into a negative value. Roger. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On 12/02/18 18:41, Roger Pau Monné wrote: > On Mon, Feb 12, 2018 at 03:04:21PM +, Andrew Cooper wrote: >> On 12/02/18 14:39, Wei Liu wrote: >>> On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote: .macro ALTERNATIVE oldinstr, newinstr, feature .L\@_orig_s: \oldinstr .L\@_orig_e: + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 >>> Seeing the negation at the beginning, I suppose this should also be a >>> gas specific macro? >> The build failures are because clang's integrated assembler can't cope >> with non-absolute references with .skip, but we already know about this >> and have code identical to this in tree. (I temporarily removed it in >> patch 4). > Newer clang (6) supports .skip with labels, but doesn't support the > (-(... And it's having some issues with the rest of the expression, > will have to check more closely tomorrow. > > I wonder, what's Linux doing in this regard? It seems like clang/llvm > is quite committed to support building Linux, so it might be good to > follow suit in this case. This is basically the same as what Linux does. Linux unconditionally uses -no-integrated-as. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On Mon, Feb 12, 2018 at 03:04:21PM +, Andrew Cooper wrote: > On 12/02/18 14:39, Wei Liu wrote: > > On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote: > >> .macro ALTERNATIVE oldinstr, newinstr, feature > >> .L\@_orig_s: > >> \oldinstr > >> .L\@_orig_e: > >> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), > >> 0x90 > > Seeing the negation at the beginning, I suppose this should also be a > > gas specific macro? > > The build failures are because clang's integrated assembler can't cope > with non-absolute references with .skip, but we already know about this > and have code identical to this in tree. (I temporarily removed it in > patch 4). Newer clang (6) supports .skip with labels, but doesn't support the (-(... And it's having some issues with the rest of the expression, will have to check more closely tomorrow. I wonder, what's Linux doing in this regard? It seems like clang/llvm is quite committed to support building Linux, so it might be good to follow suit in this case. Roger. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote: > The correct amount of padding in an origin patch site can be calculated > automatically, based on the relative lengths of the replacements. > > This requires a bit of trickery to calculate correctly, especially in the > ALTENRATIVE_2 case where a branchless max() calculation in needed. The > calculation is further complicated because GAS's idea of true is -1 rather > than 1, which is why the extra negations are required. > > Additionally, have apply_alternatives() attempt to optimise the padding nops. > > Signed-off-by: Andrew Cooper LGTM, just a couple of nits: Reviewed-by: Roger Pau Monné > --- > CC: Jan Beulich > CC: Konrad Rzeszutek Wilk > CC: Roger Pau Monné > CC: Wei Liu > --- > xen/arch/x86/alternative.c| 32 > xen/include/asm-x86/alternative-asm.h | 40 > +++ > xen/include/asm-x86/alternative.h | 39 ++ > 3 files changed, 89 insertions(+), 22 deletions(-) > > diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c > index f8ddab5..ec87ff4 100644 > --- a/xen/arch/x86/alternative.c > +++ b/xen/arch/x86/alternative.c > @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct > alt_instr *start, > uint8_t *orig = ALT_ORIG_PTR(a); > uint8_t *repl = ALT_REPL_PTR(a); > uint8_t buf[MAX_PATCH_LEN]; > +unsigned int total_len = a->orig_len + a->pad_len; > > -BUG_ON(a->repl_len > a->orig_len); > -BUG_ON(a->orig_len > sizeof(buf)); > +BUG_ON(a->repl_len > total_len); > +BUG_ON(total_len > sizeof(buf)); > BUG_ON(a->cpuid >= NCAPINTS * 32); > > if ( !boot_cpu_has(a->cpuid) ) > +{ > +unsigned int i; > + > +/* No replacement to make, but try to optimise any padding. */ > +if ( a->pad_len <= 1 ) > +continue; > + > +/* Search the padding area for any byte which isn't a nop. */ > +for ( i = a->orig_len; i < total_len; ++i ) > +if ( orig[i] != 0x90 ) Maybe better to compare against ASM_NOP1? > +break; > + > +/* > + * Only make any changes if all padding bytes are unoptimised > + * nops. 
With multiple alternatives over the same origin site, > we > + * may have already made a replacement, or optimised the nops. > + */ > +if ( i != total_len ) > +continue; > + > +add_nops(buf, a->pad_len); > +text_poke(orig + a->orig_len, buf, a->pad_len); > continue; > +} > > memcpy(buf, repl, a->repl_len); > > @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct > alt_instr *start, > if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 ) > *(s32 *)(buf + 1) += repl - orig; > > -add_nops(buf + a->repl_len, a->orig_len - a->repl_len); > -text_poke(orig, buf, a->orig_len); > +add_nops(buf + a->repl_len, total_len - a->repl_len); > +text_poke(orig, buf, total_len); > } > } > > diff --git a/xen/include/asm-x86/alternative-asm.h > b/xen/include/asm-x86/alternative-asm.h > index 150bd1a..f7e37cb 100644 > --- a/xen/include/asm-x86/alternative-asm.h > +++ b/xen/include/asm-x86/alternative-asm.h > @@ -9,30 +9,41 @@ > * enough information for the alternatives patching code to patch an > * instruction. See apply_alternatives(). > */ > -.macro altinstruction_entry orig repl feature orig_len repl_len > +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len > .long \orig - . > .long \repl - . > .word \feature > .byte \orig_len > .byte \repl_len > +.byte \pad_len > .endm > > #define orig_len (.L\@_orig_e - .L\@_orig_s) > +#define pad_len(.L\@_orig_p - .L\@_orig_e) > +#define total_len (.L\@_orig_p - .L\@_orig_s) > #define repl_len(nr) (.L\@_repl_e\()nr - .L\@_repl_s\()nr) > #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr: > +#define gas_max(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b) That seems to work fine at least on newish versions of clang, so I'm not sure the g prefix is required (as_max). 
> > .macro ALTERNATIVE oldinstr, newinstr, feature > .L\@_orig_s: > \oldinstr > .L\@_orig_e: > + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 > +.L\@_orig_p: > > .pushsection .altinstructions, "a", @progbits > altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \ > -orig_len, repl_len(1) > +orig_len, repl_len(1), pad_len > > .section .discard, "a", @progbits > -/* Assembler-time check that \newinstr isn't longer than \oldinstr. */ > -.byte
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On 12/02/18 14:39, Wei Liu wrote: > On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote: >> The correct amount of padding in an origin patch site can be calculated >> automatically, based on the relative lengths of the replacements. >> >> This requires a bit of trickery to calculate correctly, especially in the >> ALTENRATIVE_2 case where a branchless max() calculation in needed. The >> calculation is further complicated because GAS's idea of true is -1 rather >> than 1, which is why the extra negations are required. >> >> Additionally, have apply_alternatives() attempt to optimise the padding nops. >> >> Signed-off-by: Andrew Cooper >> --- >> CC: Jan Beulich >> CC: Konrad Rzeszutek Wilk >> CC: Roger Pau Monné >> CC: Wei Liu >> --- >> xen/arch/x86/alternative.c| 32 >> xen/include/asm-x86/alternative-asm.h | 40 >> +++ >> xen/include/asm-x86/alternative.h | 39 >> ++ >> 3 files changed, 89 insertions(+), 22 deletions(-) >> >> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c >> index f8ddab5..ec87ff4 100644 >> --- a/xen/arch/x86/alternative.c >> +++ b/xen/arch/x86/alternative.c >> @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct >> alt_instr *start, >> uint8_t *orig = ALT_ORIG_PTR(a); >> uint8_t *repl = ALT_REPL_PTR(a); >> uint8_t buf[MAX_PATCH_LEN]; >> +unsigned int total_len = a->orig_len + a->pad_len; >> >> -BUG_ON(a->repl_len > a->orig_len); >> -BUG_ON(a->orig_len > sizeof(buf)); >> +BUG_ON(a->repl_len > total_len); >> +BUG_ON(total_len > sizeof(buf)); >> BUG_ON(a->cpuid >= NCAPINTS * 32); >> >> if ( !boot_cpu_has(a->cpuid) ) >> +{ >> +unsigned int i; >> + >> +/* No replacement to make, but try to optimise any padding. */ >> +if ( a->pad_len <= 1 ) >> +continue; >> + >> +/* Search the padding area for any byte which isn't a nop. */ >> +for ( i = a->orig_len; i < total_len; ++i ) >> +if ( orig[i] != 0x90 ) >> +break; >> + >> +/* >> + * Only make any changes if all padding bytes are unoptimised >> + * nops. 
With multiple alternatives over the same origin site, >> we >> + * may have already made a replacement, or optimised the nops. >> + */ >> +if ( i != total_len ) >> +continue; >> + >> +add_nops(buf, a->pad_len); >> +text_poke(orig + a->orig_len, buf, a->pad_len); >> continue; >> +} > Is the expectation here the alternative instructions already contain > optimised paddings (including live patches)? Otherwise why is the same > optimisation no needed when later? The problem is that we don't store the actual original bytes, so can't trivially detect whether we've already patched this site before. We've a number of cases which are an ALTERNATIVE_2 based on SMEP and SMAP, so on a fair chunk of hardware, we first make a replacement because of SMEP, then fail the SMAP check and don't make the second replacement. Later, we are discarding everything in orig+pad, and replacing it with repl+any necessary padding, which is made of optimised nops. > >> >> memcpy(buf, repl, a->repl_len); >> >> @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct >> alt_instr *start, >> if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 ) >> *(s32 *)(buf + 1) += repl - orig; >> >> -add_nops(buf + a->repl_len, a->orig_len - a->repl_len); >> -text_poke(orig, buf, a->orig_len); >> +add_nops(buf + a->repl_len, total_len - a->repl_len); >> +text_poke(orig, buf, total_len); >> } >> } >> >> diff --git a/xen/include/asm-x86/alternative-asm.h >> b/xen/include/asm-x86/alternative-asm.h >> index 150bd1a..f7e37cb 100644 >> --- a/xen/include/asm-x86/alternative-asm.h >> +++ b/xen/include/asm-x86/alternative-asm.h >> @@ -9,30 +9,41 @@ >> * enough information for the alternatives patching code to patch an >> * instruction. See apply_alternatives(). >> */ >> -.macro altinstruction_entry orig repl feature orig_len repl_len >> +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len >> .long \orig - . >> .long \repl - . 
>> .word \feature >> .byte \orig_len >> .byte \repl_len >> +.byte \pad_len >> .endm >> >> #define orig_len (.L\@_orig_e - .L\@_orig_s) >> +#define pad_len(.L\@_orig_p - .L\@_orig_e) >> +#define total_len (.L\@_orig_p - .L\@_orig_s) >> #define repl_len(nr) (.L\@_repl_e\()nr - .L\@_repl_s\()nr) >> #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr: >> +#define gas_m
Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote: > The correct amount of padding in an origin patch site can be calculated > automatically, based on the relative lengths of the replacements. > > This requires a bit of trickery to calculate correctly, especially in the > ALTENRATIVE_2 case where a branchless max() calculation in needed. The > calculation is further complicated because GAS's idea of true is -1 rather > than 1, which is why the extra negations are required. > > Additionally, have apply_alternatives() attempt to optimise the padding nops. > > Signed-off-by: Andrew Cooper > --- > CC: Jan Beulich > CC: Konrad Rzeszutek Wilk > CC: Roger Pau Monné > CC: Wei Liu > --- > xen/arch/x86/alternative.c| 32 > xen/include/asm-x86/alternative-asm.h | 40 > +++ > xen/include/asm-x86/alternative.h | 39 ++ > 3 files changed, 89 insertions(+), 22 deletions(-) > > diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c > index f8ddab5..ec87ff4 100644 > --- a/xen/arch/x86/alternative.c > +++ b/xen/arch/x86/alternative.c > @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct > alt_instr *start, > uint8_t *orig = ALT_ORIG_PTR(a); > uint8_t *repl = ALT_REPL_PTR(a); > uint8_t buf[MAX_PATCH_LEN]; > +unsigned int total_len = a->orig_len + a->pad_len; > > -BUG_ON(a->repl_len > a->orig_len); > -BUG_ON(a->orig_len > sizeof(buf)); > +BUG_ON(a->repl_len > total_len); > +BUG_ON(total_len > sizeof(buf)); > BUG_ON(a->cpuid >= NCAPINTS * 32); > > if ( !boot_cpu_has(a->cpuid) ) > +{ > +unsigned int i; > + > +/* No replacement to make, but try to optimise any padding. */ > +if ( a->pad_len <= 1 ) > +continue; > + > +/* Search the padding area for any byte which isn't a nop. */ > +for ( i = a->orig_len; i < total_len; ++i ) > +if ( orig[i] != 0x90 ) > +break; > + > +/* > + * Only make any changes if all padding bytes are unoptimised > + * nops. 
With multiple alternatives over the same origin site, > we > + * may have already made a replacement, or optimised the nops. > + */ > +if ( i != total_len ) > +continue; > + > +add_nops(buf, a->pad_len); > +text_poke(orig + a->orig_len, buf, a->pad_len); > continue; > +} Is the expectation here the alternative instructions already contain optimised paddings (including live patches)? Otherwise why is the same optimisation no needed when later? > > memcpy(buf, repl, a->repl_len); > > @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct > alt_instr *start, > if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 ) > *(s32 *)(buf + 1) += repl - orig; > > -add_nops(buf + a->repl_len, a->orig_len - a->repl_len); > -text_poke(orig, buf, a->orig_len); > +add_nops(buf + a->repl_len, total_len - a->repl_len); > +text_poke(orig, buf, total_len); > } > } > > diff --git a/xen/include/asm-x86/alternative-asm.h > b/xen/include/asm-x86/alternative-asm.h > index 150bd1a..f7e37cb 100644 > --- a/xen/include/asm-x86/alternative-asm.h > +++ b/xen/include/asm-x86/alternative-asm.h > @@ -9,30 +9,41 @@ > * enough information for the alternatives patching code to patch an > * instruction. See apply_alternatives(). > */ > -.macro altinstruction_entry orig repl feature orig_len repl_len > +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len > .long \orig - . > .long \repl - . > .word \feature > .byte \orig_len > .byte \repl_len > +.byte \pad_len > .endm > > #define orig_len (.L\@_orig_e - .L\@_orig_s) > +#define pad_len(.L\@_orig_p - .L\@_orig_e) > +#define total_len (.L\@_orig_p - .L\@_orig_s) > #define repl_len(nr) (.L\@_repl_e\()nr - .L\@_repl_s\()nr) > #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr: > +#define gas_max(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b) What about clang's assembler? At least give it a stub to cause compilation error? 
> > .macro ALTERNATIVE oldinstr, newinstr, feature > .L\@_orig_s: > \oldinstr > .L\@_orig_e: > + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 Seeing the negation at the beginning, I suppose this should also be a gas specific macro? The rest looks good. Wei. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
The correct amount of padding in an origin patch site can be calculated automatically, based on the relative lengths of the replacements. This requires a bit of trickery to calculate correctly, especially in the ALTERNATIVE_2 case where a branchless max() calculation is needed. The calculation is further complicated because GAS's idea of true is -1 rather than 1, which is why the extra negations are required. Additionally, have apply_alternatives() attempt to optimise the padding nops. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Konrad Rzeszutek Wilk CC: Roger Pau Monné CC: Wei Liu --- xen/arch/x86/alternative.c| 32 xen/include/asm-x86/alternative-asm.h | 40 +++ xen/include/asm-x86/alternative.h | 39 ++ 3 files changed, 89 insertions(+), 22 deletions(-) diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c index f8ddab5..ec87ff4 100644 --- a/xen/arch/x86/alternative.c +++ b/xen/arch/x86/alternative.c @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct alt_instr *start, uint8_t *orig = ALT_ORIG_PTR(a); uint8_t *repl = ALT_REPL_PTR(a); uint8_t buf[MAX_PATCH_LEN]; +unsigned int total_len = a->orig_len + a->pad_len; -BUG_ON(a->repl_len > a->orig_len); -BUG_ON(a->orig_len > sizeof(buf)); +BUG_ON(a->repl_len > total_len); +BUG_ON(total_len > sizeof(buf)); BUG_ON(a->cpuid >= NCAPINTS * 32); if ( !boot_cpu_has(a->cpuid) ) +{ +unsigned int i; + +/* No replacement to make, but try to optimise any padding. */ +if ( a->pad_len <= 1 ) +continue; + +/* Search the padding area for any byte which isn't a nop. */ +for ( i = a->orig_len; i < total_len; ++i ) +if ( orig[i] != 0x90 ) +break; + +/* + * Only make any changes if all padding bytes are unoptimised + * nops. With multiple alternatives over the same origin site, we + * may have already made a replacement, or optimised the nops. 
+ */ +if ( i != total_len ) +continue; + +add_nops(buf, a->pad_len); +text_poke(orig + a->orig_len, buf, a->pad_len); continue; +} memcpy(buf, repl, a->repl_len); @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct alt_instr *start, if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 ) *(s32 *)(buf + 1) += repl - orig; -add_nops(buf + a->repl_len, a->orig_len - a->repl_len); -text_poke(orig, buf, a->orig_len); +add_nops(buf + a->repl_len, total_len - a->repl_len); +text_poke(orig, buf, total_len); } } diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h index 150bd1a..f7e37cb 100644 --- a/xen/include/asm-x86/alternative-asm.h +++ b/xen/include/asm-x86/alternative-asm.h @@ -9,30 +9,41 @@ * enough information for the alternatives patching code to patch an * instruction. See apply_alternatives(). */ -.macro altinstruction_entry orig repl feature orig_len repl_len +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len .long \orig - . .long \repl - . .word \feature .byte \orig_len .byte \repl_len +.byte \pad_len .endm #define orig_len (.L\@_orig_e - .L\@_orig_s) +#define pad_len (.L\@_orig_p - .L\@_orig_e) +#define total_len (.L\@_orig_p - .L\@_orig_s) #define repl_len(nr) (.L\@_repl_e\()nr - .L\@_repl_s\()nr) #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr: +#define gas_max(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) .macro ALTERNATIVE oldinstr, newinstr, feature .L\@_orig_s: \oldinstr .L\@_orig_e: + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 +.L\@_orig_p: .pushsection .altinstructions, "a", @progbits altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \ -orig_len, repl_len(1) +orig_len, repl_len(1), pad_len .section .discard, "a", @progbits -/* Assembler-time check that \newinstr isn't longer than \oldinstr. 
*/ -.byte 0xff + repl_len(1) - orig_len +/* + * Assembler-time checks: + * - total_len <= 255 + * - \newinstr <= total_len + */ +.byte total_len +.byte 0xff + repl_len(1) - total_len .section .altinstr_replacement, "ax", @progbits @@ -45,18 +56,26 @@ .L\@_orig_s: \oldinstr .L\@_orig_e: +.skip (-((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \ + (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90 +.L\@_orig_p: .pushsection .altinstructions, "a", @progbits