Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-14 Thread Jan Beulich
>>> On 12.02.18 at 12:23, Andrew Cooper wrote:
> --- a/xen/arch/x86/alternative.c
> +++ b/xen/arch/x86/alternative.c
> @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct 
> alt_instr *start,
>  uint8_t *orig = ALT_ORIG_PTR(a);
>  uint8_t *repl = ALT_REPL_PTR(a);
>  uint8_t buf[MAX_PATCH_LEN];
> +unsigned int total_len = a->orig_len + a->pad_len;
>  
> -BUG_ON(a->repl_len > a->orig_len);
> -BUG_ON(a->orig_len > sizeof(buf));
> +BUG_ON(a->repl_len > total_len);
> +BUG_ON(total_len > sizeof(buf));
>  BUG_ON(a->cpuid >= NCAPINTS * 32);
>  
>  if ( !boot_cpu_has(a->cpuid) )
> +{
> +unsigned int i;
> +
> +/* No replacement to make, but try to optimise any padding. */

Better move the comment ahead of the declaration?

> @@ -26,44 +27,64 @@ extern void apply_alternatives(const struct alt_instr 
> *start,
> const struct alt_instr *end);
>  extern void alternative_instructions(void);
>  
> -#define OLDINSTR(oldinstr)  ".L%=_orig_s:\n\t" oldinstr 
> "\n.L%=_orig_e:\n"
> -
>  #define repl_s(num) ".L%=_repl_s"#num
>  #define repl_e(num) ".L%=_repl_e"#num
>  
>  #define alt_orig_len"(.L%=_orig_e - .L%=_orig_s)"
> +#define alt_pad_len "(.L%=_orig_p - .L%=_orig_e)"
> +#define alt_total_len   "(.L%=_orig_p - .L%=_orig_s)"
>  #define alt_repl_len(num)   "(" repl_e(num) " - " repl_s(num) ")"
> +#define gas_max(a, b) \
> +"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
> +
> +#define OLDINSTR_1(oldinstr, n1)  \
> +".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \
> +".skip (-(("alt_repl_len(n1)"-"alt_orig_len") > 0) * "\
> + "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t" \
> +".L%=_orig_p:\n\t"
> +
> +#define ALT_PADDING_LEN(n1, n2) \
> +gas_max((alt_repl_len(n1), alt_repl_len(n2))"-"alt_orig_len
> +
> +#define OLDINSTR_2(oldinstr, n1, n2)  \
> +".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \
> +".skip (-(("ALT_PADDING_LEN(n1, n2)") > 0) * "\
> + "("ALT_PADDING_LEN(n1, n2)")), 0x90\n\t" \
> +".L%=_orig_p:\n\t"
>  
>  #define ALTINSTR_ENTRY(feature, num)\
>  " .long .L%=_orig_s - .\n"/* label   */ \
>  " .long " repl_s(num)" - .\n" /* new instruction */ \
>  " .word " __stringify(feature) "\n"   /* feature bit */ \
>  " .byte " alt_orig_len "\n"   /* source len  */ \
> -" .byte " alt_repl_len(num) "\n"  /* replacement len */
> +" .byte " alt_repl_len(num) "\n"  /* replacement len */ \
> +" .byte " alt_pad_len "\n"/* padding len */
>  
> -#define DISCARD_ENTRY(num)/* repl <= orig */\
> -" .byte 0xff + (" alt_repl_len(num) ") - (" alt_orig_len ")\n"
> +#define DISCARD_ENTRY(num)/* repl <= total */   \
> +" .byte 0xff + (" alt_repl_len(num) ") - (" alt_total_len ")\n"

I don't think this is of much use anymore, now that you add the
padding automatically (same for the respective part of the
check in the assembler macro). Use

".byte " alt_total_len "\n" /* total_len <= 255 */

here instead (eliminating their explicit uses below)?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-13 Thread Roger Pau Monné
On Tue, Feb 13, 2018 at 10:09:15AM +0000, Andrew Cooper wrote:
> On 13/02/2018 09:45, Roger Pau Monné wrote:
> > On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote:
> >>  .macro ALTERNATIVE oldinstr, newinstr, feature
> >>  .L\@_orig_s:
> >>  \oldinstr
> >>  .L\@_orig_e:
> >> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 
> >> 0x90
> > clang chokes on this expression, because of the negation at the
> > beginning and I'm also failing to see why are you adding such
> > negation. AFAICT using:
> >
> > .skip (((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90
> >
> > Is correct: it adds the right padding if the alternative code is
> > bigger than the original one, while not adding anything is the
> > original code is greater than the alternative one.
> >
> > The negation just turns the 1 to -1, thus converting the result of the
> > whole expression into a negative value.
> 
> /sigh so Clang and GAS have different ideas of true.
> 
> The reason for this negation is stated in the commit message.  "x > 0"
> in GAS yields 0 or -1, rather than the expected 1.

That's unfortunate. What about something along the lines of:

---8<---
diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk
index aeae01cd97..db442a45b7 100644
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -23,6 +23,7 @@ $(call as-insn-check,CFLAGS,CC,"rdseed 
%eax",-DHAVE_GAS_RDSEED)
 $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
  -U__OBJECT_LABEL__ -DHAVE_GAS_QUOTED_SYM \
  '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
+$(call as-insn-check,CFLAGS,CC,".skip (-(1 > 
0))$$(comma)0x90",-DAS_NEGATIVE_TRUE)
 
 CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables
 
diff --git a/xen/include/asm-x86/alternative-asm.h 
b/xen/include/asm-x86/alternative-asm.h
index f7e37cb891..6ce6479e5b 100644
--- a/xen/include/asm-x86/alternative-asm.h
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -25,11 +25,18 @@
 #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
 #define gas_max(a, b)  ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
 
+#ifdef AS_NEGATIVE_TRUE
+#define as_true -
+#else
+#define as_true
+#endif
+
 .macro ALTERNATIVE oldinstr, newinstr, feature
 .L\@_orig_s:
 \oldinstr
 .L\@_orig_e:
- .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90
+ .skip (as_true((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 
\
+   0x90
 .L\@_orig_p:
 
 .pushsection .altinstructions, "a", @progbits
@@ -56,8 +63,8 @@
 .L\@_orig_s:
 \oldinstr
 .L\@_orig_e:
-.skip (-((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \
- (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90
+.skip (as_true((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \
+   (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90
 .L\@_orig_p:
 
 .pushsection .altinstructions, "a", @progbits
diff --git a/xen/include/asm-x86/alternative.h 
b/xen/include/asm-x86/alternative.h
index 20dea2245a..ea76fa9f8d 100644
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -37,19 +37,25 @@ extern void alternative_instructions(void);
 #define gas_max(a, b) \
 "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
 
-#define OLDINSTR_1(oldinstr, n1)  \
-".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \
-".skip (-(("alt_repl_len(n1)"-"alt_orig_len") > 0) * "\
- "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t" \
+#ifdef AS_NEGATIVE_TRUE
+#define as_true -
+#else
+#define as_true
+#endif
+
+#define OLDINSTR_1(oldinstr, n1)\
+".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t"   \
+".skip ("as_true"(("alt_repl_len(n1)"-"alt_orig_len") > 0) * "  \
+ "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t"   \
 ".L%=_orig_p:\n\t"
 
 #define ALT_PADDING_LEN(n1, n2) \
 gas_max((alt_repl_len(n1), alt_repl_len(n2))"-"alt_orig_len
 
-#define OLDINSTR_2(oldinstr, n1, n2)  \
-".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \
-".skip (-(("ALT_PADDING_LEN(n1, n2)") > 0) * "\
- "("ALT_PADDING_LEN(n1, n2)")), 0x90\n\t" \
+#define OLDINSTR_2(oldinstr, n1, n2)\
+".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t"   \
+".skip ("as_true"(("ALT_PADDING_LEN(n1, n2)") > 0) * "  \
+ "("ALT_PADDING_LEN(n1, n2)")), 0x90\n\t"   \
 ".L%=_orig_p:\n\t"
 
 #define ALTINSTR_ENTRY(feature, num)\


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-13 Thread Andrew Cooper
On 13/02/2018 09:45, Roger Pau Monné wrote:
> On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote:
>>  .macro ALTERNATIVE oldinstr, newinstr, feature
>>  .L\@_orig_s:
>>  \oldinstr
>>  .L\@_orig_e:
>> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 
>> 0x90
> clang chokes on this expression, because of the negation at the
> beginning and I'm also failing to see why are you adding such
> negation. AFAICT using:
>
> .skip (((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90
>
> Is correct: it adds the right padding if the alternative code is
> bigger than the original one, while not adding anything is the
> original code is greater than the alternative one.
>
> The negation just turns the 1 to -1, thus converting the result of the
> whole expression into a negative value.

/sigh so Clang and GAS have different ideas of true.

The reason for this negation is stated in the commit message.  "x > 0"
in GAS yields 0 or -1, rather than the expected 1.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-13 Thread Roger Pau Monné
On Mon, Feb 12, 2018 at 11:23:05AM +0000, Andrew Cooper wrote:
>  .macro ALTERNATIVE oldinstr, newinstr, feature
>  .L\@_orig_s:
>  \oldinstr
>  .L\@_orig_e:
> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90

clang chokes on this expression because of the negation at the
beginning, and I'm also failing to see why you are adding such a
negation. AFAICT using:

.skip (((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90

Is correct: it adds the right padding if the alternative code is
bigger than the original one, while not adding anything if the
original code is longer than the alternative one.

The negation just turns the 1 to -1, thus converting the result of the
whole expression into a negative value.

Roger.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-12 Thread Andrew Cooper
On 12/02/18 18:41, Roger Pau Monné wrote:
> On Mon, Feb 12, 2018 at 03:04:21PM +, Andrew Cooper wrote:
>> On 12/02/18 14:39, Wei Liu wrote:
>>> On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote:
>>>>  .macro ALTERNATIVE oldinstr, newinstr, feature
>>>>  .L\@_orig_s:
>>>>  \oldinstr
>>>>  .L\@_orig_e:
>>>> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 
>>>> 0x90
>>> Seeing the negation at the beginning, I suppose this should also be a
>>> gas specific macro?
>> The build failures are because clang's integrated assembler can't cope
>> with non-absolute references with .skip, but we already know about this
>> and have code identical to this in tree.  (I temporarily removed it in
>> patch 4).
> Newer clang (6) supports .skip with labels, but doesn't support the
> (-(... And it's having some issues with the rest of the expression,
> will have to check more closely tomorrow.
>
> I wonder, what's Linux doing in this regard? It seems like clang/llvm
> is quite committed to support building Linux, so it might be good to
> follow suit in this case.

This is basically the same as what Linux does.  Linux unconditionally
uses -no-integrated-as.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-12 Thread Roger Pau Monné
On Mon, Feb 12, 2018 at 03:04:21PM +0000, Andrew Cooper wrote:
> On 12/02/18 14:39, Wei Liu wrote:
> > On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote:
> >>  .macro ALTERNATIVE oldinstr, newinstr, feature
> >>  .L\@_orig_s:
> >>  \oldinstr
> >>  .L\@_orig_e:
> >> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 
> >> 0x90
> > Seeing the negation at the beginning, I suppose this should also be a
> > gas specific macro?
> 
> The build failures are because clang's integrated assembler can't cope
> with non-absolute references with .skip, but we already know about this
> and have code identical to this in tree.  (I temporarily removed it in
> patch 4).

Newer clang (6) supports .skip with labels, but doesn't support the
leading (-(... negation. It's also having some issues with the rest of
the expression; I will have to check more closely tomorrow.

I wonder, what's Linux doing in this regard? It seems like clang/llvm
is quite committed to support building Linux, so it might be good to
follow suit in this case.

Roger.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-12 Thread Roger Pau Monné
On Mon, Feb 12, 2018 at 11:23:05AM +0000, Andrew Cooper wrote:
> The correct amount of padding in an origin patch site can be calculated
> automatically, based on the relative lengths of the replacements.
> 
> This requires a bit of trickery to calculate correctly, especially in the
> ALTERNATIVE_2 case where a branchless max() calculation is needed.  The
> calculation is further complicated because GAS's idea of true is -1 rather
> than 1, which is why the extra negations are required.
> 
> Additionally, have apply_alternatives() attempt to optimise the padding nops.
> 
> Signed-off-by: Andrew Cooper 

LGTM, just a couple of nits:

Reviewed-by: Roger Pau Monné 

> ---
> CC: Jan Beulich 
> CC: Konrad Rzeszutek Wilk 
> CC: Roger Pau Monné 
> CC: Wei Liu 
> ---
>  xen/arch/x86/alternative.c| 32 
>  xen/include/asm-x86/alternative-asm.h | 40 
> +++
>  xen/include/asm-x86/alternative.h | 39 ++
>  3 files changed, 89 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
> index f8ddab5..ec87ff4 100644
> --- a/xen/arch/x86/alternative.c
> +++ b/xen/arch/x86/alternative.c
> @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct 
> alt_instr *start,
>  uint8_t *orig = ALT_ORIG_PTR(a);
>  uint8_t *repl = ALT_REPL_PTR(a);
>  uint8_t buf[MAX_PATCH_LEN];
> +unsigned int total_len = a->orig_len + a->pad_len;
>  
> -BUG_ON(a->repl_len > a->orig_len);
> -BUG_ON(a->orig_len > sizeof(buf));
> +BUG_ON(a->repl_len > total_len);
> +BUG_ON(total_len > sizeof(buf));
>  BUG_ON(a->cpuid >= NCAPINTS * 32);
>  
>  if ( !boot_cpu_has(a->cpuid) )
> +{
> +unsigned int i;
> +
> +/* No replacement to make, but try to optimise any padding. */
> +if ( a->pad_len <= 1 )
> +continue;
> +
> +/* Search the padding area for any byte which isn't a nop. */
> +for ( i = a->orig_len; i < total_len; ++i )
> +if ( orig[i] != 0x90 )

Maybe better to compare against ASM_NOP1?

> +break;
> +
> +/*
> + * Only make any changes if all padding bytes are unoptimised
> + * nops.  With multiple alternatives over the same origin site, 
> we
> + * may have already made a replacement, or optimised the nops.
> + */
> +if ( i != total_len )
> +continue;
> +
> +add_nops(buf, a->pad_len);
> +text_poke(orig + a->orig_len, buf, a->pad_len);
>  continue;
> +}
>  
>  memcpy(buf, repl, a->repl_len);
>  
> @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct 
> alt_instr *start,
>  if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
>  *(s32 *)(buf + 1) += repl - orig;
>  
> -add_nops(buf + a->repl_len, a->orig_len - a->repl_len);
> -text_poke(orig, buf, a->orig_len);
> +add_nops(buf + a->repl_len, total_len - a->repl_len);
> +text_poke(orig, buf, total_len);
>  }
>  }
>  
> diff --git a/xen/include/asm-x86/alternative-asm.h 
> b/xen/include/asm-x86/alternative-asm.h
> index 150bd1a..f7e37cb 100644
> --- a/xen/include/asm-x86/alternative-asm.h
> +++ b/xen/include/asm-x86/alternative-asm.h
> @@ -9,30 +9,41 @@
>   * enough information for the alternatives patching code to patch an
>   * instruction. See apply_alternatives().
>   */
> -.macro altinstruction_entry orig repl feature orig_len repl_len
> +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len
>  .long \orig - .
>  .long \repl - .
>  .word \feature
>  .byte \orig_len
>  .byte \repl_len
> +.byte \pad_len
>  .endm
>  
>  #define orig_len   (.L\@_orig_e   - .L\@_orig_s)
> +#define pad_len(.L\@_orig_p   - .L\@_orig_e)
> +#define total_len  (.L\@_orig_p   - .L\@_orig_s)
>  #define repl_len(nr)   (.L\@_repl_e\()nr  - .L\@_repl_s\()nr)
>  #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
> +#define gas_max(a, b)  ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))

That seems to work fine at least on newish versions of clang, so I'm
not sure the g prefix is required (as_max).

>  
>  .macro ALTERNATIVE oldinstr, newinstr, feature
>  .L\@_orig_s:
>  \oldinstr
>  .L\@_orig_e:
> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90
> +.L\@_orig_p:
>  
>  .pushsection .altinstructions, "a", @progbits
>  altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \
> -orig_len, repl_len(1)
> +orig_len, repl_len(1), pad_len
>  
>  .section .discard, "a", @progbits
> -/* Assembler-time check that \newinstr isn't longer than \oldinstr. */
> -.byte

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-12 Thread Andrew Cooper
On 12/02/18 14:39, Wei Liu wrote:
> On Mon, Feb 12, 2018 at 11:23:05AM +, Andrew Cooper wrote:
>> The correct amount of padding in an origin patch site can be calculated
>> automatically, based on the relative lengths of the replacements.
>>
>> This requires a bit of trickery to calculate correctly, especially in the
>> ALTERNATIVE_2 case where a branchless max() calculation is needed.  The
>> calculation is further complicated because GAS's idea of true is -1 rather
>> than 1, which is why the extra negations are required.
>>
>> Additionally, have apply_alternatives() attempt to optimise the padding nops.
>>
>> Signed-off-by: Andrew Cooper 
>> ---
>> CC: Jan Beulich 
>> CC: Konrad Rzeszutek Wilk 
>> CC: Roger Pau Monné 
>> CC: Wei Liu 
>> ---
>>  xen/arch/x86/alternative.c| 32 
>>  xen/include/asm-x86/alternative-asm.h | 40 
>> +++
>>  xen/include/asm-x86/alternative.h | 39 
>> ++
>>  3 files changed, 89 insertions(+), 22 deletions(-)
>>
>> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
>> index f8ddab5..ec87ff4 100644
>> --- a/xen/arch/x86/alternative.c
>> +++ b/xen/arch/x86/alternative.c
>> @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct 
>> alt_instr *start,
>>  uint8_t *orig = ALT_ORIG_PTR(a);
>>  uint8_t *repl = ALT_REPL_PTR(a);
>>  uint8_t buf[MAX_PATCH_LEN];
>> +unsigned int total_len = a->orig_len + a->pad_len;
>>  
>> -BUG_ON(a->repl_len > a->orig_len);
>> -BUG_ON(a->orig_len > sizeof(buf));
>> +BUG_ON(a->repl_len > total_len);
>> +BUG_ON(total_len > sizeof(buf));
>>  BUG_ON(a->cpuid >= NCAPINTS * 32);
>>  
>>  if ( !boot_cpu_has(a->cpuid) )
>> +{
>> +unsigned int i;
>> +
>> +/* No replacement to make, but try to optimise any padding. */
>> +if ( a->pad_len <= 1 )
>> +continue;
>> +
>> +/* Search the padding area for any byte which isn't a nop. */
>> +for ( i = a->orig_len; i < total_len; ++i )
>> +if ( orig[i] != 0x90 )
>> +break;
>> +
>> +/*
>> + * Only make any changes if all padding bytes are unoptimised
>> + * nops.  With multiple alternatives over the same origin site, 
>> we
>> + * may have already made a replacement, or optimised the nops.
>> + */
>> +if ( i != total_len )
>> +continue;
>> +
>> +add_nops(buf, a->pad_len);
>> +text_poke(orig + a->orig_len, buf, a->pad_len);
>>  continue;
>> +}
> Is the expectation here the alternative instructions already contain
> optimised paddings (including live patches)? Otherwise why is the same
> optimisation no needed when later?

The problem is that we don't store the actual original bytes, so can't
trivially detect whether we've already patched this site before.  We've
a number of cases which are an ALTERNATIVE_2 based on SMEP and SMAP, so
on a fair chunk of hardware, we first make a replacement because of
SMEP, then fail the SMAP check and don't make the second replacement.

Later, we are discarding everything in orig+pad, and replacing it with
repl+any necessary padding, which is made of optimised nops.

>
>>  
>>  memcpy(buf, repl, a->repl_len);
>>  
>> @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct 
>> alt_instr *start,
>>  if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
>>  *(s32 *)(buf + 1) += repl - orig;
>>  
>> -add_nops(buf + a->repl_len, a->orig_len - a->repl_len);
>> -text_poke(orig, buf, a->orig_len);
>> +add_nops(buf + a->repl_len, total_len - a->repl_len);
>> +text_poke(orig, buf, total_len);
>>  }
>>  }
>>  
>> diff --git a/xen/include/asm-x86/alternative-asm.h 
>> b/xen/include/asm-x86/alternative-asm.h
>> index 150bd1a..f7e37cb 100644
>> --- a/xen/include/asm-x86/alternative-asm.h
>> +++ b/xen/include/asm-x86/alternative-asm.h
>> @@ -9,30 +9,41 @@
>>   * enough information for the alternatives patching code to patch an
>>   * instruction. See apply_alternatives().
>>   */
>> -.macro altinstruction_entry orig repl feature orig_len repl_len
>> +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len
>>  .long \orig - .
>>  .long \repl - .
>>  .word \feature
>>  .byte \orig_len
>>  .byte \repl_len
>> +.byte \pad_len
>>  .endm
>>  
>>  #define orig_len   (.L\@_orig_e   - .L\@_orig_s)
>> +#define pad_len(.L\@_orig_p   - .L\@_orig_e)
>> +#define total_len  (.L\@_orig_p   - .L\@_orig_s)
>>  #define repl_len(nr)   (.L\@_repl_e\()nr  - .L\@_repl_s\()nr)
>>  #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
>> +#define gas_m

Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-12 Thread Wei Liu
On Mon, Feb 12, 2018 at 11:23:05AM +0000, Andrew Cooper wrote:
> The correct amount of padding in an origin patch site can be calculated
> automatically, based on the relative lengths of the replacements.
> 
> This requires a bit of trickery to calculate correctly, especially in the
> ALTERNATIVE_2 case where a branchless max() calculation is needed.  The
> calculation is further complicated because GAS's idea of true is -1 rather
> than 1, which is why the extra negations are required.
> 
> Additionally, have apply_alternatives() attempt to optimise the padding nops.
> 
> Signed-off-by: Andrew Cooper 
> ---
> CC: Jan Beulich 
> CC: Konrad Rzeszutek Wilk 
> CC: Roger Pau Monné 
> CC: Wei Liu 
> ---
>  xen/arch/x86/alternative.c| 32 
>  xen/include/asm-x86/alternative-asm.h | 40 
> +++
>  xen/include/asm-x86/alternative.h | 39 ++
>  3 files changed, 89 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
> index f8ddab5..ec87ff4 100644
> --- a/xen/arch/x86/alternative.c
> +++ b/xen/arch/x86/alternative.c
> @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct 
> alt_instr *start,
>  uint8_t *orig = ALT_ORIG_PTR(a);
>  uint8_t *repl = ALT_REPL_PTR(a);
>  uint8_t buf[MAX_PATCH_LEN];
> +unsigned int total_len = a->orig_len + a->pad_len;
>  
> -BUG_ON(a->repl_len > a->orig_len);
> -BUG_ON(a->orig_len > sizeof(buf));
> +BUG_ON(a->repl_len > total_len);
> +BUG_ON(total_len > sizeof(buf));
>  BUG_ON(a->cpuid >= NCAPINTS * 32);
>  
>  if ( !boot_cpu_has(a->cpuid) )
> +{
> +unsigned int i;
> +
> +/* No replacement to make, but try to optimise any padding. */
> +if ( a->pad_len <= 1 )
> +continue;
> +
> +/* Search the padding area for any byte which isn't a nop. */
> +for ( i = a->orig_len; i < total_len; ++i )
> +if ( orig[i] != 0x90 )
> +break;
> +
> +/*
> + * Only make any changes if all padding bytes are unoptimised
> + * nops.  With multiple alternatives over the same origin site, 
> we
> + * may have already made a replacement, or optimised the nops.
> + */
> +if ( i != total_len )
> +continue;
> +
> +add_nops(buf, a->pad_len);
> +text_poke(orig + a->orig_len, buf, a->pad_len);
>  continue;
> +}

Is the expectation here that the alternative instructions already contain
optimised padding (including live patches)? Otherwise, why is the same
optimisation not needed later?

>  
>  memcpy(buf, repl, a->repl_len);
>  
> @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct 
> alt_instr *start,
>  if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
>  *(s32 *)(buf + 1) += repl - orig;
>  
> -add_nops(buf + a->repl_len, a->orig_len - a->repl_len);
> -text_poke(orig, buf, a->orig_len);
> +add_nops(buf + a->repl_len, total_len - a->repl_len);
> +text_poke(orig, buf, total_len);
>  }
>  }
>  
> diff --git a/xen/include/asm-x86/alternative-asm.h 
> b/xen/include/asm-x86/alternative-asm.h
> index 150bd1a..f7e37cb 100644
> --- a/xen/include/asm-x86/alternative-asm.h
> +++ b/xen/include/asm-x86/alternative-asm.h
> @@ -9,30 +9,41 @@
>   * enough information for the alternatives patching code to patch an
>   * instruction. See apply_alternatives().
>   */
> -.macro altinstruction_entry orig repl feature orig_len repl_len
> +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len
>  .long \orig - .
>  .long \repl - .
>  .word \feature
>  .byte \orig_len
>  .byte \repl_len
> +.byte \pad_len
>  .endm
>  
>  #define orig_len   (.L\@_orig_e   - .L\@_orig_s)
> +#define pad_len(.L\@_orig_p   - .L\@_orig_e)
> +#define total_len  (.L\@_orig_p   - .L\@_orig_s)
>  #define repl_len(nr)   (.L\@_repl_e\()nr  - .L\@_repl_s\()nr)
>  #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
> +#define gas_max(a, b)  ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))

What about clang's assembler? At least give it a stub to cause
compilation error?

>  
>  .macro ALTERNATIVE oldinstr, newinstr, feature
>  .L\@_orig_s:
>  \oldinstr
>  .L\@_orig_e:
> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90

Seeing the negation at the beginning, I suppose this should also be a
gas specific macro?

The rest looks good.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations

2018-02-12 Thread Andrew Cooper
The correct amount of padding in an origin patch site can be calculated
automatically, based on the relative lengths of the replacements.

This requires a bit of trickery to calculate correctly, especially in the
ALTERNATIVE_2 case where a branchless max() calculation is needed.  The
calculation is further complicated because GAS's idea of true is -1 rather
than 1, which is why the extra negations are required.

Additionally, have apply_alternatives() attempt to optimise the padding nops.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Konrad Rzeszutek Wilk 
CC: Roger Pau Monné 
CC: Wei Liu 
---
 xen/arch/x86/alternative.c| 32 
 xen/include/asm-x86/alternative-asm.h | 40 +++
 xen/include/asm-x86/alternative.h | 39 ++
 3 files changed, 89 insertions(+), 22 deletions(-)

diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
index f8ddab5..ec87ff4 100644
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct 
alt_instr *start,
 uint8_t *orig = ALT_ORIG_PTR(a);
 uint8_t *repl = ALT_REPL_PTR(a);
 uint8_t buf[MAX_PATCH_LEN];
+unsigned int total_len = a->orig_len + a->pad_len;
 
-BUG_ON(a->repl_len > a->orig_len);
-BUG_ON(a->orig_len > sizeof(buf));
+BUG_ON(a->repl_len > total_len);
+BUG_ON(total_len > sizeof(buf));
 BUG_ON(a->cpuid >= NCAPINTS * 32);
 
 if ( !boot_cpu_has(a->cpuid) )
+{
+unsigned int i;
+
+/* No replacement to make, but try to optimise any padding. */
+if ( a->pad_len <= 1 )
+continue;
+
+/* Search the padding area for any byte which isn't a nop. */
+for ( i = a->orig_len; i < total_len; ++i )
+if ( orig[i] != 0x90 )
+break;
+
+/*
+ * Only make any changes if all padding bytes are unoptimised
+ * nops.  With multiple alternatives over the same origin site, we
+ * may have already made a replacement, or optimised the nops.
+ */
+if ( i != total_len )
+continue;
+
+add_nops(buf, a->pad_len);
+text_poke(orig + a->orig_len, buf, a->pad_len);
 continue;
+}
 
 memcpy(buf, repl, a->repl_len);
 
@@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct 
alt_instr *start,
 if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
 *(s32 *)(buf + 1) += repl - orig;
 
-add_nops(buf + a->repl_len, a->orig_len - a->repl_len);
-text_poke(orig, buf, a->orig_len);
+add_nops(buf + a->repl_len, total_len - a->repl_len);
+text_poke(orig, buf, total_len);
 }
 }
 
diff --git a/xen/include/asm-x86/alternative-asm.h 
b/xen/include/asm-x86/alternative-asm.h
index 150bd1a..f7e37cb 100644
--- a/xen/include/asm-x86/alternative-asm.h
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -9,30 +9,41 @@
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig repl feature orig_len repl_len
+.macro altinstruction_entry orig repl feature orig_len repl_len pad_len
 .long \orig - .
 .long \repl - .
 .word \feature
 .byte \orig_len
 .byte \repl_len
+.byte \pad_len
 .endm
 
 #define orig_len   (.L\@_orig_e   - .L\@_orig_s)
+#define pad_len(.L\@_orig_p   - .L\@_orig_e)
+#define total_len  (.L\@_orig_p   - .L\@_orig_s)
 #define repl_len(nr)   (.L\@_repl_e\()nr  - .L\@_repl_s\()nr)
 #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
+#define gas_max(a, b)  ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
 
 .macro ALTERNATIVE oldinstr, newinstr, feature
 .L\@_orig_s:
 \oldinstr
 .L\@_orig_e:
+ .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90
+.L\@_orig_p:
 
 .pushsection .altinstructions, "a", @progbits
 altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \
-orig_len, repl_len(1)
+orig_len, repl_len(1), pad_len
 
 .section .discard, "a", @progbits
-/* Assembler-time check that \newinstr isn't longer than \oldinstr. */
-.byte 0xff + repl_len(1) - orig_len
+/*
+ * Assembler-time checks:
+ *   - total_len <= 255
+ *   - \newinstr <= total_len
+ */
+.byte total_len
+.byte 0xff + repl_len(1) - total_len
 
 .section .altinstr_replacement, "ax", @progbits
 
@@ -45,18 +56,26 @@
 .L\@_orig_s:
 \oldinstr
 .L\@_orig_e:
+.skip (-((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \
+ (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90
+.L\@_orig_p:
 
 .pushsection .altinstructions, "a", @progbits