Re: [Xen-devel] [PATCH v5 06/18] xen/arm64: Implement a fast path for handling SMCCC_ARCH_WORKAROUND_1

2018-02-26 Thread Andre Przywara
Hi,

On 23/02/18 18:57, Julien Grall wrote:
> The function SMCCC_ARCH_WORKAROUND_1 will be called by the guest for
> hardening the branch predictor. So we want the handling to be as fast as
> possible.
> 
> As the mitigation is applied on every guest exit, we can check for the
> call before saving all the context and return very early.
> 
> For now, only provide a fast path for HVC64 calls. Because the code relies
> on 2 registers, x0 and x1 are saved in advance.
> 
> Signed-off-by: Julien Grall 
> Reviewed-by: Volodymyr Babchuk 
> Reviewed-by: Stefano Stabellini 

Thanks, that looks good now.

Reviewed-by: Andre Przywara 

Cheers,
Andre.

> ---
> guest_sync only handles 64-bit guests, so I have only implemented the
> 64-bit side for now. We can discuss whether it is useful to
> implement it for 32-bit guests.
> 
> We could also consider implementing the fast path for SMC64,
> although a guest should always use HVC.
> 
> I decided to keep the reviewed-by tags, as mostly the documentation was
> updated to make it clearer.
> 
> Changes in v4:
> - Add Stefano's reviewed-by
> - Use xzr to clobber x1 instead of x0
> - Update comments in the code
> 
> Changes in v2:
> - Add Volodymyr's reviewed-by
> ---
>  xen/arch/arm/arm64/entry.S  | 59 +++--
>  xen/include/asm-arm/processor.h |  2 ++
>  2 files changed, 59 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
> index 6d99e46f0f..ffa9a1c492 100644
> --- a/xen/arch/arm/arm64/entry.S
> +++ b/xen/arch/arm/arm64/entry.S
> @@ -1,6 +1,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  /*
> @@ -90,8 +91,12 @@ lr  .req    x30 /* link register */
>  .endm
>  /*
>   * Save state on entry to hypervisor, restore on exit
> + *
> + * save_x0_x1: Does the macro need to save x0/x1? Defaults to 1.
> + * If 0, we rely on x0/x1 having been saved at the correct
> + * position on the stack beforehand.
>   */
> -.macro  entry, hyp, compat
> +.macro  entry, hyp, compat, save_x0_x1=1
>  sub sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
>  push    x28, x29
>  push    x26, x27
> @@ -107,7 +112,16 @@ lr  .req    x30 /* link register */
>  push    x6, x7
>  push    x4, x5
>  push    x2, x3
> +/*
> + * The caller may already have saved x0/x1 on the stack at the
> + * correct address and then clobbered the registers with other values.
> + * Only save them if save_x0_x1 == 1.
> + */
> +.if \save_x0_x1 == 1
>  push    x0, x1
> +.else
> +sub sp, sp, #16
> +.endif
>  
>  .if \hyp == 1/* Hypervisor mode */
>  
> @@ -200,7 +214,48 @@ hyp_irq:
>  exit    hyp=1
>  
>  guest_sync:
> -entry   hyp=0, compat=0
> +/*
> + * Save x0, x1 in advance
> + */
> +stp x0, x1, [sp, #-(UREGS_kernel_sizeof - UREGS_X0)]
> +
> +/*
> + * x1 is used because x0 may contain the function identifier.
> + * This avoids having to restore x0 from the stack.
> + */
> +mrs x1, esr_el2
> +lsr x1, x1, #HSR_EC_SHIFT   /* x1 = ESR_EL2.EC */
> +cmp x1, #HSR_EC_HVC64
> +b.ne    1f  /* Not an HVC, skip fastpath */
> +
> +mrs x1, esr_el2
> +and x1, x1, #0xffff/* Check the immediate [0:16] */
> +cbnz    x1, 1f  /* should be 0 for HVC #0 */
> +
> +/*
> + * Fastest path possible for ARM_SMCCC_ARCH_WORKAROUND_1.
> + * The workaround has already been applied on the exception
> + * entry from the guest, so let's quickly get back to the guest.
> + *
> + * Note that eor is used because the function identifier cannot
> + * be encoded as an immediate for cmp.
> + */
> +eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
> +cbnz    w0, 1f
> +
> +/*
> + * Clobber both x0 and x1 to prevent leakage. Note that, thanks
> + * to the eor, x0 = 0.
> + */
> +mov x1, xzr
> +eret
> +
> +1:
> +/*
> + * x0/x1 may have been scratched by the fast path above, so avoid
> + * saving them again.
> + */
> +entry   hyp=0, compat=0, save_x0_x1=0
>  /*
>   * The vSError will be checked while SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT
>   * is not set. If a vSError took place, the initial exception will be
> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> index c0f79d0093..222a02dd99 100644
> --- a/xen/include/asm-arm/processor.h
> +++ 

[Xen-devel] [PATCH v5 06/18] xen/arm64: Implement a fast path for handling SMCCC_ARCH_WORKAROUND_1

2018-02-23 Thread Julien Grall
The function SMCCC_ARCH_WORKAROUND_1 will be called by the guest for
hardening the branch predictor. So we want the handling to be as fast as
possible.

As the mitigation is applied on every guest exit, we can check for the
call before saving all the context and return very early.

For now, only provide a fast path for HVC64 calls. Because the code relies
on 2 registers, x0 and x1 are saved in advance.

Signed-off-by: Julien Grall 
Reviewed-by: Volodymyr Babchuk 
Reviewed-by: Stefano Stabellini 

---
guest_sync only handles 64-bit guests, so I have only implemented the
64-bit side for now. We can discuss whether it is useful to
implement it for 32-bit guests.

We could also consider implementing the fast path for SMC64,
although a guest should always use HVC.
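
For reference, a guest using the HVC conduit would trigger the workaround
roughly as in the sketch below. This is an illustrative, hypothetical snippet
and not part of the patch; the helper name is made up, and it assumes the
standard SMCCC 1.1 function identifier 0x80008000 for
ARM_SMCCC_ARCH_WORKAROUND_1:

    #include <stdint.h>

    /* SMCCC 1.1 function ID for ARCH_WORKAROUND_1 (assumed value). */
    #define ARM_SMCCC_ARCH_WORKAROUND_1_FID 0x80008000U

    /* Hypothetical guest-side helper: issue HVC #0 with the ID in x0. */
    static inline void smccc_arch_workaround_1(void)
    {
        register uint64_t x0 asm("x0") = ARM_SMCCC_ARCH_WORKAROUND_1_FID;

        asm volatile("hvc #0"
                     : "+r" (x0)            /* x0 may be clobbered */
                     :
                     : "x1", "x2", "x3", "memory");
    }

This is exactly the call shape the fast path in the diff below recognises:
HVC #0 with the function identifier in w0.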

I decided to keep the reviewed-by tags, as mostly the documentation was
updated to make it clearer.

Changes in v4:
- Add Stefano's reviewed-by
- Use xzr to clobber x1 instead of x0
- Update comments in the code

Changes in v2:
- Add Volodymyr's reviewed-by
---
 xen/arch/arm/arm64/entry.S  | 59 +++--
 xen/include/asm-arm/processor.h |  2 ++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
index 6d99e46f0f..ffa9a1c492 100644
--- a/xen/arch/arm/arm64/entry.S
+++ b/xen/arch/arm/arm64/entry.S
@@ -1,6 +1,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /*
@@ -90,8 +91,12 @@ lr  .req    x30 /* link register */
 .endm
 /*
  * Save state on entry to hypervisor, restore on exit
+ *
+ * save_x0_x1: Does the macro need to save x0/x1? Defaults to 1.
+ * If 0, we rely on x0/x1 having been saved at the correct
+ * position on the stack beforehand.
  */
-.macro  entry, hyp, compat
+.macro  entry, hyp, compat, save_x0_x1=1
 sub sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
 push    x28, x29
 push    x26, x27
@@ -107,7 +112,16 @@ lr  .req    x30 /* link register */
 push    x6, x7
 push    x4, x5
 push    x2, x3
+/*
+ * The caller may already have saved x0/x1 on the stack at the
+ * correct address and then clobbered the registers with other values.
+ * Only save them if save_x0_x1 == 1.
+ */
+.if \save_x0_x1 == 1
 push    x0, x1
+.else
+sub sp, sp, #16
+.endif
 
 .if \hyp == 1/* Hypervisor mode */
 
@@ -200,7 +214,48 @@ hyp_irq:
 exit    hyp=1
 
 guest_sync:
-entry   hyp=0, compat=0
+/*
+ * Save x0, x1 in advance
+ */
+stp x0, x1, [sp, #-(UREGS_kernel_sizeof - UREGS_X0)]
+
+/*
+ * x1 is used because x0 may contain the function identifier.
+ * This avoids having to restore x0 from the stack.
+ */
+mrs x1, esr_el2
+lsr x1, x1, #HSR_EC_SHIFT   /* x1 = ESR_EL2.EC */
+cmp x1, #HSR_EC_HVC64
+b.ne    1f  /* Not an HVC, skip fastpath */
+
+mrs x1, esr_el2
+and x1, x1, #0xffff/* Check the immediate [0:16] */
+cbnz    x1, 1f  /* should be 0 for HVC #0 */
+
+/*
+ * Fastest path possible for ARM_SMCCC_ARCH_WORKAROUND_1.
+ * The workaround has already been applied on the exception
+ * entry from the guest, so let's quickly get back to the guest.
+ *
+ * Note that eor is used because the function identifier cannot
+ * be encoded as an immediate for cmp.
+ */
+eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
+cbnz    w0, 1f
+
+/*
+ * Clobber both x0 and x1 to prevent leakage. Note that, thanks
+ * to the eor, x0 = 0.
+ */
+mov x1, xzr
+eret
+
+1:
+/*
+ * x0/x1 may have been scratched by the fast path above, so avoid
+ * saving them again.
+ */
+entry   hyp=0, compat=0, save_x0_x1=0
 /*
  * The vSError will be checked while SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT
  * is not set. If a vSError took place, the initial exception will be
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index c0f79d0093..222a02dd99 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -306,6 +306,8 @@
 #define HDCR_TPM    (_AC(1,U)<<6)   /* Trap Performance Monitors accesses */
 #define HDCR_TPMCR  (_AC(1,U)<<5)   /* Trap PMCR accesses */
 
+#define HSR_EC_SHIFT    26
+
 #define HSR_EC_UNKNOWN  0x00
 #define HSR_EC_WFI_WFE  0x01
 #define HSR_EC_CP15_32  0x03
-- 
2.11.0
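
For clarity, the conditions the fast path checks before falling back to the
full entry path can be summarised by the following C sketch. It is
illustrative only (the function name is made up); HSR_EC_HVC64 is the
AArch64 HVC exception class (0x16) and 0x80008000 is the assumed SMCCC 1.1
function ID for ARCH_WORKAROUND_1:

    #include <stdbool.h>
    #include <stdint.h>

    #define HSR_EC_SHIFT                    26
    #define HSR_EC_HVC64                    0x16
    #define ARM_SMCCC_ARCH_WORKAROUND_1_FID 0x80008000U /* assumed */

    /*
     * Hypothetical mirror of the guest_sync fast-path checks:
     *  - the exception class must be an HVC from AArch64,
     *  - the HVC immediate (ESR_EL2 ISS bits [15:0]) must be 0,
     *  - w0 must hold the ARCH_WORKAROUND_1 function identifier.
     */
    static bool is_workaround_1_fastpath(uint64_t esr_el2, uint32_t w0)
    {
        if ((esr_el2 >> HSR_EC_SHIFT) != HSR_EC_HVC64)
            return false;               /* not an HVC64: slow path */
        if ((esr_el2 & 0xffff) != 0)
            return false;               /* immediate must be 0 (HVC #0) */
        /* eor in the assembly; a plain comparison here */
        return (w0 ^ ARM_SMCCC_ARCH_WORKAROUND_1_FID) == 0;
    }

If all three checks pass, the assembly simply clears x0/x1 and erets back to
the guest without saving or restoring the rest of the context.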