On Mon, Aug 10, 2015 at 09:26:02PM +0800, Zhichao Huang wrote:
> Implement switching of the debug registers. While the number
> of registers is massive, CPUs usually don't implement them all
> (A15 has 6 breakpoints and 4 watchpoints, which gives us a total
> of 22 registers "only").
> 
> Signed-off-by: Zhichao Huang <[email protected]>
> ---
>  arch/arm/kvm/interrupts_head.S | 170 ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 159 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
> index 7ac5e51..b9e7410 100644
> --- a/arch/arm/kvm/interrupts_head.S
> +++ b/arch/arm/kvm/interrupts_head.S
> @@ -5,6 +5,7 @@
>  #define VCPU_USR_SP          (VCPU_USR_REG(13))
>  #define VCPU_USR_LR          (VCPU_USR_REG(14))
>  #define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4))
> +#define CP14_OFFSET(_cp14_reg_idx) ((_cp14_reg_idx) * 4)
>  
>  /*
>   * Many of these macros need to access the VCPU structure, which is always
> @@ -239,6 +240,136 @@ vcpu    .req    r0              @ vcpu pointer always in r0
>       save_guest_regs_mode irq, #VCPU_IRQ_REGS
>  .endm
>  
> +/* Assume r10/r11/r12 are in use, clobbers r2-r3 */
> +.macro cp14_read_and_str base Op2 cp14_reg0 skip_num
> +     adr     r3, 1f
> +     add     r3, r3, \skip_num, lsl #3

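Can this code be compiled as Thumb-2? If so, are all of the instructions
below 32 bits wide? The computed jump advances by \skip_num << 3 bytes, so
it relies on every mrc/str pair occupying exactly 8 bytes.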

> +     bx      r3
> +1:
> +     mrc     p14, 0, r2, c0, c15, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+15)]
> +     mrc     p14, 0, r2, c0, c14, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+14)]
> +     mrc     p14, 0, r2, c0, c13, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+13)]
> +     mrc     p14, 0, r2, c0, c12, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+12)]
> +     mrc     p14, 0, r2, c0, c11, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+11)]
> +     mrc     p14, 0, r2, c0, c10, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+10)]
> +     mrc     p14, 0, r2, c0, c9, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+9)]
> +     mrc     p14, 0, r2, c0, c8, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+8)]
> +     mrc     p14, 0, r2, c0, c7, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+7)]
> +     mrc     p14, 0, r2, c0, c6, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+6)]
> +     mrc     p14, 0, r2, c0, c5, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+5)]
> +     mrc     p14, 0, r2, c0, c4, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+4)]
> +     mrc     p14, 0, r2, c0, c3, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+3)]
> +     mrc     p14, 0, r2, c0, c2, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+2)]
> +     mrc     p14, 0, r2, c0, c1, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0+1)]
> +     mrc     p14, 0, r2, c0, c0, \Op2
> +     str     r2, [\base, #CP14_OFFSET(\cp14_reg0)]
> +.endm
> +
> +/* Assume r11/r12 are in use, clobbers r2-r3 */
> +.macro cp14_ldr_and_write base Op2 cp14_reg0 skip_num
> +     adr     r3, 1f
> +     add     r3, r3, \skip_num, lsl #3

See above: the same Thumb-2 instruction-width question applies to this
computed jump.

> +     bx      r3
> +1:
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+15)]
> +     mcr     p14, 0, r2, c0, c15, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+14)]
> +     mcr     p14, 0, r2, c0, c14, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+13)]
> +     mcr     p14, 0, r2, c0, c13, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+12)]
> +     mcr     p14, 0, r2, c0, c12, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+11)]
> +     mcr     p14, 0, r2, c0, c11, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+10)]
> +     mcr     p14, 0, r2, c0, c10, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+9)]
> +     mcr     p14, 0, r2, c0, c9, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+8)]
> +     mcr     p14, 0, r2, c0, c8, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+7)]
> +     mcr     p14, 0, r2, c0, c7, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+6)]
> +     mcr     p14, 0, r2, c0, c6, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+5)]
> +     mcr     p14, 0, r2, c0, c5, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+4)]
> +     mcr     p14, 0, r2, c0, c4, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+3)]
> +     mcr     p14, 0, r2, c0, c3, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+2)]
> +     mcr     p14, 0, r2, c0, c2, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0+1)]
> +     mcr     p14, 0, r2, c0, c1, \Op2
> +     ldr     r2, [\base, #CP14_OFFSET(\cp14_reg0)]
> +     mcr     p14, 0, r2, c0, c0, \Op2
> +.endm
> +
> +/* Get extract number of BRPs and WRPs. Saved in r11/r12 */
> +.macro read_hw_dbg_num
> +     mrc     p14, 0, r2, c0, c0, 0

Can you add an "@ DBGDIDR" comment here, so we know which register we are
looking at?

> +     ubfx    r11, r2, #24, #4
> +     add     r11, r11, #1            @ Extract BRPs
> +     ubfx    r12, r2, #28, #4
> +     add     r12, r12, #1            @ Extract WRPs
> +     mov     r2, #16
> +     sub     r11, r2, r11            @ How many BPs to skip
> +     sub     r12, r2, r12            @ How many WPs to skip
> +.endm
> +
> +/* Reads cp14 registers from hardware.
> + * Writes cp14 registers in-order to the CP14 struct pointed to by r10
> + *
> + * Assumes vcpu pointer in vcpu reg
> + *
> + * Clobbers r2-r12
> + */
> +.macro save_debug_state
> +     read_hw_dbg_num
> +     cp14_read_and_str r10, 4, cp14_DBGBVR0, r11
> +     cp14_read_and_str r10, 5, cp14_DBGBCR0, r11
> +     cp14_read_and_str r10, 6, cp14_DBGWVR0, r12
> +     cp14_read_and_str r10, 7, cp14_DBGWCR0, r12
> +
> +     /* DBGDSCR reg */
> +     mrc     p14, 0, r2, c0, c1, 0
> +     str     r2, [r10, #CP14_OFFSET(cp14_DBGDSCRext)]

So again we're touching the scary register on every world switch. Since
it sounds like we have experience telling us that this can cause
trouble, I'm wondering if we can get around it as follows:

Only ever allow the guest to use the debug registers if enter_monitor_mode
succeeded on the host, and in that case only allow guest debugging with
the DBGDSCR configuration that the host has.

If the host never managed to enable debugging, the guest probably won't
succeed either, and we should just trap all guest accesses to the debug
registers.

Does this work?

> +.endm
> +
> +/* Reads cp14 registers in-order from the CP14 struct pointed to by r10
> + * Writes cp14 registers to hardware.
> + *
> + * Assumes vcpu pointer in vcpu reg
> + *
> + * Clobbers r2-r12
> + */
> +.macro restore_debug_state
> +     read_hw_dbg_num
> +     cp14_ldr_and_write r10, 4, cp14_DBGBVR0, r11
> +     cp14_ldr_and_write r10, 5, cp14_DBGBCR0, r11
> +     cp14_ldr_and_write r10, 6, cp14_DBGWVR0, r12
> +     cp14_ldr_and_write r10, 7, cp14_DBGWCR0, r12
> +
> +     /* DBGDSCR reg */
> +     ldr     r2, [r10, #CP14_OFFSET(cp14_DBGDSCRext)]
> +     mcr     p14, 0, r2, c0, c2, 2

Same as above: this writes DBGDSCR on every world switch.

> +.endm
> +
>  /* Reads cp14/cp15 registers from hardware and stores them in memory
>   * @store_to_vcpu: If 0, registers are written in-order to the stack,
>   *              otherwise to the VCPU struct pointed to by vcpup
> @@ -248,11 +379,17 @@ vcpu    .req    r0              @ vcpu pointer always in r0
>   * Clobbers r2 - r12
>   */
>  .macro read_coproc_state store_to_vcpu
> -     .if \store_to_vcpu == 0
> -     mrc     p14, 0, r2, c0, c1, 0   @ DBGDSCR
> -     push    {r2}
> +     .if \store_to_vcpu == 1
> +     add     r10, vcpu, #VCPU_CP14
> +     .else
> +     add     r10, vcpu, #VCPU_HOST_CONTEXT
> +     ldr     r10, [r10]
> +     add     r10, r10, #VCPU_CP14_HOST
>       .endif
>  
> +     /* Assumes r10 pointer in cp14 regs  */
> +     bl __save_debug_state
> +
>       mrc     p15, 0, r2, c1, c0, 0   @ SCTLR
>       mrc     p15, 0, r3, c1, c0, 2   @ CPACR
>       mrc     p15, 0, r4, c2, c0, 2   @ TTBCR
> @@ -331,6 +468,17 @@ vcpu     .req    r0              @ vcpu pointer always in r0
>   * Assumes vcpu pointer in vcpu reg
>   */
>  .macro write_coproc_state read_from_vcpu
> +     .if \read_from_vcpu == 1
> +     add     r10, vcpu, #VCPU_CP14
> +     .else
> +     add     r10, vcpu, #VCPU_HOST_CONTEXT
> +     ldr     r10, [r10]
> +     add     r10, r10, #VCPU_CP14_HOST
> +     .endif
> +
> +     /* Assumes r10 pointer in cp14 regs  */
> +     bl __restore_debug_state
> +
>       .if \read_from_vcpu == 0
>       pop     {r2,r4-r7}
>       .else
> @@ -399,14 +547,6 @@ vcpu     .req    r0              @ vcpu pointer always in r0
>       mcr     p15, 0, r10, c10, c2, 0 @ PRRR
>       mcr     p15, 0, r11, c10, c2, 1 @ NMRR
>       mcr     p15, 2, r12, c0, c0, 0  @ CSSELR
> -
> -     .if \read_from_vcpu == 0
> -     pop     {r2}
> -     .else
> -     mov     r2, #0
> -     .endif
> -
> -     mcr     p14, 0, r2, c0, c2, 2   @ DBGDSCR
>  .endm
>  
>  /*
> @@ -657,3 +797,11 @@ ARM_BE8(rev      r6, r6  )
>  .macro load_vcpu
>       mrc     p15, 4, vcpu, c13, c0, 2        @ HTPIDR
>  .endm
> +
> +__save_debug_state:
> +     save_debug_state
> +     bx      lr
> +
> +__restore_debug_state:
> +     restore_debug_state
> +     bx      lr
> -- 
> 1.7.12.4
> 

The rest of the register mangling looks ok this time though.

-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to