On 08.08.2025 22:23, Andrew Cooper wrote:
> Under FRED, there's one entrypoint from Ring 3, and one from Ring 0.
> 
> FRED gives us a good stack (even for SYSCALL/SYSENTER), and a unified event
> frame on the stack, meaing that all software needs to do is spill the GPRs
> with a line of PUSHes.  Introduce PUSH_AND_CLEAR_GPRS and POP_GPRS for this
> purpose.
> 
> Introduce entry_FRED_R0() which to a first appoximation is complete for all
> event handling within Xen.
> 
> entry_FRED_R0() needs deriving from entry_FRED_R3(), so introduce a basic
> handler.  There is more work required to make the return-to-guest path work
> under FRED, so leave a BUG clearly in place.
> 
> Also introduce entry_from_{xen,pv}() to be the C level handlers.  By simply
> copying regs->fred_ss.vector into regs->entry_vector, we can reuse all the
> existing fault handlers.
> 
> Extend fatal_trap() to render the event type, including by name, when FRED is
> active.  This is slightly complicated, because X86_ET_OTHER must not use
> vector_name() or SYSCALL and SYSENTER get rendered as #BP and #DB.  Also,
> {read,write}_gs_shadow() needs modifying to avoid the SWAPGS instruction,
> which is disallowed in FRED mode.
> 
> This is sufficient to handle all interrupts and exceptions encountered during
> development, including plenty of Double Faults.
> 
> Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
> ---
> CC: Jan Beulich <jbeul...@suse.com>
> CC: Roger Pau Monné <roger....@citrix.com>
> 
> SIMICS hasn't been updated to the FRED v9, and still wants ENDBR instructions
> at the entrypoints.
> ---
>  xen/arch/x86/include/asm/asm_defns.h |  65 ++++++++++++
>  xen/arch/x86/include/asm/msr.h       |   8 +-
>  xen/arch/x86/traps.c                 | 153 ++++++++++++++++++++++++++-
>  xen/arch/x86/x86_64/Makefile         |   1 +
>  xen/arch/x86/x86_64/entry-fred.S     |  35 ++++++
>  5 files changed, 256 insertions(+), 6 deletions(-)
>  create mode 100644 xen/arch/x86/x86_64/entry-fred.S
> 
> diff --git a/xen/arch/x86/include/asm/asm_defns.h 
> b/xen/arch/x86/include/asm/asm_defns.h
> index 72a0082d319d..a81a4043d0f1 100644
> --- a/xen/arch/x86/include/asm/asm_defns.h
> +++ b/xen/arch/x86/include/asm/asm_defns.h
> @@ -315,6 +315,71 @@ static always_inline void stac(void)
>          subq  $-(UREGS_error_code-UREGS_r15+\adj), %rsp
>  .endm
>  
> +/*
> + * Push and clear GPRs
> + */
> +.macro PUSH_AND_CLEAR_GPRS
> +        push  %rdi
> +        xor   %edi, %edi
> +        push  %rsi
> +        xor   %esi, %esi
> +        push  %rdx
> +        xor   %edx, %edx
> +        push  %rcx
> +        xor   %ecx, %ecx
> +        push  %rax
> +        xor   %eax, %eax
> +        push  %r8
> +        xor   %r8d, %r8d
> +        push  %r9
> +        xor   %r9d, %r9d
> +        push  %r10
> +        xor   %r10d, %r10d
> +        push  %r11
> +        xor   %r11d, %r11d
> +        push  %rbx
> +        xor   %ebx, %ebx
> +        push  %rbp
> +#ifdef CONFIG_FRAME_POINTER
> +/* Indicate special exception stack frame by inverting the frame pointer. */
> +        mov   %rsp, %rbp
> +        notq  %rbp
> +#else
> +        xor   %ebp, %ebp
> +#endif
> +        push  %r12
> +        xor   %r12d, %r12d
> +        push  %r13
> +        xor   %r13d, %r13d
> +        push  %r14
> +        xor   %r14d, %r14d
> +        push  %r15
> +        xor   %r15d, %r15d
> +.endm
> +
> +/*
> + * POP GPRs from a UREGS_* frame on the stack.  Does not modify flags.
> + *
> + * @rax: Alternative destination for the %rax value on the stack.
> + */
> +.macro POP_GPRS rax=%rax
> +        pop   %r15
> +        pop   %r14
> +        pop   %r13
> +        pop   %r12
> +        pop   %rbp
> +        pop   %rbx
> +        pop   %r11
> +        pop   %r10
> +        pop   %r9
> +        pop   %r8
> +        pop   \rax
> +        pop   %rcx
> +        pop   %rdx
> +        pop   %rsi
> +        pop   %rdi
> +.endm
> +
>  #ifdef CONFIG_PV32
>  #define CR4_PV32_RESTORE                               \
>      ALTERNATIVE_2 "",                                  \
> diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
> index b6b85b04c3fd..01f510315ffe 100644
> --- a/xen/arch/x86/include/asm/msr.h
> +++ b/xen/arch/x86/include/asm/msr.h
> @@ -202,9 +202,9 @@ static inline unsigned long read_gs_base(void)
>  
>  static inline unsigned long read_gs_shadow(void)
>  {
> -    unsigned long base;
> +    unsigned long base, cr4 = read_cr4();
>  
> -    if ( read_cr4() & X86_CR4_FSGSBASE )
> +    if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
>      {
>          asm volatile ( "swapgs" );
>          base = __rdgsbase();
> @@ -234,7 +234,9 @@ static inline void write_gs_base(unsigned long base)
>  
>  static inline void write_gs_shadow(unsigned long base)
>  {
> -    if ( read_cr4() & X86_CR4_FSGSBASE )
> +    unsigned long cr4 = read_cr4();
> +
> +    if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
>      {
>          asm volatile ( "swapgs\n\t"
>                         "wrgsbase %0\n\t"
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 270b93ed623e..e67a428e4362 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -1013,6 +1013,32 @@ void show_execution_state_nmi(const cpumask_t *mask, 
> bool show_all)
>          printk("Non-responding CPUs: {%*pbl}\n", 
> CPUMASK_PR(&show_state_mask));
>  }
>  
> +static const char *x86_et_name(unsigned int type)
> +{
> +    static const char *const names[] = {
> +        [X86_ET_EXT_INTR]    = "EXT_INTR",
> +        [X86_ET_NMI]         = "NMI",
> +        [X86_ET_HW_EXC]      = "HW_EXC",
> +        [X86_ET_SW_INT]      = "SW_INT",
> +        [X86_ET_PRIV_SW_EXC] = "PRIV_SW_EXEC",
> +        [X86_ET_SW_EXC]      = "SW_EXEC",
> +        [X86_ET_OTHER]       = "OTHER",
> +    };
> +
> +    return (type < ARRAY_SIZE(names) && names[type]) ? names[type] : "???";
> +}
> +
> +static const char *x86_et_other_name(unsigned int vec)
> +{
> +    static const char *const names[] = {
> +        [0] = "MTF",
> +        [1] = "SYSCALL",
> +        [2] = "SYSENTER",
> +    };
> +
> +    return (vec < ARRAY_SIZE(names) && names[vec][0]) ? names[vec] : "???";
> +}
> +
>  const char *vector_name(unsigned int vec)
>  {
>      static const char names[][4] = {
> @@ -1091,9 +1117,42 @@ void fatal_trap(const struct cpu_user_regs *regs, bool 
> show_remote)
>          }
>      }
>  
> -    panic("FATAL TRAP: vec %u, %s[%04x]%s\n",
> -          trapnr, vector_name(trapnr), regs->error_code,
> -          (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT");
> +    if ( read_cr4() & X86_CR4_FRED )
> +    {
> +        bool render_ec = false;
> +        const char *vec_name = NULL;
> +
> +        switch ( regs->fred_ss.type )
> +        {
> +        case X86_ET_HW_EXC:
> +        case X86_ET_SW_INT:
> +        case X86_ET_PRIV_SW_EXC:
> +        case X86_ET_SW_EXC:
> +            render_ec = true;
> +            vec_name = vector_name(regs->fred_ss.vector);
> +            break;
> +
> +        case X86_ET_OTHER:
> +            vec_name = x86_et_other_name(regs->fred_ss.vector);
> +            break;
> +        }
> +
> +        if ( render_ec )
> +            panic("Fatal TRAP: type %u, %s, vec %u, %s[%04x]%s\n",
> +                  regs->fred_ss.type, x86_et_name(regs->fred_ss.type),
> +                  regs->fred_ss.vector, vec_name ?: "",
> +                  regs->error_code,
> +                  (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT 
> CONTEXT");
> +        else
> +            panic("Fatal TRAP: type %u, %s, vec %u, %s%s\n",
> +                  regs->fred_ss.type, x86_et_name(regs->fred_ss.type),
> +                  regs->fred_ss.vector, vec_name ?: "",
> +                  (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT 
> CONTEXT");
> +    }
> +    else
> +        panic("FATAL TRAP: vec %u, %s[%04x]%s\n",
> +              trapnr, vector_name(trapnr), regs->error_code,
> +              (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT");
>  }
>  
>  void asmlinkage noreturn do_unhandled_trap(struct cpu_user_regs *regs)
> @@ -2181,6 +2240,94 @@ void asmlinkage check_ist_exit(const struct 
> cpu_user_regs *regs, bool ist_exit)
>  }
>  #endif
>  
> +void asmlinkage entry_from_pv(struct cpu_user_regs *regs)
> +{
> +    /* Copy fred_ss.vector into entry_vector as IDT delivery would have 
> done. */
> +    regs->entry_vector = regs->fred_ss.vector;
> +
> +    switch ( regs->fred_ss.type )
> +    {
> +    case X86_ET_EXT_INTR:
> +        do_IRQ(regs);
> +        break;
> +
> +    case X86_ET_NMI:
> +        do_nmi(regs);
> +        break;
> +
> +    case X86_ET_HW_EXC:
> +    case X86_ET_SW_INT:
> +    case X86_ET_PRIV_SW_EXC:
> +    case X86_ET_SW_EXC:
> +        goto fatal;
> +
> +    default:
> +        goto fatal;
> +    }
> +
> +    return;
> +
> + fatal:
> +    fatal_trap(regs, false);
> +}

Noticed only now: Shouldn't this be surrounded with #ifdef CONFIG_PV (with
knock-on effects elsewhere)?

Jan

Reply via email to