On 08.08.2025 22:23, Andrew Cooper wrote: > Under FRED, there's one entrypoint from Ring 3, and one from Ring 0. > > FRED gives us a good stack (even for SYSCALL/SYSENTER), and a unified event > frame on the stack, meaing that all software needs to do is spill the GPRs > with a line of PUSHes. Introduce PUSH_AND_CLEAR_GPRS and POP_GPRS for this > purpose. > > Introduce entry_FRED_R0() which to a first appoximation is complete for all > event handling within Xen. > > entry_FRED_R0() needs deriving from entry_FRED_R3(), so introduce a basic > handler. There is more work required to make the return-to-guest path work > under FRED, so leave a BUG clearly in place. > > Also introduce entry_from_{xen,pv}() to be the C level handlers. By simply > copying regs->fred_ss.vector into regs->entry_vector, we can reuse all the > existing fault handlers. > > Extend fatal_trap() to render the event type, including by name, when FRED is > active. This is slightly complicated, because X86_ET_OTHER must not use > vector_name() or SYSCALL and SYSENTER get rendered as #BP and #DB. Also, > {read,write}_gs_shadow() needs modifying to avoid the SWAPGS instruction, > which is disallowed in FRED mode. > > This is sufficient to handle all interrupts and exceptions encountered during > development, including plenty of Double Faults. > > Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> > --- > CC: Jan Beulich <jbeul...@suse.com> > CC: Roger Pau Monné <roger....@citrix.com> > > SIMICS hasn't been updated to the FRED v9, and still wants ENDBR instructions > at the entrypoints. > --- > xen/arch/x86/include/asm/asm_defns.h | 65 ++++++++++++ > xen/arch/x86/include/asm/msr.h | 8 +- > xen/arch/x86/traps.c | 153 ++++++++++++++++++++++++++- > xen/arch/x86/x86_64/Makefile | 1 + > xen/arch/x86/x86_64/entry-fred.S | 35 ++++++ > 5 files changed, 256 insertions(+), 6 deletions(-) > create mode 100644 xen/arch/x86/x86_64/entry-fred.S > > diff --git a/xen/arch/x86/include/asm/asm_defns.h > b/xen/arch/x86/include/asm/asm_defns.h > index 72a0082d319d..a81a4043d0f1 100644 > --- a/xen/arch/x86/include/asm/asm_defns.h > +++ b/xen/arch/x86/include/asm/asm_defns.h > @@ -315,6 +315,71 @@ static always_inline void stac(void) > subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp > .endm > > +/* > + * Push and clear GPRs > + */ > +.macro PUSH_AND_CLEAR_GPRS > + push %rdi > + xor %edi, %edi > + push %rsi > + xor %esi, %esi > + push %rdx > + xor %edx, %edx > + push %rcx > + xor %ecx, %ecx > + push %rax > + xor %eax, %eax > + push %r8 > + xor %r8d, %r8d > + push %r9 > + xor %r9d, %r9d > + push %r10 > + xor %r10d, %r10d > + push %r11 > + xor %r11d, %r11d > + push %rbx > + xor %ebx, %ebx > + push %rbp > +#ifdef CONFIG_FRAME_POINTER > +/* Indicate special exception stack frame by inverting the frame pointer. */ > + mov %rsp, %rbp > + notq %rbp > +#else > + xor %ebp, %ebp > +#endif > + push %r12 > + xor %r12d, %r12d > + push %r13 > + xor %r13d, %r13d > + push %r14 > + xor %r14d, %r14d > + push %r15 > + xor %r15d, %r15d > +.endm > + > +/* > + * POP GPRs from a UREGS_* frame on the stack. Does not modify flags. > + * > + * @rax: Alternative destination for the %rax value on the stack. > + */ > +.macro POP_GPRS rax=%rax > + pop %r15 > + pop %r14 > + pop %r13 > + pop %r12 > + pop %rbp > + pop %rbx > + pop %r11 > + pop %r10 > + pop %r9 > + pop %r8 > + pop \rax > + pop %rcx > + pop %rdx > + pop %rsi > + pop %rdi > +.endm > + > #ifdef CONFIG_PV32 > #define CR4_PV32_RESTORE \ > ALTERNATIVE_2 "", \ > diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h > index b6b85b04c3fd..01f510315ffe 100644 > --- a/xen/arch/x86/include/asm/msr.h > +++ b/xen/arch/x86/include/asm/msr.h > @@ -202,9 +202,9 @@ static inline unsigned long read_gs_base(void) > > static inline unsigned long read_gs_shadow(void) > { > - unsigned long base; > + unsigned long base, cr4 = read_cr4(); > > - if ( read_cr4() & X86_CR4_FSGSBASE ) > + if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) ) > { > asm volatile ( "swapgs" ); > base = __rdgsbase(); > @@ -234,7 +234,9 @@ static inline void write_gs_base(unsigned long base) > > static inline void write_gs_shadow(unsigned long base) > { > - if ( read_cr4() & X86_CR4_FSGSBASE ) > + unsigned long cr4 = read_cr4(); > + > + if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) ) > { > asm volatile ( "swapgs\n\t" > "wrgsbase %0\n\t" > diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c > index 270b93ed623e..e67a428e4362 100644 > --- a/xen/arch/x86/traps.c > +++ b/xen/arch/x86/traps.c > @@ -1013,6 +1013,32 @@ void show_execution_state_nmi(const cpumask_t *mask, > bool show_all) > printk("Non-responding CPUs: {%*pbl}\n", > CPUMASK_PR(&show_state_mask)); > } > > +static const char *x86_et_name(unsigned int type) > +{ > + static const char *const names[] = { > + [X86_ET_EXT_INTR] = "EXT_INTR", > + [X86_ET_NMI] = "NMI", > + [X86_ET_HW_EXC] = "HW_EXC", > + [X86_ET_SW_INT] = "SW_INT", > + [X86_ET_PRIV_SW_EXC] = "PRIV_SW_EXEC", > + [X86_ET_SW_EXC] = "SW_EXEC", > + [X86_ET_OTHER] = "OTHER", > + }; > + > + return (type < ARRAY_SIZE(names) && names[type]) ? names[type] : "???"; > +} > + > +static const char *x86_et_other_name(unsigned int vec) > +{ > + static const char *const names[] = { > + [0] = "MTF", > + [1] = "SYSCALL", > + [2] = "SYSENTER", > + }; > + > + return (vec < ARRAY_SIZE(names) && names[vec][0]) ? names[vec] : "???"; > +} > + > const char *vector_name(unsigned int vec) > { > static const char names[][4] = { > @@ -1091,9 +1117,42 @@ void fatal_trap(const struct cpu_user_regs *regs, bool > show_remote) > } > } > > - panic("FATAL TRAP: vec %u, %s[%04x]%s\n", > - trapnr, vector_name(trapnr), regs->error_code, > - (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT"); > + if ( read_cr4() & X86_CR4_FRED ) > + { > + bool render_ec = false; > + const char *vec_name = NULL; > + > + switch ( regs->fred_ss.type ) > + { > + case X86_ET_HW_EXC: > + case X86_ET_SW_INT: > + case X86_ET_PRIV_SW_EXC: > + case X86_ET_SW_EXC: > + render_ec = true; > + vec_name = vector_name(regs->fred_ss.vector); > + break; > + > + case X86_ET_OTHER: > + vec_name = x86_et_other_name(regs->fred_ss.vector); > + break; > + } > + > + if ( render_ec ) > + panic("Fatal TRAP: type %u, %s, vec %u, %s[%04x]%s\n", > + regs->fred_ss.type, x86_et_name(regs->fred_ss.type), > + regs->fred_ss.vector, vec_name ?: "", > + regs->error_code, > + (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT > CONTEXT"); > + else > + panic("Fatal TRAP: type %u, %s, vec %u, %s%s\n", > + regs->fred_ss.type, x86_et_name(regs->fred_ss.type), > + regs->fred_ss.vector, vec_name ?: "", > + (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT > CONTEXT"); > + } > + else > + panic("FATAL TRAP: vec %u, %s[%04x]%s\n", > + trapnr, vector_name(trapnr), regs->error_code, > + (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT"); > } > > void asmlinkage noreturn do_unhandled_trap(struct cpu_user_regs *regs) > @@ -2181,6 +2240,94 @@ void asmlinkage check_ist_exit(const struct > cpu_user_regs *regs, bool ist_exit) > } > #endif > > +void asmlinkage entry_from_pv(struct cpu_user_regs *regs) > +{ > + /* Copy fred_ss.vector into entry_vector as IDT delivery would have > done. */ > + regs->entry_vector = regs->fred_ss.vector; > + > + switch ( regs->fred_ss.type ) > + { > + case X86_ET_EXT_INTR: > + do_IRQ(regs); > + break; > + > + case X86_ET_NMI: > + do_nmi(regs); > + break; > + > + case X86_ET_HW_EXC: > + case X86_ET_SW_INT: > + case X86_ET_PRIV_SW_EXC: > + case X86_ET_SW_EXC: > + goto fatal; > + > + default: > + goto fatal; > + } > + > + return; > + > + fatal: > + fatal_trap(regs, false); > +}
Noticed only now: Shouldn't this be surrounded with #ifdef CONFIG_PV (with knock-on effects elsewhere)? Jan