Re: [Xen-devel] [PATCH v3 5/6] x86/XPTI: reduce .text.entry
>>> On 15.03.18 at 18:10, wrote: > On 13/03/18 13:50, Jan Beulich wrote: >> --- a/xen/arch/x86/x86_64/entry.S >> +++ b/xen/arch/x86/x86_64/entry.S >> @@ -14,8 +14,6 @@ >> #include >> #include >> >> -.section .text.entry, "ax", @progbits >> - >> /* %rbx: struct vcpu */ >> ENTRY(switch_to_kernel) >> leaq VCPU_trap_bounce(%rbx),%rdx >> @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel) >> movb %cl,TRAPBOUNCE_flags(%rdx) >> call create_bounce_frame >> andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp) > > Newline here please, as test_all_events is logically a separate thing. I actually disagree in cases where the label sits in the middle of straight line code. > It might be worth using an ALIGN, given how many jmps land here. That'll again get us into the discussion of suitable NOPs. We'd have to extend ALIGN by ALTERNATIVE_NOP for the result to be as desired for all cases. Generally I'd expect straight line execution here to be (one of) the most common ways to reach that label. Furthermore I intentionally didn't want to alter any of the moved code, other than its spelling/formatting. > Otherwise, Reviewed-by: Andrew Cooper I'll wait with recording this until the above was clarified. Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 5/6] x86/XPTI: reduce .text.entry
On 13/03/18 13:50, Jan Beulich wrote: > --- a/xen/arch/x86/x86_64/entry.S > +++ b/xen/arch/x86/x86_64/entry.S > @@ -14,8 +14,6 @@ > #include > #include > > -.section .text.entry, "ax", @progbits > - > /* %rbx: struct vcpu */ > ENTRY(switch_to_kernel) > leaq VCPU_trap_bounce(%rbx),%rdx > @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel) > movb %cl,TRAPBOUNCE_flags(%rdx) > call create_bounce_frame > andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp) Newline here please, as test_all_events is logically a separate thing. It might be worth using an ALIGN, given how many jmps land here. Otherwise, Reviewed-by: Andrew Cooper > +/* %rbx: struct vcpu */ > +test_all_events: > +ASSERT_NOT_IN_ATOMIC > +cli # tests must not race interrupts > +/*test_softirqs:*/ > +movl VCPU_processor(%rbx), %eax > +shll $IRQSTAT_shift, %eax > +leaq irq_stat+IRQSTAT_softirq_pending(%rip), %rcx > +cmpl $0, (%rcx, %rax, 1) > +jne process_softirqs > +cmpb $0, VCPU_mce_pending(%rbx) > +jne process_mce > +.Ltest_guest_nmi: > +cmpb $0, VCPU_nmi_pending(%rbx) > +jne process_nmi > +test_guest_events: > +movq VCPU_vcpu_info(%rbx), %rax > +movzwl VCPUINFO_upcall_pending(%rax), %eax > +decl %eax > +cmpl $0xfe, %eax > +jarestore_all_guest > +/*process_guest_events:*/ > +sti > +leaq VCPU_trap_bounce(%rbx), %rdx > +movq VCPU_event_addr(%rbx), %rax > +movq %rax, TRAPBOUNCE_eip(%rdx) > +movb $TBF_INTERRUPT, TRAPBOUNCE_flags(%rdx) > +call create_bounce_frame > jmp test_all_events > > +ALIGN > +/* %rbx: struct vcpu */ > +process_softirqs: > +sti > +call do_softirq > +jmp test_all_events > + > +ALIGN > +/* %rbx: struct vcpu */ > +process_mce: > +testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx) > +jnz .Ltest_guest_nmi > +sti > +movb $0, VCPU_mce_pending(%rbx) > +call set_guest_machinecheck_trapbounce > +test %eax, %eax > +jz test_all_events > +movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the > +movb %dl, VCPU_mce_old_mask(%rbx)# iret hypercall > +orl $1 << VCPU_TRAP_MCE, %edx > +movb %dl, VCPU_async_exception_mask(%rbx) > +jmp process_trap > + > +ALIGN > +/* %rbx: struct vcpu */ > +process_nmi: > +testb $1 << VCPU_TRAP_NMI, VCPU_async_exception_mask(%rbx) > +jnz test_guest_events > +sti > +movb $0, VCPU_nmi_pending(%rbx) > +call set_guest_nmi_trapbounce > +test %eax, %eax > +jz test_all_events > +movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the > +movb %dl, VCPU_nmi_old_mask(%rbx)# iret hypercall > +orl $1 << VCPU_TRAP_NMI, %edx > +movb %dl, VCPU_async_exception_mask(%rbx) > +/* FALLTHROUGH */ > +process_trap: > +leaq VCPU_trap_bounce(%rbx), %rdx > +call create_bounce_frame > +jmp test_all_events > + > +/* No special register assumptions. */ > +ENTRY(ret_from_intr) > +GET_CURRENT(bx) > +testb $3, UREGS_cs(%rsp) > +jzrestore_all_xen > +movq VCPU_domain(%rbx), %rax > +cmpb $0, DOMAIN_is_32bit_pv(%rax) > +jetest_all_events > +jmp compat_test_all_events > + > +/* Enable NMIs. No special register assumptions. Only %rax is not > preserved. */ > +ENTRY(enable_nmis) > +movq %rsp, %rax /* Grab RSP before pushing */ > + > +/* Set up stack frame */ > +pushq $0 /* SS */ > +pushq %rax /* RSP */ > +pushfq /* RFLAGS */ > +pushq $__HYPERVISOR_CS /* CS */ > +leaq 1f(%rip), %rax > +pushq %rax /* RIP */ > + > +iretq /* Disable the hardware NMI latch */ > +1: > +ret > + > +.section .text.entry, "ax", @progbits > + > /* %rbx: struct vcpu, interrupts disabled */ > restore_all_guest: > ASSERT_INTERRUPTS_DISABLED > ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 5/6] x86/XPTI: reduce .text.entry
This exposes less code pieces and at the same time reduces the range covered from slightly above 3 pages to a little below 2 of them. The code being moved is unchanged, except for the removal of trailing blanks, insertion of blanks between operands, and a pointless q suffix from "retq". A few more small pieces could be moved, but it seems better to me to leave them where they are to not make it overly hard to follow code paths. Signed-off-by: Jan Beulich --- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -13,8 +13,6 @@ #include #include -.section .text.entry, "ax", @progbits - ENTRY(entry_int82) ASM_CLAC pushq $0 @@ -192,6 +190,8 @@ ENTRY(compat_post_handle_exception) movb $0,TRAPBOUNCE_flags(%rdx) jmp compat_test_all_events +.section .text.entry, "ax", @progbits + /* See lstar_enter for entry register state. */ ENTRY(cstar_enter) ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI @@ -249,6 +249,8 @@ UNLIKELY_END(compat_syscall_gpf) movb %cl,TRAPBOUNCE_flags(%rdx) jmp .Lcompat_bounce_exception +.text + ENTRY(compat_sysenter) CR4_PV32_RESTORE movq VCPU_trap_ctxt(%rbx),%rcx @@ -268,9 +270,6 @@ ENTRY(compat_int80_direct_trap) call compat_create_bounce_frame jmp compat_test_all_events -/* compat_create_bounce_frame & helpers don't need to be in .text.entry */ -.text - /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:*/ /* {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]} */ /* %rdx: trap_bounce, %rbx: struct vcpu */ --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -14,8 +14,6 @@ #include #include -.section .text.entry, "ax", @progbits - /* %rbx: struct vcpu */ ENTRY(switch_to_kernel) leaq VCPU_trap_bounce(%rbx),%rdx @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel) movb %cl,TRAPBOUNCE_flags(%rdx) call create_bounce_frame andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp) +/* %rbx: struct vcpu */ +test_all_events: +ASSERT_NOT_IN_ATOMIC +cli # tests must not race interrupts +/*test_softirqs:*/ +movl VCPU_processor(%rbx), %eax +shll $IRQSTAT_shift, %eax +leaq irq_stat+IRQSTAT_softirq_pending(%rip), %rcx +cmpl $0, (%rcx, %rax, 1) +jne process_softirqs +cmpb $0, VCPU_mce_pending(%rbx) +jne process_mce +.Ltest_guest_nmi: +cmpb $0, VCPU_nmi_pending(%rbx) +jne process_nmi +test_guest_events: +movq VCPU_vcpu_info(%rbx), %rax +movzwl VCPUINFO_upcall_pending(%rax), %eax +decl %eax +cmpl $0xfe, %eax +jarestore_all_guest +/*process_guest_events:*/ +sti +leaq VCPU_trap_bounce(%rbx), %rdx +movq VCPU_event_addr(%rbx), %rax +movq %rax, TRAPBOUNCE_eip(%rdx) +movb $TBF_INTERRUPT, TRAPBOUNCE_flags(%rdx) +call create_bounce_frame jmp test_all_events +ALIGN +/* %rbx: struct vcpu */ +process_softirqs: +sti +call do_softirq +jmp test_all_events + +ALIGN +/* %rbx: struct vcpu */ +process_mce: +testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx) +jnz .Ltest_guest_nmi +sti +movb $0, VCPU_mce_pending(%rbx) +call set_guest_machinecheck_trapbounce +test %eax, %eax +jz test_all_events +movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the +movb %dl, VCPU_mce_old_mask(%rbx)# iret hypercall +orl $1 << VCPU_TRAP_MCE, %edx +movb %dl, VCPU_async_exception_mask(%rbx) +jmp process_trap + +ALIGN +/* %rbx: struct vcpu */ +process_nmi: +testb $1 << VCPU_TRAP_NMI, VCPU_async_exception_mask(%rbx) +jnz test_guest_events +sti +movb $0, VCPU_nmi_pending(%rbx) +call set_guest_nmi_trapbounce +test %eax, %eax +jz test_all_events +movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the +movb %dl, VCPU_nmi_old_mask(%rbx)# iret hypercall +orl $1 << VCPU_TRAP_NMI, %edx +movb %dl, VCPU_async_exception_mask(%rbx) +/* FALLTHROUGH */ +process_trap: +leaq VCPU_trap_bounce(%rbx), %rdx +call create_bounce_frame +jmp test_all_events + +/* No special register assumptions. */ +ENTRY(ret_from_intr) +GET_CURRENT(bx) +testb $3, UREGS_cs(%rsp) +jzrestore_all_xen +movq VCPU_domain(%rbx), %rax +cmpb $0, DOMAIN_is_32bit_pv(%rax) +jetest_all_events +jmp compat_test_all_events + +/* Enable NMIs. No special register assumptions. Only %rax is not preserved. */ +ENTRY(enable_nmis) +movq %rsp, %rax /* Grab RSP before pus