Re: [Xen-devel] [PATCH v3 5/6] x86/XPTI: reduce .text.entry

2018-03-16 Thread Jan Beulich
>>> On 15.03.18 at 18:10,  wrote:
> On 13/03/18 13:50, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_64/entry.S
>> +++ b/xen/arch/x86/x86_64/entry.S
>> @@ -14,8 +14,6 @@
>>  #include 
>>  #include 
>>  
>> -.section .text.entry, "ax", @progbits
>> -
>>  /* %rbx: struct vcpu */
>>  ENTRY(switch_to_kernel)
>>  leaq  VCPU_trap_bounce(%rbx),%rdx
>> @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel)
>>  movb  %cl,TRAPBOUNCE_flags(%rdx)
>>  call  create_bounce_frame
>>  andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
> 
> Newline here please, as test_all_events is logically a separate thing. 

I actually disagree in cases where the label sits in the middle of
straight-line code.

> It might be worth using an ALIGN, given how many jmps land here.

That'll again get us into the discussion of suitable NOPs. We'd
have to extend ALIGN by ALTERNATIVE_NOP for the result to
be as desired in all cases. Generally I'd expect straight-line
execution here to be (one of) the most common ways to reach
that label.
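
Illustration only (generic gas, not Xen's actual ALIGN definition):
padding emitted in front of a label is executed whenever the label is
reached by falling through, which is why the choice of NOP encoding
matters at all for such a target.

        # ... straight-line code ...
        .p2align 4, 0x90        # pad to 16 bytes with single-byte NOPs
label:                          # jump targets arrive aligned and skip
        cli                     # the padding; fall-through executes it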

Furthermore I intentionally didn't want to alter any of the moved
code, other than its spelling/formatting.

> Otherwise, Reviewed-by: Andrew Cooper 

I'll wait to record this until the above has been clarified.

Jan


Re: [Xen-devel] [PATCH v3 5/6] x86/XPTI: reduce .text.entry

2018-03-15 Thread Andrew Cooper
On 13/03/18 13:50, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -14,8 +14,6 @@
>  #include 
>  #include 
>  
> -.section .text.entry, "ax", @progbits
> -
>  /* %rbx: struct vcpu */
>  ENTRY(switch_to_kernel)
>  leaq  VCPU_trap_bounce(%rbx),%rdx
> @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel)
>  movb  %cl,TRAPBOUNCE_flags(%rdx)
>  call  create_bounce_frame
>  andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)

Newline here please, as test_all_events is logically a separate thing. 
It might be worth using an ALIGN, given how many jmps land here.
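
For illustration, the suggested layout would be something like this
(sketch only, not part of the patch; ALIGN as already used further
down for process_softirqs and friends):

        andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)

        ALIGN
/* %rbx: struct vcpu */
test_all_events:
        ASSERT_NOT_IN_ATOMIC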

Otherwise, Reviewed-by: Andrew Cooper 

> +/* %rbx: struct vcpu */
> +test_all_events:
> +ASSERT_NOT_IN_ATOMIC
> +cli # tests must not race interrupts
> +/*test_softirqs:*/
> +movl  VCPU_processor(%rbx), %eax
> +shll  $IRQSTAT_shift, %eax
> +leaq  irq_stat+IRQSTAT_softirq_pending(%rip), %rcx
> +cmpl  $0, (%rcx, %rax, 1)
> +jne   process_softirqs
> +cmpb  $0, VCPU_mce_pending(%rbx)
> +jne   process_mce
> +.Ltest_guest_nmi:
> +cmpb  $0, VCPU_nmi_pending(%rbx)
> +jne   process_nmi
> +test_guest_events:
> +movq  VCPU_vcpu_info(%rbx), %rax
> +movzwl VCPUINFO_upcall_pending(%rax), %eax
> +decl  %eax
> +cmpl  $0xfe, %eax
> +ja    restore_all_guest
> +/*process_guest_events:*/
> +sti
> +leaq  VCPU_trap_bounce(%rbx), %rdx
> +movq  VCPU_event_addr(%rbx), %rax
> +movq  %rax, TRAPBOUNCE_eip(%rdx)
> +movb  $TBF_INTERRUPT, TRAPBOUNCE_flags(%rdx)
> +call  create_bounce_frame
>  jmp   test_all_events
>  
> +ALIGN
> +/* %rbx: struct vcpu */
> +process_softirqs:
> +sti
> +call do_softirq
> +jmp  test_all_events
> +
> +ALIGN
> +/* %rbx: struct vcpu */
> +process_mce:
> +testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx)
> +jnz  .Ltest_guest_nmi
> +sti
> +movb $0, VCPU_mce_pending(%rbx)
> +call set_guest_machinecheck_trapbounce
> +test %eax, %eax
> +jz   test_all_events
> +movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
> +movb %dl, VCPU_mce_old_mask(%rbx)   # iret hypercall
> +orl  $1 << VCPU_TRAP_MCE, %edx
> +movb %dl, VCPU_async_exception_mask(%rbx)
> +jmp  process_trap
> +
> +ALIGN
> +/* %rbx: struct vcpu */
> +process_nmi:
> +testb $1 << VCPU_TRAP_NMI, VCPU_async_exception_mask(%rbx)
> +jnz  test_guest_events
> +sti
> +movb $0, VCPU_nmi_pending(%rbx)
> +call set_guest_nmi_trapbounce
> +test %eax, %eax
> +jz   test_all_events
> +movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
> +movb %dl, VCPU_nmi_old_mask(%rbx)   # iret hypercall
> +orl  $1 << VCPU_TRAP_NMI, %edx
> +movb %dl, VCPU_async_exception_mask(%rbx)
> +/* FALLTHROUGH */
> +process_trap:
> +leaq VCPU_trap_bounce(%rbx), %rdx
> +call create_bounce_frame
> +jmp  test_all_events
> +
> +/* No special register assumptions. */
> +ENTRY(ret_from_intr)
> +GET_CURRENT(bx)
> +testb $3, UREGS_cs(%rsp)
> +jz    restore_all_xen
> +movq  VCPU_domain(%rbx), %rax
> +cmpb  $0, DOMAIN_is_32bit_pv(%rax)
> +je    test_all_events
> +jmp   compat_test_all_events
> +
> +/* Enable NMIs.  No special register assumptions. Only %rax is not preserved. */
> +ENTRY(enable_nmis)
> +movq  %rsp, %rax /* Grab RSP before pushing */
> +
> +/* Set up stack frame */
> +pushq $0   /* SS */
> +pushq %rax /* RSP */
> +pushfq /* RFLAGS */
> +pushq $__HYPERVISOR_CS /* CS */
> +leaq  1f(%rip), %rax
> +pushq %rax /* RIP */
> +
> +iretq /* Disable the hardware NMI latch */
> +1:
> +ret
> +
> +.section .text.entry, "ax", @progbits
> +
>  /* %rbx: struct vcpu, interrupts disabled */
>  restore_all_guest:
>  ASSERT_INTERRUPTS_DISABLED
>


[Xen-devel] [PATCH v3 5/6] x86/XPTI: reduce .text.entry

2018-03-13 Thread Jan Beulich
This exposes fewer pieces of code and at the same time reduces the range
covered from slightly above 3 pages to a little below 2 of them.
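
Purely as an orientation aid (condensed from the compat/entry.S hunks
below, not additional changes): gas assembles code into the most
recently selected section, so the placement of the section directives
decides what stays in the entry area.

ENTRY(entry_int82)                        # now plain .text
        ...
.section .text.entry, "ax", @progbits     # XPTI entry area starts here
ENTRY(cstar_enter)
        ...
.text                                     # and ends here again
ENTRY(compat_sysenter)
        ...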

The code being moved is unchanged, except for the removal of trailing
blanks, the insertion of blanks between operands, and the dropping of a
pointless q suffix from "retq".
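
For illustration, the spelling-only adjustments are of this kind (the
"before" forms are reconstructed here, not quoted from the old file):

        cmpl  $0,(%rcx,%rax,1)   ->   cmpl  $0, (%rcx, %rax, 1)
        retq                     ->   ret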

A few more small pieces could be moved, but it seems better to me to
leave them where they are to not make it overly hard to follow code
paths.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -13,8 +13,6 @@
 #include 
 #include 
 
-.section .text.entry, "ax", @progbits
-
 ENTRY(entry_int82)
 ASM_CLAC
 pushq $0
@@ -192,6 +190,8 @@ ENTRY(compat_post_handle_exception)
 movb  $0,TRAPBOUNCE_flags(%rdx)
 jmp   compat_test_all_events
 
+.section .text.entry, "ax", @progbits
+
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
 ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
@@ -249,6 +249,8 @@ UNLIKELY_END(compat_syscall_gpf)
 movb  %cl,TRAPBOUNCE_flags(%rdx)
 jmp   .Lcompat_bounce_exception
 
+.text
+
 ENTRY(compat_sysenter)
 CR4_PV32_RESTORE
 movq  VCPU_trap_ctxt(%rbx),%rcx
@@ -268,9 +270,6 @@ ENTRY(compat_int80_direct_trap)
 call  compat_create_bounce_frame
 jmp   compat_test_all_events
 
-/* compat_create_bounce_frame & helpers don't need to be in .text.entry */
-.text
-
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:*/
 /*   {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]} */
 /* %rdx: trap_bounce, %rbx: struct vcpu  */
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -14,8 +14,6 @@
 #include 
 #include 
 
-.section .text.entry, "ax", @progbits
-
 /* %rbx: struct vcpu */
 ENTRY(switch_to_kernel)
 leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -34,8 +32,107 @@ ENTRY(switch_to_kernel)
 movb  %cl,TRAPBOUNCE_flags(%rdx)
 call  create_bounce_frame
 andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
+/* %rbx: struct vcpu */
+test_all_events:
+ASSERT_NOT_IN_ATOMIC
+cli # tests must not race interrupts
+/*test_softirqs:*/
+movl  VCPU_processor(%rbx), %eax
+shll  $IRQSTAT_shift, %eax
+leaq  irq_stat+IRQSTAT_softirq_pending(%rip), %rcx
+cmpl  $0, (%rcx, %rax, 1)
+jne   process_softirqs
+cmpb  $0, VCPU_mce_pending(%rbx)
+jne   process_mce
+.Ltest_guest_nmi:
+cmpb  $0, VCPU_nmi_pending(%rbx)
+jne   process_nmi
+test_guest_events:
+movq  VCPU_vcpu_info(%rbx), %rax
+movzwl VCPUINFO_upcall_pending(%rax), %eax
+decl  %eax
+cmpl  $0xfe, %eax
+ja    restore_all_guest
+/*process_guest_events:*/
+sti
+leaq  VCPU_trap_bounce(%rbx), %rdx
+movq  VCPU_event_addr(%rbx), %rax
+movq  %rax, TRAPBOUNCE_eip(%rdx)
+movb  $TBF_INTERRUPT, TRAPBOUNCE_flags(%rdx)
+call  create_bounce_frame
 jmp   test_all_events
 
+ALIGN
+/* %rbx: struct vcpu */
+process_softirqs:
+sti
+call do_softirq
+jmp  test_all_events
+
+ALIGN
+/* %rbx: struct vcpu */
+process_mce:
+testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx)
+jnz  .Ltest_guest_nmi
+sti
+movb $0, VCPU_mce_pending(%rbx)
+call set_guest_machinecheck_trapbounce
+test %eax, %eax
+jz   test_all_events
+movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
+movb %dl, VCPU_mce_old_mask(%rbx)   # iret hypercall
+orl  $1 << VCPU_TRAP_MCE, %edx
+movb %dl, VCPU_async_exception_mask(%rbx)
+jmp  process_trap
+
+ALIGN
+/* %rbx: struct vcpu */
+process_nmi:
+testb $1 << VCPU_TRAP_NMI, VCPU_async_exception_mask(%rbx)
+jnz  test_guest_events
+sti
+movb $0, VCPU_nmi_pending(%rbx)
+call set_guest_nmi_trapbounce
+test %eax, %eax
+jz   test_all_events
+movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
+movb %dl, VCPU_nmi_old_mask(%rbx)   # iret hypercall
+orl  $1 << VCPU_TRAP_NMI, %edx
+movb %dl, VCPU_async_exception_mask(%rbx)
+/* FALLTHROUGH */
+process_trap:
+leaq VCPU_trap_bounce(%rbx), %rdx
+call create_bounce_frame
+jmp  test_all_events
+
+/* No special register assumptions. */
+ENTRY(ret_from_intr)
+GET_CURRENT(bx)
+testb $3, UREGS_cs(%rsp)
+jz    restore_all_xen
+movq  VCPU_domain(%rbx), %rax
+cmpb  $0, DOMAIN_is_32bit_pv(%rax)
+je    test_all_events
+jmp   compat_test_all_events
+
+/* Enable NMIs.  No special register assumptions. Only %rax is not preserved. */
+ENTRY(enable_nmis)
+movq  %rsp, %rax /* Grab RSP before pushing */