Re: [Xen-devel] [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
On 17/03/16 16:14, Jan Beulich wrote: > Instead of addressing these fields via the base of the stack (which > uniformly requires 4-byte displacements), address them from the end > (which for everything other than guest_cpu_user_regs requires just > 1-byte ones). This yields a code size reduction somewhere between 8k > and 12k in my builds. > > Signed-off-by: Jan BeulichReviewed-by: Andrew Cooper ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
>>> On 30.03.16 at 16:28,wrote: > On Tue, Mar 29, 2016 at 12:59:24AM -0600, Jan Beulich wrote: >> >>> On 25.03.16 at 19:47, wrote: >> > On Thu, Mar 17, 2016 at 10:14:22AM -0600, Jan Beulich wrote: >> > >> > Something is off with your patch. This is 5/4 :-) >> >> Well, yes - this got added later on top of the previously sent series, >> to make the dependency obvious. >> >> >> Instead of addressing these fields via the base of the stack (which >> >> uniformly requires 4-byte displacements), address them from the end >> >> (which for everything other than guest_cpu_user_regs requires just >> >> 1-byte ones). This yields a code size reduction somewhere between 8k >> >> and 12k in my builds. >> > >> > Also you made the macro a bit different - the %r is removed. >> > >> > Particular reason? >> >> This is an integral part of the change, so the macro can derive >> e.g. both %eax and %rax from the passed argument > > Could you pls include that explanation in the commit description.. > > And with that you can put Reviewed-by: Konrad Rzeszutek Wilk > > > on the patch. Well, I've got one of these already, without having been asked to do any adjustments. Did I mis-interpret <20160325184701.ge20...@char.us.oracle.com>? As to adding something like this to the commit message: I have a hard time seeing how this would belong there. The fact _that_ this is being done is very obvious from the patch itself, and once the reader has spotted this the fact _why_ this is needed then should become immediately obvious too. I'm all for explaining technically difficult or hard to see things, but I'm against needless bloat. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
On Tue, Mar 29, 2016 at 12:59:24AM -0600, Jan Beulich wrote: > >>> On 25.03.16 at 19:47,wrote: > > On Thu, Mar 17, 2016 at 10:14:22AM -0600, Jan Beulich wrote: > > > > Something is off with your patch. This is 5/4 :-) > > Well, yes - this got added later on top of the previously sent series, > to make the dependency obvious. > > >> Instead of addressing these fields via the base of the stack (which > >> uniformly requires 4-byte displacements), address them from the end > >> (which for everything other than guest_cpu_user_regs requires just > >> 1-byte ones). This yields a code size reduction somewhere between 8k > >> and 12k in my builds. > > > > Also you made the macro a bit different - the %r is removed. > > > > Particular reason? > > This is an integral part of the change, so the macro can derive > e.g. both %eax and %rax from the passed argument Could you pls include that explanation in the commit description.. And with that you can put Reviewed-by: Konrad Rzeszutek Wilk on the patch. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
>>> On 25.03.16 at 19:47,wrote: > On Thu, Mar 17, 2016 at 10:14:22AM -0600, Jan Beulich wrote: > > Something is off with your patch. This is 5/4 :-) Well, yes - this got added later on top of the previously sent series, to make the dependency obvious. >> Instead of addressing these fields via the base of the stack (which >> uniformly requires 4-byte displacements), address them from the end >> (which for everything other than guest_cpu_user_regs requires just >> 1-byte ones). This yields a code size reduction somewhere between 8k >> and 12k in my builds. > > Also you made the macro a bit different - the %r is removed. > > Particular reason? This is an integral part of the change, so the macro can derive e.g. both %eax and %rax from the passed argument Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
On Thu, Mar 17, 2016 at 10:14:22AM -0600, Jan Beulich wrote: Something is off with your patch. This is 5/4 :-) > Instead of addressing these fields via the base of the stack (which > uniformly requires 4-byte displacements), address them from the end > (which for everything other than guest_cpu_user_regs requires just > 1-byte ones). This yields a code size reduction somewhere between 8k > and 12k in my builds. Also you made the macro a bit different - the %r is removed. Particular reason? > > Signed-off-by: Jan Beulich> --- > Note that just like patch 4 of the series this also isn't directly > related to the SMEP/SMAP issue, but is again just a result of things > realized while doing that work, and again depends on the earlier > patches to apply cleanly. > .. snip.. > --- a/xen/include/asm-x86/asm_defns.h > +++ b/xen/include/asm-x86/asm_defns.h > @@ -127,19 +127,19 @@ void ret_from_intr(void); > UNLIKELY_DONE(mp, tag); \ > __UNLIKELY_END(tag) > > -#define STACK_CPUINFO_FIELD(field) > (STACK_SIZE-CPUINFO_sizeof+CPUINFO_##field) > -#define GET_STACK_BASE(reg) \ > -movq $~(STACK_SIZE-1),reg;\ > -andq %rsp,reg > +#define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field) > +#define GET_STACK_END(reg)\ > +movl $STACK_SIZE-1, %e##reg; \ > +orq %rsp, %r##reg > > #define GET_CPUINFO_FIELD(field, reg) \ > -GET_STACK_BASE(reg); \ > -addq $STACK_CPUINFO_FIELD(field),reg > +GET_STACK_END(reg); \ > +addq $STACK_CPUINFO_FIELD(field), %r##reg Not subq? The GET_STACK_END gets us ..[ edit: missed first time the change to STACK_CPUINFO_FIELD]. Reviewed-by: Konrad Rzeszutek Wilk ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
Instead of addressing these fields via the base of the stack (which uniformly requires 4-byte displacements), address them from the end (which for everything other than guest_cpu_user_regs requires just 1-byte ones). This yields a code size reduction somewhere between 8k and 12k in my builds. Signed-off-by: Jan Beulich--- Note that just like patch 4 of the series this also isn't directly related to the SMEP/SMAP issue, but is again just a result of things realized while doing that work, and again depends on the earlier patches to apply cleanly. --- a/xen/arch/x86/hvm/svm/entry.S +++ b/xen/arch/x86/hvm/svm/entry.S @@ -31,7 +31,7 @@ #define CLGI .byte 0x0F,0x01,0xDD ENTRY(svm_asm_do_resume) -GET_CURRENT(%rbx) +GET_CURRENT(bx) .Lsvm_do_resume: call svm_intr_assist mov %rsp,%rdi @@ -97,7 +97,7 @@ UNLIKELY_END(svm_trace) VMRUN -GET_CURRENT(%rax) +GET_CURRENT(ax) push %rdi push %rsi push %rdx --- a/xen/arch/x86/hvm/vmx/entry.S +++ b/xen/arch/x86/hvm/vmx/entry.S @@ -40,7 +40,7 @@ ENTRY(vmx_asm_vmexit_handler) push %r10 push %r11 push %rbx -GET_CURRENT(%rbx) +GET_CURRENT(bx) push %rbp push %r12 push %r13 @@ -113,7 +113,7 @@ UNLIKELY_END(realmode) BUG /* vmx_vmentry_failure() shouldn't return. */ ENTRY(vmx_asm_do_vmentry) -GET_CURRENT(%rbx) +GET_CURRENT(bx) jmp .Lvmx_do_vmentry .Lvmx_goto_emulator: --- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -26,7 +26,7 @@ UNLIKELY_START(ne, msi_check) UNLIKELY_END(msi_check) movl UREGS_rax(%rsp),%eax -GET_CURRENT(%rbx) +GET_CURRENT(bx) cmpl $NR_hypercalls,%eax jae compat_bad_hypercall @@ -202,7 +202,7 @@ ENTRY(compat_restore_all_guest) /* This mustn't modify registers other than %rax. */ ENTRY(cr4_pv32_restore) push %rdx -GET_CPUINFO_FIELD(cr4, %rdx) +GET_CPUINFO_FIELD(cr4, dx) mov (%rdx), %rax test $X86_CR4_SMEP|X86_CR4_SMAP,%eax jnz 0f @@ -245,7 +245,7 @@ ENTRY(cstar_enter) pushq %rcx pushq $0 SAVE_VOLATILE TRAP_syscall -GET_CURRENT(%rbx) +GET_CURRENT(bx) movq VCPU_domain(%rbx),%rcx cmpb $0,DOMAIN_is_32bit_pv(%rcx) jeswitch_to_kernel --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -97,7 +97,7 @@ ENTRY(lstar_enter) pushq %rcx pushq $0 SAVE_VOLATILE TRAP_syscall -GET_CURRENT(%rbx) +GET_CURRENT(bx) testb $TF_kernel_mode,VCPU_thread_flags(%rbx) jzswitch_to_kernel @@ -246,7 +246,7 @@ GLOBAL(sysenter_eflags_saved) pushq $0 /* null rip */ pushq $0 SAVE_VOLATILE TRAP_syscall -GET_CURRENT(%rbx) +GET_CURRENT(bx) cmpb $0,VCPU_sysenter_disables_events(%rbx) movq VCPU_sysenter_addr(%rbx),%rax setne %cl @@ -288,7 +288,7 @@ UNLIKELY_START(ne, msi_check) call check_for_unexpected_msi UNLIKELY_END(msi_check) -GET_CURRENT(%rbx) +GET_CURRENT(bx) /* Check that the callback is non-null. */ leaq VCPU_int80_bounce(%rbx),%rdx @@ -420,10 +420,10 @@ domain_crash_page_fault: call show_page_walk ENTRY(dom_crash_sync_extable) # Get out of the guest-save area of the stack. -GET_STACK_BASE(%rax) +GET_STACK_END(ax) leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp # create_bounce_frame() temporarily clobbers CS.RPL. Fix up. -__GET_CURRENT(%rax) +__GET_CURRENT(ax) movq VCPU_domain(%rax),%rax testb $1,DOMAIN_is_32bit_pv(%rax) setz %al @@ -441,7 +441,7 @@ ENTRY(common_interrupt) /* No special register assumptions. */ ENTRY(ret_from_intr) -GET_CURRENT(%rbx) +GET_CURRENT(bx) testb $3,UREGS_cs(%rsp) jzrestore_all_xen movq VCPU_domain(%rbx),%rax @@ -455,7 +455,7 @@ ENTRY(page_fault) GLOBAL(handle_exception) SAVE_ALL CLAC handle_exception_saved: -GET_CURRENT(%rbx) +GET_CURRENT(bx) testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp) jzexception_with_ints_disabled @@ -649,7 +649,7 @@ handle_ist_exception: testb $3,UREGS_cs(%rsp) jz1f /* Interrupted guest context. Copy the context to stack bottom. */ -GET_CPUINFO_FIELD(guest_cpu_user_regs,%rdi) +GET_CPUINFO_FIELD(guest_cpu_user_regs,di) movq %rsp,%rsi movl $UREGS_kernel_sizeof/8,%ecx movq %rdi,%rsp @@ -664,7 +664,7 @@ handle_ist_exception: /* We want to get straight to the IRET on the NMI exit path. */ testb $3,UREGS_cs(%rsp) jzrestore_all_xen -GET_CURRENT(%rbx) +GET_CURRENT(bx)