Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing
On Wed, Feb 14, 2018 at 09:21:12PM +0300, Alexey Dobriyan wrote: > > - xorq%r8, %r8/* nospec r8 */ > > + xorl%r8d, %r8d /* nospec r8 */ > > The suffix should be simply dropped as operand size is unambiguous. > It is just one more character than necessary on the screen. No strong feelings about this issue, but I prefer it to be explicit. And that's what seems to be the standard in arch/x86/entry/. Thanks, Dominik
Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing
On Wed, Feb 14, 2018 at 09:21:12PM +0300, Alexey Dobriyan wrote: > > - xorq%r8, %r8/* nospec r8 */ > > + xorl%r8d, %r8d /* nospec r8 */ > > The suffix should be simply dropped as operand size is unambiguous. > It is just one more character than necessary on the screen. No strong feelings about this issue, but I prefer it to be explicit. And that's what seems to be the standard in arch/x86/entry/. Thanks, Dominik
Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing
> - xorq%r8, %r8/* nospec r8 */ > + xorl%r8d, %r8d /* nospec r8 */ The suffix should be simply dropped as operand size is unambiguous. It is just one more character than necessary on the screen.
Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing
> - xorq%r8, %r8/* nospec r8 */ > + xorl%r8d, %r8d /* nospec r8 */ The suffix should be simply dropped as operand size is unambiguous. It is just one more character than necessary on the screen.
[PATCH 2/2] x86/entry/64: use xorl for register clearing
Using xorq to clear general-purpose registers is slower than xorl on some architectures. As xorl is sufficient to clear all 64 bits of these registers,[*] switch the x86 64-bit entry code to use xorl. [*] According to Intel 64 and IA-32 Architecture Software Developer's Manual, section 3.4.1.1, the result of 32-bit operands is "zero-extended to a 64-bit result in the destination general-purpose register." The AMD64 Architecture Programmer’s Manual Volume 3, Appendix B.1, describes the same behaviour. Suggested-by: Denys Vlasenko Signed-off-by: Dominik Brodowski --- arch/x86/entry/calling.h | 16 ++-- arch/x86/entry/entry_64_compat.S | 54 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 79ead48e6fe1..adaf5fd9840d 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with pushq %rcx/* pt_regs->cx */ pushq \rax/* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorq%r8, %r8/* nospec r8 */ + xorl%r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorq%r9, %r9/* nospec r9 */ + xorl%r9d, %r9d /* nospec r9 */ pushq %r10/* pt_regs->r10 */ - xorq%r10, %r10 /* nospec r10 */ + xorl%r10d, %r10d/* nospec r10 */ pushq %r11/* pt_regs->r11 */ - xorq%r11, %r11 /* nospec r11*/ + xorl%r11d, %r11d/* nospec r11*/ pushq %rbx/* pt_regs->rbx */ xorl%ebx, %ebx /* nospec rbx*/ pushq %rbp/* pt_regs->rbp */ xorl%ebp, %ebp /* nospec rbp*/ pushq %r12/* pt_regs->r12 */ - xorq%r12, %r12 /* nospec r12*/ + xorl%r12d, %r12d/* nospec r12*/ pushq %r13/* pt_regs->r13 */ - xorq%r13, %r13 /* nospec r13*/ + xorl%r13d, %r13d/* nospec r13*/ pushq %r14/* pt_regs->r14 */ - xorq%r14, %r14 /* nospec r14*/ + xorl%r14d, %r14d/* nospec r14*/ pushq %r15/* pt_regs->r15 */ - xorq%r15, %r15 /* nospec r15*/ + xorl%r15d, %r15d/* nospec r15*/ UNWIND_HINT_REGS .if \save_ret pushq %rsi/* return address on top of stack */ diff --git 
a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index fd65e016e413..364ea4a207be 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx/* pt_regs->cx */ pushq $-ENOSYS/* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq%r8, %r8/* nospec r8 */ + xorl%r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq%r9, %r9/* nospec r9 */ + xorl%r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq%r10, %r10 /* nospec r10 */ + xorl%r10d, %r10d/* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq%r11, %r11 /* nospec r11 */ + xorl%r11d, %r11d/* nospec r11 */ pushq %rbx/* pt_regs->rbx */ xorl%ebx, %ebx /* nospec rbx */ pushq %rbp/* pt_regs->rbp (will be overwritten) */ xorl%ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq%r12, %r12 /* nospec r12 */ + xorl%r12d, %r12d/* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq%r13, %r13 /* nospec r13 */ + xorl%r13d, %r13d/* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq%r14, %r14 /* nospec r14 */ + xorl%r14d, %r14d/* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ - xorq%r15, %r15 /* nospec r15 */ + xorl%r15d, %r15d/* nospec r15 */ cld /* @@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq %rbp/* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS/* pt_regs->ax */
[PATCH 2/2] x86/entry/64: use xorl for register clearing
Using xorq to clear general-purpose registers is slower than xorl on some architectures. As xorl is sufficient to clear all 64 bits of these registers,[*] switch the x86 64-bit entry code to use xorl. [*] According to Intel 64 and IA-32 Architecture Software Developer's Manual, section 3.4.1.1, the result of 32-bit operands is "zero-extended to a 64-bit result in the destination general-purpose register." The AMD64 Architecture Programmer’s Manual Volume 3, Appendix B.1, describes the same behaviour. Suggested-by: Denys Vlasenko Signed-off-by: Dominik Brodowski --- arch/x86/entry/calling.h | 16 ++-- arch/x86/entry/entry_64_compat.S | 54 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 79ead48e6fe1..adaf5fd9840d 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with pushq %rcx/* pt_regs->cx */ pushq \rax/* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorq%r8, %r8/* nospec r8 */ + xorl%r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorq%r9, %r9/* nospec r9 */ + xorl%r9d, %r9d /* nospec r9 */ pushq %r10/* pt_regs->r10 */ - xorq%r10, %r10 /* nospec r10 */ + xorl%r10d, %r10d/* nospec r10 */ pushq %r11/* pt_regs->r11 */ - xorq%r11, %r11 /* nospec r11*/ + xorl%r11d, %r11d/* nospec r11*/ pushq %rbx/* pt_regs->rbx */ xorl%ebx, %ebx /* nospec rbx*/ pushq %rbp/* pt_regs->rbp */ xorl%ebp, %ebp /* nospec rbp*/ pushq %r12/* pt_regs->r12 */ - xorq%r12, %r12 /* nospec r12*/ + xorl%r12d, %r12d/* nospec r12*/ pushq %r13/* pt_regs->r13 */ - xorq%r13, %r13 /* nospec r13*/ + xorl%r13d, %r13d/* nospec r13*/ pushq %r14/* pt_regs->r14 */ - xorq%r14, %r14 /* nospec r14*/ + xorl%r14d, %r14d/* nospec r14*/ pushq %r15/* pt_regs->r15 */ - xorq%r15, %r15 /* nospec r15*/ + xorl%r15d, %r15d/* nospec r15*/ UNWIND_HINT_REGS .if \save_ret pushq %rsi/* return address on top of stack */ diff --git 
a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index fd65e016e413..364ea4a207be 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx/* pt_regs->cx */ pushq $-ENOSYS/* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq%r8, %r8/* nospec r8 */ + xorl%r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq%r9, %r9/* nospec r9 */ + xorl%r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq%r10, %r10 /* nospec r10 */ + xorl%r10d, %r10d/* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq%r11, %r11 /* nospec r11 */ + xorl%r11d, %r11d/* nospec r11 */ pushq %rbx/* pt_regs->rbx */ xorl%ebx, %ebx /* nospec rbx */ pushq %rbp/* pt_regs->rbp (will be overwritten) */ xorl%ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq%r12, %r12 /* nospec r12 */ + xorl%r12d, %r12d/* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq%r13, %r13 /* nospec r13 */ + xorl%r13d, %r13d/* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq%r14, %r14 /* nospec r14 */ + xorl%r14d, %r14d/* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ - xorq%r15, %r15 /* nospec r15 */ + xorl%r15d, %r15d/* nospec r15 */ cld /* @@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq %rbp/* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS/* pt_regs->ax */ pushq $0 /*