Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing

2018-02-14 Thread Dominik Brodowski
On Wed, Feb 14, 2018 at 09:21:12PM +0300, Alexey Dobriyan wrote:
> > -       xorq    %r8, %r8        /* nospec   r8 */
> > +       xorl    %r8d, %r8d      /* nospec   r8 */
> 
> The suffix should simply be dropped, as the operand size is unambiguous.
> It is just one more character than necessary on the screen.

No strong feelings about this issue, but I prefer it to be explicit, and
that seems to be the standard in arch/x86/entry/.

Thanks,
Dominik


Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing

2018-02-14 Thread Dominik Brodowski
On Wed, Feb 14, 2018 at 09:21:12PM +0300, Alexey Dobriyan wrote:
> > -       xorq    %r8, %r8        /* nospec   r8 */
> > +       xorl    %r8d, %r8d      /* nospec   r8 */
> 
> The suffix should simply be dropped, as the operand size is unambiguous.
> It is just one more character than necessary on the screen.

No strong feelings about this issue, but I prefer it to be explicit, and
that seems to be the standard in arch/x86/entry/.

Thanks,
Dominik


Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing

2018-02-14 Thread Alexey Dobriyan
> -       xorq    %r8, %r8        /* nospec   r8 */
> +       xorl    %r8d, %r8d      /* nospec   r8 */

The suffix should simply be dropped, as the operand size is unambiguous.
It is just one more character than necessary on the screen.


Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing

2018-02-14 Thread Alexey Dobriyan
> -       xorq    %r8, %r8        /* nospec   r8 */
> +       xorl    %r8d, %r8d      /* nospec   r8 */

The suffix should simply be dropped, as the operand size is unambiguous.
It is just one more character than necessary on the screen.


[PATCH 2/2] x86/entry/64: use xorl for register clearing

2018-02-14 Thread Dominik Brodowski
Using xorq to clear general-purpose registers is slower than using
xorl on some architectures. As xorl is sufficient to clear all
64 bits of these registers,[*] switch the x86 64-bit entry code
to use xorl.

[*] According to the Intel 64 and IA-32 Architectures Software Developer's
Manual, section 3.4.1.1, the results of operations on 32-bit operands are
"zero-extended to a 64-bit result in the destination general-purpose
register." The AMD64 Architecture Programmer’s Manual Volume 3,
Appendix B.1, describes the same behaviour.

Suggested-by: Denys Vlasenko 
Signed-off-by: Dominik Brodowski 
---
 arch/x86/entry/calling.h | 16 ++--
 arch/x86/entry/entry_64_compat.S | 54 
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 79ead48e6fe1..adaf5fd9840d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with
pushq   %rcx/* pt_regs->cx */
pushq   \rax/* pt_regs->ax */
pushq   %r8 /* pt_regs->r8 */
-   xorq%r8, %r8/* nospec   r8 */
+   xorl%r8d, %r8d  /* nospec   r8 */
pushq   %r9 /* pt_regs->r9 */
-   xorq%r9, %r9/* nospec   r9 */
+   xorl%r9d, %r9d  /* nospec   r9 */
pushq   %r10/* pt_regs->r10 */
-   xorq%r10, %r10  /* nospec   r10 */
+   xorl%r10d, %r10d/* nospec   r10 */
pushq   %r11/* pt_regs->r11 */
-   xorq%r11, %r11  /* nospec   r11*/
+   xorl%r11d, %r11d/* nospec   r11*/
pushq   %rbx/* pt_regs->rbx */
xorl%ebx, %ebx  /* nospec   rbx*/
pushq   %rbp/* pt_regs->rbp */
xorl%ebp, %ebp  /* nospec   rbp*/
pushq   %r12/* pt_regs->r12 */
-   xorq%r12, %r12  /* nospec   r12*/
+   xorl%r12d, %r12d/* nospec   r12*/
pushq   %r13/* pt_regs->r13 */
-   xorq%r13, %r13  /* nospec   r13*/
+   xorl%r13d, %r13d/* nospec   r13*/
pushq   %r14/* pt_regs->r14 */
-   xorq%r14, %r14  /* nospec   r14*/
+   xorl%r14d, %r14d/* nospec   r14*/
pushq   %r15/* pt_regs->r15 */
-   xorq%r15, %r15  /* nospec   r15*/
+   xorl%r15d, %r15d/* nospec   r15*/
UNWIND_HINT_REGS
.if \save_ret
pushq   %rsi/* return address on top of stack */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..364ea4a207be 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq   %rcx/* pt_regs->cx */
pushq   $-ENOSYS/* pt_regs->ax */
pushq   $0  /* pt_regs->r8  = 0 */
-   xorq%r8, %r8/* nospec   r8 */
+   xorl%r8d, %r8d  /* nospec   r8 */
pushq   $0  /* pt_regs->r9  = 0 */
-   xorq%r9, %r9/* nospec   r9 */
+   xorl%r9d, %r9d  /* nospec   r9 */
pushq   $0  /* pt_regs->r10 = 0 */
-   xorq%r10, %r10  /* nospec   r10 */
+   xorl%r10d, %r10d/* nospec   r10 */
pushq   $0  /* pt_regs->r11 = 0 */
-   xorq%r11, %r11  /* nospec   r11 */
+   xorl%r11d, %r11d/* nospec   r11 */
pushq   %rbx/* pt_regs->rbx */
xorl%ebx, %ebx  /* nospec   rbx */
pushq   %rbp/* pt_regs->rbp (will be overwritten) */
xorl%ebp, %ebp  /* nospec   rbp */
pushq   $0  /* pt_regs->r12 = 0 */
-   xorq%r12, %r12  /* nospec   r12 */
+   xorl%r12d, %r12d/* nospec   r12 */
pushq   $0  /* pt_regs->r13 = 0 */
-   xorq%r13, %r13  /* nospec   r13 */
+   xorl%r13d, %r13d/* nospec   r13 */
pushq   $0  /* pt_regs->r14 = 0 */
-   xorq%r14, %r14  /* nospec   r14 */
+   xorl%r14d, %r14d/* nospec   r14 */
pushq   $0  /* pt_regs->r15 = 0 */
-   xorq%r15, %r15  /* nospec   r15 */
+   xorl%r15d, %r15d/* nospec   r15 */
cld
 
/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq   %rbp/* pt_regs->cx (stashed in bp) */
pushq   $-ENOSYS/* pt_regs->ax */
 

[PATCH 2/2] x86/entry/64: use xorl for register clearing

2018-02-14 Thread Dominik Brodowski
Using xorq to clear general-purpose registers is slower than using
xorl on some architectures. As xorl is sufficient to clear all
64 bits of these registers,[*] switch the x86 64-bit entry code
to use xorl.

[*] According to the Intel 64 and IA-32 Architectures Software Developer's
Manual, section 3.4.1.1, the results of operations on 32-bit operands are
"zero-extended to a 64-bit result in the destination general-purpose
register." The AMD64 Architecture Programmer’s Manual Volume 3,
Appendix B.1, describes the same behaviour.

Suggested-by: Denys Vlasenko 
Signed-off-by: Dominik Brodowski 
---
 arch/x86/entry/calling.h | 16 ++--
 arch/x86/entry/entry_64_compat.S | 54 
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 79ead48e6fe1..adaf5fd9840d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with
pushq   %rcx/* pt_regs->cx */
pushq   \rax/* pt_regs->ax */
pushq   %r8 /* pt_regs->r8 */
-   xorq%r8, %r8/* nospec   r8 */
+   xorl%r8d, %r8d  /* nospec   r8 */
pushq   %r9 /* pt_regs->r9 */
-   xorq%r9, %r9/* nospec   r9 */
+   xorl%r9d, %r9d  /* nospec   r9 */
pushq   %r10/* pt_regs->r10 */
-   xorq%r10, %r10  /* nospec   r10 */
+   xorl%r10d, %r10d/* nospec   r10 */
pushq   %r11/* pt_regs->r11 */
-   xorq%r11, %r11  /* nospec   r11*/
+   xorl%r11d, %r11d/* nospec   r11*/
pushq   %rbx/* pt_regs->rbx */
xorl%ebx, %ebx  /* nospec   rbx*/
pushq   %rbp/* pt_regs->rbp */
xorl%ebp, %ebp  /* nospec   rbp*/
pushq   %r12/* pt_regs->r12 */
-   xorq%r12, %r12  /* nospec   r12*/
+   xorl%r12d, %r12d/* nospec   r12*/
pushq   %r13/* pt_regs->r13 */
-   xorq%r13, %r13  /* nospec   r13*/
+   xorl%r13d, %r13d/* nospec   r13*/
pushq   %r14/* pt_regs->r14 */
-   xorq%r14, %r14  /* nospec   r14*/
+   xorl%r14d, %r14d/* nospec   r14*/
pushq   %r15/* pt_regs->r15 */
-   xorq%r15, %r15  /* nospec   r15*/
+   xorl%r15d, %r15d/* nospec   r15*/
UNWIND_HINT_REGS
.if \save_ret
pushq   %rsi/* return address on top of stack */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..364ea4a207be 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq   %rcx/* pt_regs->cx */
pushq   $-ENOSYS/* pt_regs->ax */
pushq   $0  /* pt_regs->r8  = 0 */
-   xorq%r8, %r8/* nospec   r8 */
+   xorl%r8d, %r8d  /* nospec   r8 */
pushq   $0  /* pt_regs->r9  = 0 */
-   xorq%r9, %r9/* nospec   r9 */
+   xorl%r9d, %r9d  /* nospec   r9 */
pushq   $0  /* pt_regs->r10 = 0 */
-   xorq%r10, %r10  /* nospec   r10 */
+   xorl%r10d, %r10d/* nospec   r10 */
pushq   $0  /* pt_regs->r11 = 0 */
-   xorq%r11, %r11  /* nospec   r11 */
+   xorl%r11d, %r11d/* nospec   r11 */
pushq   %rbx/* pt_regs->rbx */
xorl%ebx, %ebx  /* nospec   rbx */
pushq   %rbp/* pt_regs->rbp (will be overwritten) */
xorl%ebp, %ebp  /* nospec   rbp */
pushq   $0  /* pt_regs->r12 = 0 */
-   xorq%r12, %r12  /* nospec   r12 */
+   xorl%r12d, %r12d/* nospec   r12 */
pushq   $0  /* pt_regs->r13 = 0 */
-   xorq%r13, %r13  /* nospec   r13 */
+   xorl%r13d, %r13d/* nospec   r13 */
pushq   $0  /* pt_regs->r14 = 0 */
-   xorq%r14, %r14  /* nospec   r14 */
+   xorl%r14d, %r14d/* nospec   r14 */
pushq   $0  /* pt_regs->r15 = 0 */
-   xorq%r15, %r15  /* nospec   r15 */
+   xorl%r15d, %r15d/* nospec   r15 */
cld
 
/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq   %rbp/* pt_regs->cx (stashed in bp) */
pushq   $-ENOSYS/* pt_regs->ax */
pushq   $0  /*