[tip:x86/asm] x86/asm/64: Use 32-bit XOR to zero registers

2018-07-03 Thread tip-bot for Jan Beulich
Commit-ID:  a7bea8308933aaeea76dad7d42a6e51000417626
Gitweb: https://git.kernel.org/tip/a7bea8308933aaeea76dad7d42a6e51000417626
Author: Jan Beulich 
AuthorDate: Mon, 2 Jul 2018 04:31:54 -0600
Committer:  Ingo Molnar 
CommitDate: Tue, 3 Jul 2018 09:59:29 +0200

x86/asm/64: Use 32-bit XOR to zero registers

Some Intel CPUs don't recognize 64-bit XORs as zeroing idioms. Zeroing
idioms don't require execution bandwidth, as they are handled in the
front end (through register renaming). Use 32-bit XORs instead.
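
For illustration only (not part of the patch): both forms clear the
whole 64-bit register, since writing a 32-bit register zero-extends
into the upper half, but only the 32-bit form is reliably treated by
the renamer as a zeroing idiom, and it is also one byte shorter because
it needs no REX.W prefix:

	xor	%rax, %rax	# 48 31 c0 - 64-bit form; not recognized
				# as a zeroing idiom on some Intel CPUs
	xor	%eax, %eax	# 31 c0    - 32-bit form; recognized by the
				# renamer and still clears all of %rax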

Signed-off-by: Jan Beulich 
Cc: Alok Kataria 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: da...@davemloft.net
Cc: herb...@gondor.apana.org.au
Cc: pa...@ucw.cz
Cc: r...@rjwysocki.net
Link: http://lkml.kernel.org/r/5b39ff1a0278001cf...@prv1-mh.provo.novell.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/crypto/aegis128-aesni-asm.S     | 2 +-
 arch/x86/crypto/aegis128l-aesni-asm.S    | 2 +-
 arch/x86/crypto/aegis256-aesni-asm.S     | 2 +-
 arch/x86/crypto/aesni-intel_asm.S        | 8 ++++----
 arch/x86/crypto/aesni-intel_avx-x86_64.S | 4 ++--
 arch/x86/crypto/morus1280-avx2-asm.S     | 2 +-
 arch/x86/crypto/morus1280-sse2-asm.S     | 2 +-
 arch/x86/crypto/morus640-sse2-asm.S      | 2 +-
 arch/x86/crypto/sha1_ssse3_asm.S         | 2 +-
 arch/x86/kernel/head_64.S                | 2 +-
 arch/x86/kernel/paravirt_patch_64.c      | 2 +-
 arch/x86/lib/memcpy_64.S                 | 2 +-
 arch/x86/power/hibernate_asm_64.S        | 2 +-
 13 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 9254e0b6cc06..d5c5e2082ae7 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -75,7 +75,7 @@
  *   %r9
  */
 __load_partial:
-   xor %r9, %r9
+   xor %r9d, %r9d
pxor MSG, MSG
 
mov LEN, %r8
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 9263c344f2c7..0fbdf5f00bda 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -66,7 +66,7 @@
  *   %r9
  */
 __load_partial:
-   xor %r9, %r9
+   xor %r9d, %r9d
pxor MSG0, MSG0
pxor MSG1, MSG1
 
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 1d977d515bf9..a49f58e2a5dd 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -59,7 +59,7 @@
  *   %r9
  */
 __load_partial:
-   xor %r9, %r9
+   xor %r9d, %r9d
pxor MSG, MSG
 
mov LEN, %r8
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index e762ef417562..9bd139569b41 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -258,7 +258,7 @@ ALL_F:  .octa 0x
 .macro GCM_INIT Iv SUBKEY AAD AADLEN
mov \AADLEN, %r11
mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
-   xor %r11, %r11
+   xor %r11d, %r11d
mov %r11, InLen(%arg2) # ctx_data.in_length = 0
mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
@@ -286,7 +286,7 @@ ALL_F:  .octa 0x
movdqu HashKey(%arg2), %xmm13
add %arg5, InLen(%arg2)
 
-   xor %r11, %r11 # initialise the data pointer offset as zero
+   xor %r11d, %r11d # initialise the data pointer offset as zero
PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
 
sub %r11, %arg5 # sub partial block data used
@@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
 
# GHASH computation for the last <16 Byte block
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-   xor %rax,%rax
+   xor %eax, %eax
 
mov %rax, PBlockLen(%arg2)
jmp _dec_done_\@
@@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
 
# GHASH computation for the last <16 Byte block
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-   xor %rax,%rax
+   xor %eax, %eax
 
mov %rax, PBlockLen(%arg2)
jmp _encode_done_\@
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index faecb1518bf8..1985ea0b551b 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
 
 _get_AAD_done\@:
# initialize the data pointer offset as zero
-   xor %r11, %r11
+   xor %r11d, %r11d
 
# start AES for num_initial_blocks blocks
mov arg5, %rax # rax = *Y0
@@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
 
 _get_AAD_done\@:
 
