Re: [PATCH] arm64/crypto: issue aese/aesmc instructions in pairs

2015-03-17 Thread Will Deacon
On Tue, Mar 17, 2015 at 06:05:13PM +, Ard Biesheuvel wrote:
 This changes the AES core transform implementations to issue aese/aesmc
 (and aesd/aesimc) in pairs. This enables a micro-architectural optimization
 in recent Cortex-A5x cores that improves performance by 50-90%.
 
 Measured performance in cycles per byte (Cortex-A57):
 
              CBC enc    CBC dec    CTR
    before    3.64       1.34       1.32
    after     1.95       0.85       0.93
 
 Note that this results in a ~5% performance decrease for older cores.
 
 Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org
 ---
 
 Will,
 
 This is the optimization you yourself mentioned to me about a year ago
 (or perhaps even longer?). Anyway, we have now been able to confirm it
 on a sample 'in the wild' (i.e., a Galaxy S6 phone).

I barely remember one day to the next, but hey! I'll queue this for 4.1.

Will


[PATCH] arm64/crypto: issue aese/aesmc instructions in pairs

2015-03-17 Thread Ard Biesheuvel
This changes the AES core transform implementations to issue aese/aesmc
(and aesd/aesimc) in pairs. This enables a micro-architectural optimization
in recent Cortex-A5x cores that improves performance by 50-90%.

Measured performance in cycles per byte (Cortex-A57):

            CBC enc    CBC dec    CTR
  before    3.64       1.34       1.32
  after     1.95       0.85       0.93

Note that this results in a ~5% performance decrease for older cores.

Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org
---

Will,

This is the optimization you yourself mentioned to me about a year ago
(or perhaps even longer?). Anyway, we have now been able to confirm it
on a sample 'in the wild' (i.e., a Galaxy S6 phone).
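
To illustrate the change (sketch only, using the registers from the CCM
code below; not part of the diff): instead of issuing the two aese
instructions back to back and then the two aesmc instructions,

        aese    v0.16b, v4.16b
        aese    v1.16b, v4.16b
        aesmc   v0.16b, v0.16b
        aesmc   v1.16b, v1.16b

each aese is now immediately followed by the aesmc that consumes its
result:

        aese    v0.16b, v4.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v4.16b
        aesmc   v1.16b, v1.16b

Keeping every aese/aesmc (and aesd/aesimc) pair adjacent is what triggers
the micro-architectural optimization on the newer Cortex-A5x cores; older
cores gain nothing from the pairing and lose a little of the software
interleaving, which is presumably the source of the ~5% regression
mentioned above.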

 arch/arm64/crypto/aes-ce-ccm-core.S | 12 ++++++------
 arch/arm64/crypto/aes-ce.S          | 10 +++-------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 432e4841cd81..a2a7fbcacc14 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final)
 0:     mov     v4.16b, v3.16b
 1:     ld1     {v5.2d}, [x2], #16      /* load next round key */
        aese    v0.16b, v4.16b
-       aese    v1.16b, v4.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v4.16b
        aesmc   v1.16b, v1.16b
 2:     ld1     {v3.2d}, [x2], #16      /* load next round key */
        aese    v0.16b, v5.16b
-       aese    v1.16b, v5.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v5.16b
        aesmc   v1.16b, v1.16b
 3:     ld1     {v4.2d}, [x2], #16      /* load next round key */
        subs    w3, w3, #3
        aese    v0.16b, v3.16b
-       aese    v1.16b, v3.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v3.16b
        aesmc   v1.16b, v1.16b
        bpl     1b
        aese    v0.16b, v4.16b
@@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final)
        ld1     {v5.2d}, [x10], #16     /* load 2nd round key */
 2:     /* inner loop: 3 rounds, 2x interleaved */
        aese    v0.16b, v4.16b
-       aese    v1.16b, v4.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v4.16b
        aesmc   v1.16b, v1.16b
 3:     ld1     {v3.2d}, [x10], #16     /* load next round key */
        aese    v0.16b, v5.16b
-       aese    v1.16b, v5.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v5.16b
        aesmc   v1.16b, v1.16b
 4:     ld1     {v4.2d}, [x10], #16     /* load next round key */
        subs    w7, w7, #3
        aese    v0.16b, v3.16b
-       aese    v1.16b, v3.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v3.16b
        aesmc   v1.16b, v1.16b
        ld1     {v5.2d}, [x10], #16     /* load next round key */
        bpl     2b
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 685a18f731eb..78f3cfe92c08 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -45,18 +45,14 @@
 
.macro  do_enc_Nx, de, mc, k, i0, i1, i2, i3
aes\de  \i0\().16b, \k\().16b
-   .ifnb   \i1
-   aes\de  \i1\().16b, \k\().16b
-   .ifnb   \i3
-   aes\de  \i2\().16b, \k\().16b
-   aes\de  \i3\().16b, \k\().16b
-   .endif
-   .endif
aes\mc  \i0\().16b, \i0\().16b
.ifnb   \i1
+   aes\de  \i1\().16b, \k\().16b
aes\mc  \i1\().16b, \i1\().16b
.ifnb   \i3
+   aes\de  \i2\().16b, \k\().16b
aes\mc  \i2\().16b, \i2\().16b
+   aes\de  \i3\().16b, \k\().16b
aes\mc  \i3\().16b, \i3\().16b
.endif
.endif
-- 
1.8.3.2
