Only perform the NEON yield check once every 4 blocks of input, to
avoid taking a considerable performance hit on cores with very fast
crypto instructions and comparatively slow memory accesses, such as
the Cortex-A53.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/sha1-ce-core.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 78eb35fb5056..f592c55218d0 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -129,6 +129,9 @@ CPU_LE(     rev32           v11.16b, v11.16b        )
        add             dgbv.2s, dgbv.2s, dg1v.2s
        add             dgav.4s, dgav.4s, dg0v.4s
 
+       tst             w21, #0x3               // yield only every 4 blocks
+       b.ne            1b
+
        cbz             w21, 3f
 
        if_will_cond_yield_neon
-- 
2.11.0

Reply via email to