* cipher/Makefile.am: Add 'simd-common-ppc.h'. * cipher/camellia-simd128.h [HAVE_GCC_INLINE_ASM_PPC_ALTIVEC]: Include "simd-common-ppc.h". [HAVE_GCC_INLINE_ASM_PPC_ALTIVEC] (memory_barrier_with_vec) (clear_vec_regs): Remove. * cipher/chacha20-p10le-8x.s (clear_vec_regs): New. (_gcry_chacha20_p10le_8x): Add clear_vec_regs. * cipher/chacha20-ppc.c: Include "simd-common-ppc.h". (chacha20_ppc_blocks1, chacha20_ppc_blocks4) (chacha20_poly1305_ppc_blocks4): Add clear_vec_regs. * cipher/cipher-gcm-ppc.c: Include "simd-common-ppc.h". (_gcry_ghash_setup_ppc_vpmsum, _gcry_ghash_ppc_vpmsum): Add clear_vec_regs. * cipher/poly1305-p10le.s (clear_vec_regs): New. (gcry_poly1305_p10le_4blocks): Add clear_vec_regs. * cipher/rijndael-p10le.c: Include "simd-common-ppc.h". (_gcry_aes_p10le_gcm_crypt): Add clear_vec_regs. * cipher/rijndael-ppc-common.h: Include "simd-common-ppc.h". * cipher/rijndael-ppc-functions.h (ENCRYPT_BLOCK_FUNC) (DECRYPT_BLOCK_FUNC, CFB_ENC_FUNC, ECB_CRYPT_FUNC, CFB_DEC_FUNC) (CBC_ENC_FUNC, CBC_DEC_FUNC, CTR_ENC_FUNC, OCB_CRYPT_FUNC) (OCB_AUTH_FUNC, XTS_CRYPT_FUNC, CTR32LE_ENC_FUNC): Add clear_vec_regs. * cipher/rijndael-ppc.c (_gcry_aes_ppc8_setkey) (_gcry_aes_ppc8_prepare_decryption): Add clear_vec_regs. * cipher/sha256-ppc.c: Include "simd-common-ppc.h". (sha256_transform_ppc): Add clear_vec_regs. * cipher/sha512-ppc.c: Include "simd-common-ppc.h". (sha512_transform_ppc): Add clear_vec_regs. * cipher/simd-common-ppc.h: New. * cipher/sm4-ppc.c: Include "simd-common-ppc.h". (sm4_ppc_crypt_blk1_16): Add clear_vec_regs. --
Signed-off-by: Jussi Kivilinna <jussi.kivili...@iki.fi> --- cipher/Makefile.am | 2 +- cipher/camellia-simd128.h | 4 +- cipher/chacha20-p10le-8x.s | 41 ++++++++++++++++++ cipher/chacha20-ppc.c | 7 +++ cipher/cipher-gcm-ppc.c | 5 +++ cipher/poly1305-p10le.s | 41 ++++++++++++++++++ cipher/rijndael-p10le.c | 5 +++ cipher/rijndael-ppc-common.h | 1 + cipher/rijndael-ppc-functions.h | 24 ++++++++++ cipher/rijndael-ppc.c | 4 ++ cipher/sha256-ppc.c | 3 ++ cipher/sha512-ppc.c | 3 ++ cipher/simd-common-ppc.h | 77 +++++++++++++++++++++++++++++++++ cipher/sm4-ppc.c | 34 ++++++++------- 14 files changed, 232 insertions(+), 19 deletions(-) create mode 100644 cipher/simd-common-ppc.h diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 633c53ed..90415d83 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -127,7 +127,7 @@ EXTRA_libcipher_la_SOURCES = \ seed.c \ serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S \ serpent-avx512-x86.c serpent-armv7-neon.S \ - simd-common-aarch64.h \ + simd-common-aarch64.h simd-common-ppc.h \ sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \ sm4-gfni-avx2-amd64.S sm4-gfni-avx512-amd64.S \ sm4-aarch64.S sm4-armv8-aarch64-ce.S sm4-armv9-aarch64-sve-ce.S \ diff --git a/cipher/camellia-simd128.h b/cipher/camellia-simd128.h index 120fbe5a..df36a1a2 100644 --- a/cipher/camellia-simd128.h +++ b/cipher/camellia-simd128.h @@ -47,6 +47,7 @@ /********************************************************************** AT&T x86 asm to intrinsics conversion macros (PowerPC VSX+crypto) **********************************************************************/ +#include "simd-common-ppc.h" #include <altivec.h> typedef vector signed char int8x16_t; @@ -151,9 +152,6 @@ static const uint8x16_t shift_row = #define if_aes_subbytes(...) __VA_ARGS__ #define if_not_aes_subbytes(...) 
/*_*/ -#define memory_barrier_with_vec(a) __asm__("" : "+wa"(a) :: "memory") -#define clear_vec_regs() ((void)0) - #endif /* __powerpc__ */ #ifdef __ARM_NEON diff --git a/cipher/chacha20-p10le-8x.s b/cipher/chacha20-p10le-8x.s index ff68c9ef..f75ffb12 100644 --- a/cipher/chacha20-p10le-8x.s +++ b/cipher/chacha20-p10le-8x.s @@ -61,6 +61,45 @@ # .text +.macro clear_vec_regs + xxlxor 0, 0, 0 + xxlxor 1, 1, 1 + xxlxor 2, 2, 2 + xxlxor 3, 3, 3 + xxlxor 4, 4, 4 + xxlxor 5, 5, 5 + xxlxor 6, 6, 6 + xxlxor 7, 7, 7 + xxlxor 8, 8, 8 + xxlxor 9, 9, 9 + xxlxor 10, 10, 10 + xxlxor 11, 11, 11 + xxlxor 12, 12, 12 + xxlxor 13, 13, 13 + # vs14-vs31 (f14-f31) are ABI callee saved. + xxlxor 32, 32, 32 + xxlxor 33, 33, 33 + xxlxor 34, 34, 34 + xxlxor 35, 35, 35 + xxlxor 36, 36, 36 + xxlxor 37, 37, 37 + xxlxor 38, 38, 38 + xxlxor 39, 39, 39 + xxlxor 40, 40, 40 + xxlxor 41, 41, 41 + xxlxor 42, 42, 42 + xxlxor 43, 43, 43 + xxlxor 44, 44, 44 + xxlxor 45, 45, 45 + xxlxor 46, 46, 46 + xxlxor 47, 47, 47 + xxlxor 48, 48, 48 + xxlxor 49, 49, 49 + xxlxor 50, 50, 50 + xxlxor 51, 51, 51 + # vs52-vs63 (v20-v31) are ABI callee saved. 
+.endm + .macro QT_loop_8x # QR(v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15) xxlor 0, 32+25, 32+25 @@ -782,6 +821,8 @@ Out_loop: lvx 30, 26, 9 lvx 31, 27, 9 + clear_vec_regs + add 9, 9, 27 addi 14, 17, 16 lxvx 14, 14, 9 diff --git a/cipher/chacha20-ppc.c b/cipher/chacha20-ppc.c index e640010a..376d0642 100644 --- a/cipher/chacha20-ppc.c +++ b/cipher/chacha20-ppc.c @@ -25,6 +25,7 @@ defined(USE_CHACHA20) && \ __GNUC__ >= 4 +#include "simd-common-ppc.h" #include <altivec.h> #include "bufhelp.h" #include "poly1305-internal.h" @@ -252,6 +253,8 @@ chacha20_ppc_blocks1(u32 *state, byte *dst, const byte *src, size_t nblks) vec_vsx_st(state3, 3 * 16, state); /* store counter */ + clear_vec_regs(); + return 0; } @@ -414,6 +417,8 @@ chacha20_ppc_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks) vec_vsx_st(state3, 3 * 16, state); /* store counter */ + clear_vec_regs(); + return 0; } @@ -636,6 +641,8 @@ chacha20_poly1305_ppc_blocks4(u32 *state, byte *dst, const byte *src, st->h[3] = h1 >> 32; st->h[4] = h2; + clear_vec_regs(); + return 0; } diff --git a/cipher/cipher-gcm-ppc.c b/cipher/cipher-gcm-ppc.c index 648d1598..486295af 100644 --- a/cipher/cipher-gcm-ppc.c +++ b/cipher/cipher-gcm-ppc.c @@ -80,6 +80,7 @@ #ifdef GCM_USE_PPC_VPMSUM +#include "simd-common-ppc.h" #include <altivec.h> #define ALWAYS_INLINE inline __attribute__((always_inline)) @@ -370,6 +371,8 @@ _gcry_ghash_setup_ppc_vpmsum (void *gcm_table_arg, void *gcm_key) STORE_TABLE (gcm_table, 10, H4l); STORE_TABLE (gcm_table, 11, H4); STORE_TABLE (gcm_table, 12, H4h); + + clear_vec_regs(); } unsigned int ASM_FUNC_ATTR @@ -542,6 +545,8 @@ _gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table, vec_store_he (vec_be_swap (cur, bswap_const), 0, result); + clear_vec_regs(); + return 0; } diff --git a/cipher/poly1305-p10le.s b/cipher/poly1305-p10le.s index 4202b41e..d21f8245 100644 --- a/cipher/poly1305-p10le.s +++ b/cipher/poly1305-p10le.s @@ -57,6 +57,45 @@ # .text +.macro 
clear_vec_regs + xxlxor 0, 0, 0 + xxlxor 1, 1, 1 + xxlxor 2, 2, 2 + xxlxor 3, 3, 3 + xxlxor 4, 4, 4 + xxlxor 5, 5, 5 + xxlxor 6, 6, 6 + xxlxor 7, 7, 7 + xxlxor 8, 8, 8 + xxlxor 9, 9, 9 + xxlxor 10, 10, 10 + xxlxor 11, 11, 11 + xxlxor 12, 12, 12 + xxlxor 13, 13, 13 + # vs14-vs31 (f14-f31) are ABI callee saved. + xxlxor 32, 32, 32 + xxlxor 33, 33, 33 + xxlxor 34, 34, 34 + xxlxor 35, 35, 35 + xxlxor 36, 36, 36 + xxlxor 37, 37, 37 + xxlxor 38, 38, 38 + xxlxor 39, 39, 39 + xxlxor 40, 40, 40 + xxlxor 41, 41, 41 + xxlxor 42, 42, 42 + xxlxor 43, 43, 43 + xxlxor 44, 44, 44 + xxlxor 45, 45, 45 + xxlxor 46, 46, 46 + xxlxor 47, 47, 47 + xxlxor 48, 48, 48 + xxlxor 49, 49, 49 + xxlxor 50, 50, 50 + xxlxor 51, 51, 51 + # vs52-vs63 (v20-v31) are ABI callee saved. +.endm + # Block size 16 bytes # key = (r, s) # clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF @@ -745,6 +784,8 @@ do_final_update: Out_loop: li 3, 0 + clear_vec_regs + li 14, 256 lvx 20, 14, 1 addi 14, 14, 16 diff --git a/cipher/rijndael-p10le.c b/cipher/rijndael-p10le.c index 65d804f9..448b45ed 100644 --- a/cipher/rijndael-p10le.c +++ b/cipher/rijndael-p10le.c @@ -30,6 +30,8 @@ #ifdef USE_PPC_CRYPTO_WITH_PPC9LE +#include "simd-common-ppc.h" + extern size_t _gcry_ppc10_aes_gcm_encrypt (const void *inp, void *out, size_t len, @@ -113,6 +115,9 @@ _gcry_aes_p10le_gcm_crypt(gcry_cipher_hd_t c, void *outbuf_arg, */ s = ndone / GCRY_GCM_BLOCK_LEN; s = nblocks - s; + + clear_vec_regs(); + return ( s ); } diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h index bd2ad8b1..611b5871 100644 --- a/cipher/rijndael-ppc-common.h +++ b/cipher/rijndael-ppc-common.h @@ -26,6 +26,7 @@ #ifndef G10_RIJNDAEL_PPC_COMMON_H #define G10_RIJNDAEL_PPC_COMMON_H +#include "simd-common-ppc.h" #include <altivec.h> diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h index ec5cda73..eb39717d 100644 --- a/cipher/rijndael-ppc-functions.h +++ b/cipher/rijndael-ppc-functions.h @@ -40,6 +40,8 @@ 
ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out, AES_ENCRYPT (b, rounds); VEC_STORE_BE (out, 0, b, bige_const); + clear_vec_regs(); + return 0; /* does not use stack */ } @@ -61,6 +63,8 @@ DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out, AES_DECRYPT (b, rounds); VEC_STORE_BE (out, 0, b, bige_const); + clear_vec_regs(); + return 0; /* does not use stack */ } @@ -116,6 +120,8 @@ CFB_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, } VEC_STORE_BE (iv_arg, 0, outiv, bige_const); + + clear_vec_regs(); } @@ -373,6 +379,8 @@ ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg, out++; in++; } + + clear_vec_regs(); } @@ -571,6 +579,8 @@ CFB_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, } VEC_STORE_BE (iv_arg, 0, iv, bige_const); + + clear_vec_regs(); } @@ -640,6 +650,8 @@ CBC_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, while (nblocks); VEC_STORE_BE (iv_arg, 0, outiv, bige_const); + + clear_vec_regs(); } @@ -845,6 +857,8 @@ CBC_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, } VEC_STORE_BE (iv_arg, 0, iv, bige_const); + + clear_vec_regs(); } @@ -1078,6 +1092,8 @@ CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, void *outbuf_arg, } VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); + + clear_vec_regs(); } @@ -1584,6 +1600,8 @@ OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); c->u_mode.ocb.data_nblocks = data_nblocks; + clear_vec_regs(); + return 0; } @@ -1794,6 +1812,8 @@ OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); c->u_mode.ocb.aad_nblocks = data_nblocks; + clear_vec_regs(); + return 0; } @@ -2295,6 +2315,8 @@ XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, void *outbuf_arg, VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); #undef GEN_TWEAK + + clear_vec_regs(); } @@ -2541,4 +2563,6 
@@ CTR32LE_ENC_FUNC(void *context, unsigned char *ctr_arg, void *outbuf_arg, #undef VEC_ADD_CTRLE32 VEC_STORE_BE (ctr_arg, 0, vec_reve((block)ctr), bige_const); + + clear_vec_regs(); } diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 055b00c0..18fadd6e 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -201,6 +201,8 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) } wipememory(tk_vu32, sizeof(tk_vu32)); + + clear_vec_regs(); } @@ -208,6 +210,8 @@ void PPC_OPT_ATTR _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) { internal_aes_ppc_prepare_decryption (ctx); + + clear_vec_regs(); } diff --git a/cipher/sha256-ppc.c b/cipher/sha256-ppc.c index e5839a84..bcc08dad 100644 --- a/cipher/sha256-ppc.c +++ b/cipher/sha256-ppc.c @@ -25,6 +25,7 @@ defined(USE_SHA256) && \ __GNUC__ >= 4 +#include "simd-common-ppc.h" #include <altivec.h> #include "bufhelp.h" @@ -590,6 +591,8 @@ sha256_transform_ppc(u32 state[8], const unsigned char *data, size_t nblks) vec_vsx_st (h0_h3, 4 * 0, state); vec_vsx_st (h4_h7, 4 * 4, state); + clear_vec_regs(); + return sizeof(w2) + sizeof(w); } diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c index d213c241..ed9486ee 100644 --- a/cipher/sha512-ppc.c +++ b/cipher/sha512-ppc.c @@ -25,6 +25,7 @@ defined(USE_SHA512) && \ __GNUC__ >= 4 +#include "simd-common-ppc.h" #include <altivec.h> #include "bufhelp.h" @@ -705,6 +706,8 @@ sha512_transform_ppc(u64 state[8], const unsigned char *data, size_t nblks) vec_u64_store (h4, 8 * 4, (unsigned long long *)state); vec_u64_store (h6, 8 * 6, (unsigned long long *)state); + clear_vec_regs(); + return sizeof(w) + sizeof(w2); } diff --git a/cipher/simd-common-ppc.h b/cipher/simd-common-ppc.h new file mode 100644 index 00000000..620a3b51 --- /dev/null +++ b/cipher/simd-common-ppc.h @@ -0,0 +1,77 @@ +/* simd-common-ppc.h - Common macros for PowerPC SIMD code + * + * Copyright (C) 2024 Jussi Kivilinna <jussi.kivili...@iki.fi> + * + * This file is part of 
Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GCRY_SIMD_COMMON_PPC_H +#define GCRY_SIMD_COMMON_PPC_H + +#include <config.h> + +#define memory_barrier_with_vec(a) __asm__("" : "+wa"(a) :: "memory") + +#define clear_vec_regs() __asm__ volatile("xxlxor 0, 0, 0\n" \ + "xxlxor 1, 1, 1\n" \ + "xxlxor 2, 2, 2\n" \ + "xxlxor 3, 3, 3\n" \ + "xxlxor 4, 4, 4\n" \ + "xxlxor 5, 5, 5\n" \ + "xxlxor 6, 6, 6\n" \ + "xxlxor 7, 7, 7\n" \ + "xxlxor 8, 8, 8\n" \ + "xxlxor 9, 9, 9\n" \ + "xxlxor 10, 10, 10\n" \ + "xxlxor 11, 11, 11\n" \ + "xxlxor 12, 12, 12\n" \ + "xxlxor 13, 13, 13\n" \ + "xxlxor 32, 32, 32\n" \ + "xxlxor 33, 33, 33\n" \ + "xxlxor 34, 34, 34\n" \ + "xxlxor 35, 35, 35\n" \ + "xxlxor 36, 36, 36\n" \ + "xxlxor 37, 37, 37\n" \ + "xxlxor 38, 38, 38\n" \ + "xxlxor 39, 39, 39\n" \ + "xxlxor 40, 40, 40\n" \ + "xxlxor 41, 41, 41\n" \ + "xxlxor 42, 42, 42\n" \ + "xxlxor 43, 43, 43\n" \ + "xxlxor 44, 44, 44\n" \ + "xxlxor 45, 45, 45\n" \ + "xxlxor 46, 46, 46\n" \ + "xxlxor 47, 47, 47\n" \ + "xxlxor 48, 48, 48\n" \ + "xxlxor 49, 49, 49\n" \ + "xxlxor 50, 50, 50\n" \ + "xxlxor 51, 51, 51\n" \ + ::: "vs0", "vs1", "vs2", "vs3", \ + "vs4", "vs5", "vs6", "vs7", \ + "vs8", "vs9", "vs10", "vs11", \ + "vs12", "vs13", \ + /* vs14-vs31 (f14-f31) are */ \ + /* ABI callee saved. 
*/ \ + "vs32", "vs33", "vs34", "vs35", \ + "vs36", "vs37", "vs38", "vs39", \ + "vs40", "vs41", "vs42", "vs43", \ + "vs44", "vs45", "vs46", "vs47", \ + "vs48", "vs49", "vs50", "vs51", \ + /* vs52-vs63 (v20-v31) are */ \ + /* ABI callee saved. */ \ + "memory") + +#endif /* GCRY_SIMD_COMMON_PPC_H */ diff --git a/cipher/sm4-ppc.c b/cipher/sm4-ppc.c index bb2c55e0..2b26c39d 100644 --- a/cipher/sm4-ppc.c +++ b/cipher/sm4-ppc.c @@ -25,6 +25,7 @@ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \ !defined(WORDS_BIGENDIAN) && (__GNUC__ >= 4) +#include "simd-common-ppc.h" #include <altivec.h> #include "bufhelp.h" @@ -298,25 +299,28 @@ sm4_ppc_crypt_blk1_16(u32 *rk, byte *out, const byte *in, size_t nblks) if (nblks >= 16) { sm4_ppc_crypt_blk16(rk, out, in); - return; } - - while (nblks >= 8) + else { - sm4_ppc_crypt_blk8(rk, out, in); - in += 8 * 16; - out += 8 * 16; - nblks -= 8; + while (nblks >= 8) + { + sm4_ppc_crypt_blk8(rk, out, in); + in += 8 * 16; + out += 8 * 16; + nblks -= 8; + } + + while (nblks) + { + size_t currblks = nblks > 4 ? 4 : nblks; + sm4_ppc_crypt_blk1_4(rk, out, in, currblks); + in += currblks * 16; + out += currblks * 16; + nblks -= currblks; + } } - while (nblks) - { - size_t currblks = nblks > 4 ? 4 : nblks; - sm4_ppc_crypt_blk1_4(rk, out, in, currblks); - in += currblks * 16; - out += currblks * 16; - nblks -= currblks; - } + clear_vec_regs(); } ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P8 void -- 2.45.2 _______________________________________________ Gcrypt-devel mailing list Gcrypt-devel@gnupg.org https://lists.gnupg.org/mailman/listinfo/gcrypt-devel