On a SPARC T4-2, with CAMELLIA opcodes disabled:

type               16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
camellia-128 cbc   63737.35k    66054.61k    66780.50k    66775.35k    67062.44k
camellia-192 cbc   51126.33k    53836.78k    54761.73k    54964.91k    55017.47k
camellia-256 cbc   51126.24k    53774.55k    54760.02k    54963.54k    55017.47k

with CAMELLIA opcodes enabled: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes camellia-128 cbc 483488.94k 608627.31k 646251.78k 645825.54k 657945.94k camellia-192 cbc 396779.71k 474317.61k 497627.22k 497634.65k 504881.15k camellia-256 cbc 396796.10k 474297.19k 497624.06k 497644.20k 504872.96k Signed-off-by: David S. Miller <da...@davemloft.net> --- If this is applied before the sparc AES opcode patches, there is a minor and easy to resolve conflict in the top-level Configure file. Tested on the full matrix of {static,shared}/linux{,64}-sparcv9 Configure | 2 +- crypto/camellia/Makefile | 2 + crypto/camellia/asm/cmll-sparcv9.S | 604 ++++++++++++++++++++++++++++++++++++ crypto/camellia/cmll_sparccore.c | 219 +++++++++++++ crypto/sparc_arch.h | 11 + 5 files changed, 837 insertions(+), 1 deletion(-) create mode 100644 crypto/camellia/asm/cmll-sparcv9.S create mode 100644 crypto/camellia/cmll_sparccore.c diff --git a/Configure b/Configure index 217a552..b4cbb56 100755 --- a/Configure +++ b/Configure @@ -130,7 +130,7 @@ my $x86_elf_asm="$x86_asm:elf"; my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o"; my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o 
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void"; my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; diff --git a/crypto/camellia/Makefile b/crypto/camellia/Makefile index 8858dd0..6802393 100644 --- a/crypto/camellia/Makefile +++ b/crypto/camellia/Makefile @@ -48,6 +48,8 @@ cmll-x86.s: asm/cmll-x86.pl ../perlasm/x86asm.pl $(PERL) asm/cmll-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ cmll-x86_64.s: asm/cmll-x86_64.pl $(PERL) asm/cmll-x86_64.pl $(PERLASM_SCHEME) > $@ +cmll-sparcv9.s: asm/cmll-sparcv9.S + $(CC) $(CFLAGS) -E asm/cmll-sparcv9.S > $@ files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO diff --git a/crypto/camellia/asm/cmll-sparcv9.S b/crypto/camellia/asm/cmll-sparcv9.S new file mode 100644 index 0000000..015d5ee --- /dev/null +++ b/crypto/camellia/asm/cmll-sparcv9.S @@ -0,0 +1,604 @@ +/* Written by David S. Miller <da...@davemloft.net> for the OpenSSL + * project. The module is, however, dual licensed under OpenSSL and + * CRYPTOGAMS licenses depending on where you obtain it. For further + * details see http://www.openssl.org/~appro/cryptogams/. 
+ */ + +#include "sparc_arch.h" + +#ifdef __arch64__ + .register %g2,#scratch + .register %g3,#scratch +#endif + +#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 4, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 6, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 8, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 10, I0, I1, I0) + +#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \ + CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_FL(KEY_BASE + 12, I0, I0) \ + CAMELLIA_FLI(KEY_BASE + 14, I1, I1) + + .data + + .align 8 +SIGMA: .xword 0xA09E667F3BCC908B + .xword 0xB67AE8584CAA73B2 + .xword 0xC6EF372FE94F82BE + .xword 0x54FF53A5F1D36F1C + .xword 0x10E527FADE682D1D + .xword 0xB05688C2B3E6C1FD + + .text + +SPARC_PIC_THUNK(g3) + + .align 32 + .globl sparc_hw_camellia_ekeygen + .type sparc_hw_camellia_ekeygen,#function +sparc_hw_camellia_ekeygen: + /* %o0=rawkey, %o1=ks, %o2=keybitlength */ + andcc %o0, 0x7, %g0 + be,pt %icc, 1f + nop + alignaddr %o0, %g0, %g1 + ldd [%g1 + 0x00], %f14 + ldd [%g1 + 0x08], %f16 + ldd [%g1 + 0x10], %f18 + faligndata %f14, %f16, %f0 + ba,pt %icc, 2f + faligndata %f16, %f18, %f2 +1: ldd [%o0 + 0x00], %f0 ! i0/i1, k[0]/k[1] + ldd [%o0 + 0x08], %f2 ! i2/i3, k[2]/k[3] +2: std %f0, [%o1 + 0x00] ! k[0, 1] + fsrc2 %f0, %f28 + std %f2, [%o1 + 0x08] ! k[2, 3] + cmp %o2, 128 + be 10f + fsrc2 %f2, %f30 + + andcc %o0, 0x7, %g0 + be,pt %icc, 1f + nop + ldd [%g1 + 0x18], %f16 + ba,pt %icc, 2f + faligndata %f18, %f16, %f0 +1: ldd [%o0 + 0x10], %f0 +2: std %f0, [%o1 + 0x20] ! k[8, 9] + cmp %o2, 192 + fone %f10 + be,a 2f + fxor %f10, %f0, %f2 + + andcc %o0, 0x7, %g0 + be,pt %icc, 1f + nop + ldd [%g1 + 0x20], %f18 + ba,pt %icc, 2f + faligndata %f16, %f18, %f2 +1: ldd [%o0 + 0x18], %f2 +2: + std %f2, [%o1 + 0x28] ! 
k[10, 11] + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + +10: + SPARC_LOAD_ADDRESS_LEAF(SIGMA, g3, g2) + ldd [%g3 + 0x00], %f16 + ldd [%g3 + 0x08], %f18 + ldd [%g3 + 0x10], %f20 + ldd [%g3 + 0x18], %f22 + ldd [%g3 + 0x20], %f24 + ldd [%g3 + 0x28], %f26 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + CAMELLIA_F(20, 2, 0, 2) + CAMELLIA_F(22, 0, 2, 0) + +#define ROTL128(S01, S23, TMP1, TMP2, N) \ + srlx S01, (64 - N), TMP1; \ + sllx S01, N, S01; \ + srlx S23, (64 - N), TMP2; \ + sllx S23, N, S23; \ + or S01, TMP2, S01; \ + or S23, TMP1, S23 + + cmp %o2, 128 + bne 1f + nop + /* 128-bit key */ + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x60] ! k[24, 25] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! k[30, 31] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xc0] ! k[48, 49] + stx %o5, [%o1 + 0xc8] ! k[50, 51] + + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o5, [%o1 + 0x68] ! k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! 
k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + + ba,pt %icc, 2f + mov 3, %o0 + +1: + /* 192-bit or 256-bit key */ + std %f0, [%o1 + 0x30] ! k[12, 13] + std %f2, [%o1 + 0x38] ! k[14, 15] + ldd [%o1 + 0x20], %f4 ! k[ 8, 9] + ldd [%o1 + 0x28], %f6 ! k[10, 11] + fxor %f0, %f4, %f0 + fxor %f2, %f6, %f2 + CAMELLIA_F(24, 2, 0, 2) + CAMELLIA_F(26, 0, 2, 0) + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 51) + stx %o4, [%o1 + 0x100] ! k[64, 65] + stx %o5, [%o1 + 0x108] ! k[66, 67] + ldx [%o1 + 0x20], %o4 ! k[ 8, 9] + ldx [%o1 + 0x28], %o5 ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xd0] ! k[52, 53] + stx %o5, [%o1 + 0xd8] ! k[54, 55] + ldx [%o1 + 0x30], %o4 ! k[12, 13] + ldx [%o1 + 0x38], %o5 ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! k[30, 31] + srlx %o4, 32, %g2 + srlx %o5, 32, %g3 + stw %o4, [%o1 + 0xc0] ! k[48] + stw %g3, [%o1 + 0xc4] ! k[49] + stw %o5, [%o1 + 0xc8] ! k[50] + stw %g2, [%o1 + 0xcc] ! k[51] + ROTL128(%o4, %o5, %g2, %g3, 49) + stx %o4, [%o1 + 0xe0] ! k[56, 57] + stx %o5, [%o1 + 0xe8] ! k[58, 59] + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 45) + stx %o4, [%o1 + 0x60] ! k[24, 25] + stx %o5, [%o1 + 0x68] ! 
k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xf0] ! k[60, 61] + stx %o5, [%o1 + 0xf8] ! k[62, 63] + mov 4, %o0 +2: retl + nop + .size sparc_hw_camellia_ekeygen,.-sparc_hw_camellia_ekeygen + + .align 32 + .globl sparc_hw_camellia_encrypt + .type sparc_hw_camellia_encrypt,#function +sparc_hw_camellia_encrypt: + /* %o0=key, %o1=input, %o2=output, %o3=rounds */ + ld [%o1 + 0x00], %f0 + ld [%o1 + 0x04], %f1 + ld [%o1 + 0x08], %f2 + ld [%o1 + 0x0c], %f3 + + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + + cmp %o3, 3 + fxor %f4, %f0, %f0 + be 1f + fxor %f6, %f2, %f2 + + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + add %o0, 0x40, %o0 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + +1: + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + ldd [%o0 + 0xc8], %f54 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + + st %f2, [%o2 + 0x00] + st %f3, [%o2 + 0x04] + st %f0, [%o2 + 0x08] + st %f1, [%o2 + 0x0c] + + retl + nop + .size sparc_hw_camellia_encrypt,.-sparc_hw_camellia_encrypt + + .align 32 + 
.globl sparc_hw_camellia_decrypt + .type sparc_hw_camellia_decrypt,#function +sparc_hw_camellia_decrypt: + /* %o0=key, %o1=input, %o2=output, %o3=rounds */ + ld [%o1 + 0x00], %f0 + ld [%o1 + 0x04], %f1 + ld [%o1 + 0x08], %f2 + ld [%o1 + 0x0c], %f3 + + sll %o3, 6, %o4 + add %o0, %o4, %o0 + + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + + cmp %o3, 3 + fxor %f4, %f0, %f0 + be 1f + fxor %f6, %f2, %f2 + + ldd [%o0 - 0x08], %f8 + ldd [%o0 - 0x10], %f10 + ldd [%o0 - 0x18], %f12 + ldd [%o0 - 0x20], %f14 + ldd [%o0 - 0x28], %f16 + ldd [%o0 - 0x30], %f18 + ldd [%o0 - 0x38], %f20 + ldd [%o0 - 0x40], %f22 + sub %o0, 0x40, %o0 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + +1: + ldd [%o0 - 0x08], %f8 + ldd [%o0 - 0x10], %f10 + ldd [%o0 - 0x18], %f12 + ldd [%o0 - 0x20], %f14 + ldd [%o0 - 0x28], %f16 + ldd [%o0 - 0x30], %f18 + ldd [%o0 - 0x38], %f20 + ldd [%o0 - 0x40], %f22 + ldd [%o0 - 0x48], %f24 + ldd [%o0 - 0x50], %f26 + ldd [%o0 - 0x58], %f28 + ldd [%o0 - 0x60], %f30 + ldd [%o0 - 0x68], %f32 + ldd [%o0 - 0x70], %f34 + ldd [%o0 - 0x78], %f36 + ldd [%o0 - 0x80], %f38 + ldd [%o0 - 0x88], %f40 + ldd [%o0 - 0x90], %f42 + ldd [%o0 - 0x98], %f44 + ldd [%o0 - 0xa0], %f46 + ldd [%o0 - 0xa8], %f48 + ldd [%o0 - 0xb0], %f50 + ldd [%o0 - 0xc0], %f52 + ldd [%o0 - 0xb8], %f54 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + + st %f2, [%o2 + 0x00] + st %f3, [%o2 + 0x04] + st %f0, [%o2 + 0x08] + st %f1, [%o2 + 0x0c] + + retl + nop + .size sparc_hw_camellia_decrypt,.-sparc_hw_camellia_decrypt + +#define LOAD_ENCRYPT_KEY(REG) \ + ldd [%REG + 0x00], %f4; \ + ldd [%REG + 0x08], %f6; \ + ldd [%REG + 0x10], %f8; \ + ldd [%REG + 0x18], %f10; \ + ldd [%REG + 0x20], %f12; \ + ldd [%REG + 0x28], %f14; \ + ldd [%REG + 0x30], %f16; \ + ldd [%REG + 0x38], %f18; \ + ldd [%REG + 0x40], %f20; \ + ldd [%REG + 0x48], %f22; \ + ldd [%REG + 0x50], %f24; \ + ldd [%REG + 0x58], %f26; \ + ldd [%REG + 0x60], %f28; \ + 
ldd [%REG + 0x68], %f30; \ + ldd [%REG + 0x70], %f32; \ + ldd [%REG + 0x78], %f34; \ + ldd [%REG + 0x80], %f36; \ + ldd [%REG + 0x88], %f38; \ + ldd [%REG + 0x90], %f40; \ + ldd [%REG + 0x98], %f42; \ + ldd [%REG + 0xa0], %f44; \ + ldd [%REG + 0xa8], %f46; \ + ldd [%REG + 0xb0], %f48; \ + ldd [%REG + 0xb8], %f50; \ + ldd [%REG + 0xc0], %f52; \ + ldd [%REG + 0xc8], %f54; + + .align 32 + .globl sparc_hw_camellia_cbc_encrypt_3rounds + .type sparc_hw_camellia_cbc_encrypt_3rounds,#function +sparc_hw_camellia_cbc_encrypt_3rounds: + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + LOAD_ENCRYPT_KEY(o3) + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f60 + fxor %f54, %f0, %f62 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] + .size sparc_hw_camellia_cbc_encrypt_3rounds,.-sparc_hw_camellia_cbc_encrypt_3rounds + + .align 32 + .globl sparc_hw_camellia_cbc_encrypt_4rounds + .type sparc_hw_camellia_cbc_encrypt_4rounds,#function +sparc_hw_camellia_cbc_encrypt_4rounds: + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + LOAD_ENCRYPT_KEY(o3) + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + 
CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f60 + fxor %f22, %f0, %f62 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] + .size sparc_hw_camellia_cbc_encrypt_4rounds,.-sparc_hw_camellia_cbc_encrypt_4rounds + +#define LOAD_DECRYPT_KEY(REG, OFF) \ + ldd [%REG + OFF + 0x00], %f4; \ + ldd [%REG + OFF + 0x08], %f6; \ + ldd [%REG + OFF - 0x08], %f8; \ + ldd [%REG + OFF - 0x10], %f10; \ + ldd [%REG + OFF - 0x18], %f12; \ + ldd [%REG + OFF - 0x20], %f14; \ + ldd [%REG + OFF - 0x28], %f16; \ + ldd [%REG + OFF - 0x30], %f18; \ + ldd [%REG + OFF - 0x38], %f20; \ + ldd [%REG + OFF - 0x40], %f22; \ + ldd [%REG + OFF - 0x48], %f24; \ + ldd [%REG + OFF - 0x50], %f26; \ + ldd [%REG + OFF - 0x58], %f28; \ + ldd [%REG + OFF - 0x60], %f30; \ + ldd [%REG + OFF - 0x68], %f32; \ + ldd [%REG + OFF - 0x70], %f34; \ + ldd [%REG + OFF - 0x78], %f36; \ + ldd [%REG + OFF - 0x80], %f38; \ + ldd [%REG + OFF - 0x88], %f40; \ + ldd [%REG + OFF - 0x90], %f42; \ + ldd [%REG + OFF - 0x98], %f44; \ + ldd [%REG + OFF - 0xa0], %f46; \ + ldd [%REG + OFF - 0xa8], %f48; \ + ldd [%REG + OFF - 0xb0], %f50; + + .align 32 + .globl sparc_hw_camellia_cbc_decrypt_3rounds + .type sparc_hw_camellia_cbc_decrypt_3rounds,#function +sparc_hw_camellia_cbc_decrypt_3rounds: + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + LOAD_DECRYPT_KEY(o3, 0x0c0) + ldd [%o3 + 0x00], %f52 + ldd [%o3 + 0x08], %f54 + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) 
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] + .size sparc_hw_camellia_cbc_decrypt_3rounds,.-sparc_hw_camellia_cbc_decrypt_3rounds + + .align 32 + .globl sparc_hw_camellia_cbc_decrypt_4rounds + .type sparc_hw_camellia_cbc_decrypt_4rounds,#function +sparc_hw_camellia_cbc_decrypt_4rounds: + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + LOAD_DECRYPT_KEY(o3, 0x100) + ldd [%o3 + 0x100 - 0xb8], %f52 + ldd [%o3 + 0x100 - 0xc0], %f54 + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0x100 - 0xc8], %f8 + ldd [%o3 + 0x100 - 0xd0], %f10 + ldd [%o3 + 0x100 - 0xd8], %f12 + ldd [%o3 + 0x100 - 0xe0], %f14 + ldd [%o3 + 0x100 - 0xe8], %f16 + ldd [%o3 + 0x100 - 0xf0], %f18 + ldd [%o3 + 0x00], %f20 + ldd [%o3 + 0x08], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x100 - 0x08], %f8 + ldd [%o3 + 0x100 - 0x10], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x100 - 0x18], %f12 + ldd [%o3 + 0x100 - 0x20], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x100 - 0x28], %f16 + ldd [%o3 + 0x100 - 0x30], %f18 + fxor %f20, %f2, %f2 + fxor %f22, %f0, %f0 + ldd [%o3 + 0x100 - 0x38], %f20 + ldd [%o3 + 0x100 - 0x40], %f22 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] + .size 
sparc_hw_camellia_cbc_decrypt_4rounds,.-sparc_hw_camellia_cbc_decrypt_4rounds
diff --git a/crypto/camellia/cmll_sparccore.c b/crypto/camellia/cmll_sparccore.c
new file mode 100644
index 0000000..0133a36
--- /dev/null
+++ b/crypto/camellia/cmll_sparccore.c
@@ -0,0 +1,219 @@
+#include <openssl/opensslv.h>
+#include <openssl/camellia.h>
+#include <openssl/crypto.h>
+#include <openssl/modes.h>
+#include "cmll_locl.h"
+
+#include "sparc_arch.h"
+
+const char CAMELLIA_version[]="CAMELLIA" OPENSSL_VERSION_PTEXT;
+/*
+ * The three #defines below rename Camellia_Ekeygen,
+ * Camellia_EncryptBlock_Rounds and Camellia_DecryptBlock_Rounds to
+ * _generic_* names before camellia.c is textually included, so the
+ * wrappers further down can call the portable code under those names
+ * when the SPARCV9_CAMELLIA capability bit is clear.
+ * NOTE(review): assumes camellia.c defines exactly these three symbols;
+ * that file is not part of this patch -- confirm.
+ */
+#define Camellia_Ekeygen _generic_camellia_ekeygen
+int _generic_camellia_ekeygen(int keyBitLength, const u8 *rawKey,
+			      KEY_TABLE_TYPE k);
+
+#define Camellia_EncryptBlock_Rounds _generic_camellia_encryptblock_rounds
+void _generic_camellia_encryptblock_rounds(int grandRounds,
+					   const u8 plaintext[],
+					   const KEY_TABLE_TYPE keyTable,
+					   u8 ciphertext[]);
+
+#define Camellia_DecryptBlock_Rounds _generic_camellia_decryptblock_rounds
+void _generic_camellia_decryptblock_rounds(int grandRounds,
+					   const u8 ciphertext[],
+					   const KEY_TABLE_TYPE keyTable,
+					   u8 plaintext[]);
+
+#include "camellia.c"
+/*
+ * Assembly key schedule from asm/cmll-sparcv9.S.  Arguments are
+ * (rawkey, ks, keybitlength); the routine returns 3 on its 128-bit
+ * path and 4 on its 192/256-bit path.
+ */
+extern int sparc_hw_camellia_ekeygen(const unsigned char *userkey,
+				     KEY_TABLE_TYPE key, const int bits);
+/*
+ * Expand userKey (128, 192 or 256 bits) into key->u.rd_key and store the
+ * round count returned by the key schedule in key->grand_rounds.
+ *
+ * Returns 0 on success, -1 if userKey or key is NULL, and -2 for any
+ * other value of bits.  The hardware key schedule is used only when
+ * OPENSSL_sparcv9cap_P has SPARCV9_CAMELLIA set; otherwise the generic
+ * C key schedule runs.
+ */
+int Camellia_set_key(const unsigned char *userKey, const int bits,
+		     CAMELLIA_KEY *key)
+{
+	int rnds;
+
+	if (!userKey || !key)
+		return -1;
+	if (bits != 128 && bits != 192 && bits != 256)
+		return -2;
+
+	if (OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA)
+		rnds = sparc_hw_camellia_ekeygen(userKey, key->u.rd_key, bits);
+	else
+		rnds = _generic_camellia_ekeygen(bits, userKey, key->u.rd_key);
+
+	key->grand_rounds = rnds;
+
+	return 0;
+}
+/* Pointer element type used for block and IV arguments of sparc_hw_*. */
+typedef unsigned long long cmll_u64;
+/*
+ * Single-block assembly encrypt: (key table, input block, output block,
+ * round count as stored in CAMELLIA_KEY.grand_rounds).
+ */
+extern void sparc_hw_camellia_encrypt(const KEY_TABLE_TYPE k,
+				      const cmll_u64 *in,
+				      cmll_u64 *out, int rounds);
+/*
+ * Encrypt one CAMELLIA_BLOCK_SIZE block from in to out.  Uses the generic
+ * C rounds when the SPARCV9_CAMELLIA capability bit is clear; otherwise
+ * calls the assembly routine, staging data through the output buffer or
+ * a local bounce buffer when in or out is not 4-byte aligned.
+ */
+void Camellia_encrypt(const unsigned char *in, unsigned char *out,
+		      const CAMELLIA_KEY *key)
+{
+	const cmll_u64 *aligned_in;
+	cmll_u64 *aligned_out;
+	cmll_u64 bounce[2];
+
+	
if (!(OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA)) {
+		_generic_camellia_encryptblock_rounds(key->grand_rounds, in,
+						      key->u.rd_key, out);
+		return;
+	}
+	/*
+	 * sparc_hw_camellia_encrypt reads the block with four 32-bit "ld"
+	 * and writes it with four 32-bit "st", so 4-byte alignment is all
+	 * that is checked here.  All input words are loaded before the
+	 * first store, so a misaligned input may be staged in the output
+	 * buffer itself.
+	 */
+	aligned_out = (cmll_u64 *) out;
+	if ((unsigned long) out & 0x3)
+		aligned_out = bounce;
+	aligned_in = (const cmll_u64 *) in;
+	if ((unsigned long) in & 0x3) {
+		memcpy(aligned_out, in, CAMELLIA_BLOCK_SIZE);
+		aligned_in = (const cmll_u64 *) aligned_out;
+	}
+
+	sparc_hw_camellia_encrypt(key->u.rd_key, aligned_in,
+				  aligned_out, key->grand_rounds);
+
+	if (aligned_out == bounce)
+		memcpy(out, aligned_out, CAMELLIA_BLOCK_SIZE);
+}
+/*
+ * Single-block assembly decrypt; same argument order as
+ * sparc_hw_camellia_encrypt.
+ */
+extern void sparc_hw_camellia_decrypt(const KEY_TABLE_TYPE k,
+				      const cmll_u64 *in,
+				      cmll_u64 *out, int rounds);
+/*
+ * Decrypt one CAMELLIA_BLOCK_SIZE block from in to out.  Mirrors
+ * Camellia_encrypt: generic C rounds when the SPARCV9_CAMELLIA capability
+ * bit is clear, otherwise the assembly routine with the same 4-byte
+ * alignment staging.
+ */
+void Camellia_decrypt(const unsigned char *in, unsigned char *out,
+		      const CAMELLIA_KEY *key)
+{
+	const cmll_u64 *aligned_in;
+	cmll_u64 *aligned_out;
+	cmll_u64 bounce[2];
+
+	if (!(OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA)) {
+		_generic_camellia_decryptblock_rounds(key->grand_rounds, in,
+						      key->u.rd_key, out);
+		return;
+	}
+	/* out misaligned: let the assembly write into the local bounce buffer */
+	aligned_out = (cmll_u64 *) out;
+	if ((unsigned long) out & 0x3)
+		aligned_out = bounce;
+	aligned_in = (const cmll_u64 *) in;
+	if ((unsigned long) in & 0x3) {
+		memcpy(aligned_out, in, CAMELLIA_BLOCK_SIZE);
+		aligned_in = (const cmll_u64 *) aligned_out;
+	}
+
+	sparc_hw_camellia_decrypt(key->u.rd_key, aligned_in,
+				  aligned_out, key->grand_rounds);
+
+	if (aligned_out == bounce)
+		memcpy(out, aligned_out, CAMELLIA_BLOCK_SIZE);
+}
+/*
+ * Assembly CBC loops (3 grand rounds = 128-bit keys, 4 = 192/256-bit).
+ * in, out and IV are accessed with 64-bit ldd/std.  Each loop handles one
+ * 16-byte block, subtracts 0x10 from length and exits only when the
+ * 32-bit result is exactly zero, so length must be a non-zero multiple
+ * of 16.  The final chaining value is written back through IV.
+ */
+extern void sparc_hw_camellia_cbc_encrypt_3rounds(const cmll_u64 *in,
+						   cmll_u64 *out,
+						   unsigned int length,
+						   const KEY_TABLE_TYPE k,
+						   cmll_u64 *IV);
+
+extern void sparc_hw_camellia_cbc_encrypt_4rounds(const cmll_u64 *in,
+						   cmll_u64 *out,
+						   unsigned int length,
+						   const KEY_TABLE_TYPE k,
+						   cmll_u64 *IV);
+
+extern void sparc_hw_camellia_cbc_decrypt_3rounds(const cmll_u64 *in,
+						   cmll_u64 *out,
+						   unsigned int length,
+						   const KEY_TABLE_TYPE k,
+						   cmll_u64 *IV);
+
+extern void
sparc_hw_camellia_cbc_decrypt_4rounds(const cmll_u64 *in, + cmll_u64 *out, + unsigned int length, + const KEY_TABLE_TYPE k, + cmll_u64 *IV); + +void Camellia_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const CAMELLIA_KEY *key, + unsigned char *ivec, const int enc) +{ + const cmll_u64 *aligned_in; + cmll_u64 *aligned_ivec; + cmll_u64 *aligned_out; + cmll_u64 ivb[2]; + + if (!(OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA)) + goto slow; + + aligned_out = (cmll_u64 *) out; + if ((unsigned long) out & 0x7) { + aligned_out = OPENSSL_malloc(length); + if (!aligned_out) + goto slow; + } + + aligned_in = (const cmll_u64 *) in; + if ((unsigned long) in & 0x7) { + memcpy(aligned_out, in, length); + aligned_in = (const cmll_u64 *) aligned_out; + } + + aligned_ivec = (cmll_u64 *) ivec; + if ((unsigned long) ivec & 0x7) { + memcpy(ivb, ivec, sizeof(ivb)); + aligned_ivec = ivb; + } + + if (enc) { + if (key->grand_rounds == 3) + sparc_hw_camellia_cbc_encrypt_3rounds(aligned_in, + aligned_out, + length, + key->u.rd_key, + aligned_ivec); + else + sparc_hw_camellia_cbc_encrypt_4rounds(aligned_in, + aligned_out, + length, + key->u.rd_key, + aligned_ivec); + } else { + if (key->grand_rounds == 3) + sparc_hw_camellia_cbc_decrypt_3rounds(aligned_in, + aligned_out, + length, + key->u.rd_key, + aligned_ivec); + else + sparc_hw_camellia_cbc_decrypt_4rounds(aligned_in, + aligned_out, + length, + key->u.rd_key, + aligned_ivec); + } + if ((unsigned long) out & 0x7) { + memcpy(out, aligned_out, length); + OPENSSL_free(aligned_out); + } + + if (aligned_ivec == ivb) + memcpy(ivec, ivb, sizeof(ivb)); + + return; + +slow: + if (enc) + CRYPTO_cbc128_encrypt(in, out, length, key, ivec, + (block128_f)Camellia_encrypt); + else + CRYPTO_cbc128_decrypt(in, out, length, key, ivec, + (block128_f)Camellia_decrypt); +} diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h index 032d67c..fe9805d 100644 --- a/crypto/sparc_arch.h +++ b/crypto/sparc_arch.h @@ -61,6 +61,17 @@ extern int 
OPENSSL_sparcv9cap_P;
 #define AES_KEXPAND2(a,b,c)	\
 	.word	(F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));
+#define CAMELLIA_F(a,b,c,d)	/* raw opcode word; a,b,c,d fill RS1,RS2,RS3,RD */ \
+	.word	(F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define CAMELLIA_FL(a,b,c)	/* raw opcode word; a,b,c fill RS1,RS2,RD */ \
+	.word	(F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
+#define CAMELLIA_FLI(a,b,c)	/* raw opcode word; a,b,c fill RS1,RS2,RD */ \
+	.word	(F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));
+/* Pre-encoded moves from %f0 to %o4 and from %f2 to %o5 (rd=12/13, rs2=0/2). */
+#define MOVDTOX_F0_O4	\
+	.word	0x99b02200
+#define MOVDTOX_F2_O5	\
+	.word	0x9bb02202
 #define MOVXTOD_G3_F4		\
 	.word	0x89b02303;
 #define MOVXTOD_G5_F6		\
-- 
1.7.10.4
______________________________________________________________________
OpenSSL Project http://www.openssl.org
Development Mailing List openssl-dev@openssl.org
Automated List Manager majord...@openssl.org