Module Name:    src
Committed By:   christos
Date:           Sun Mar 4 16:45:12 UTC 2018

Modified Files:
        src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64:
            aesp8-ppc.S ghashp8-ppc.S ppccpuid.S
Added Files:
        src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64:
            chacha-ppc.S chacha.inc poly1305-ppc.S poly1305-ppcfp.S
            poly1305.inc

Log Message:
regen

To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/aesp8-ppc.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ghashp8-ppc.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ppccpuid.S
cvs rdiff -u -r0 -r1.1 \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha-ppc.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha.inc \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppc.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppcfp.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305.inc

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/aesp8-ppc.S diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/aesp8-ppc.S:1.1 src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/aesp8-ppc.S:1.2 --- src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/aesp8-ppc.S:1.1 Sat Oct 15 08:19:02 2016 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/aesp8-ppc.S Sun Mar 4 11:45:12 2018 @@ -1868,3 +1868,1822 @@ _aesp8_ctr32_encrypt8x: .byte 0,12,0x04,0,0x80,6,6,0 .long 0 +.globl aes_p8_xts_encrypt +.type aes_p8_xts_encrypt,@function +.section ".opd","aw" +.align 3 +aes_p8_xts_encrypt: +.quad .aes_p8_xts_encrypt,.TOC.@tocbase,0 +.previous + +.align 5 +.aes_p8_xts_encrypt: + mr 10,3 + li 3,-1 + cmpldi 5,16 + bltlr + + lis 0,0xfff0 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmpldi 7,0 + beq .Lxts_enc_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +.Ltweak_xts_enc: + vperm 1,1,0,7 +.long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 +.long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,.Ltweak_xts_enc + + vperm 1,1,0,7 +.long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 +.long 0x11080509 + + li 8,0 + b .Lxts_enc + +.Lxts_enc_no_key2: + li 3,-16 + and 5,5,3 + + +.Lxts_enc: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_encrypt6x + + andi. 
7,5,15 + subic 0,5,32 + subi 7,7,16 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + b .Loop_xts_enc + +.align 5 +.Loop_xts_enc: + vperm 1,1,0,7 +.long 0x10420D08 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 +.long 0x10420508 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,.Loop_xts_enc + + vperm 1,1,0,7 +.long 0x10420D08 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 +.long 0x10620509 + + + nop + +.long 0x7C602799 + addi 4,4,16 + + subic. 5,5,16 + beq .Lxts_enc_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + subic 0,5,32 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 3,3,0 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge .Loop_xts_enc + + vxor 3,3,8 + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + subi 11,4,17 + subi 4,4,16 + mtctr 5 + li 5,16 +.Loop_xts_enc_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,.Loop_xts_enc_steal + + mtctr 9 + b .Loop_xts_enc + +.Lxts_enc_done: + cmpldi 8,0 + beq .Lxts_enc_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + +.long 0x7D004799 + +.Lxts_enc_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + + +.globl aes_p8_xts_decrypt +.type aes_p8_xts_decrypt,@function +.section ".opd","aw" +.align 3 +aes_p8_xts_decrypt: +.quad .aes_p8_xts_decrypt,.TOC.@tocbase,0 +.previous + +.align 5 +.aes_p8_xts_decrypt: + mr 10,3 + li 3,-1 + cmpldi 5,16 + bltlr + + lis 0,0xfff8 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + andi. 0,5,15 + neg 0,0 + andi. 
0,0,16 + sub 5,5,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmpldi 7,0 + beq .Lxts_dec_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +.Ltweak_xts_dec: + vperm 1,1,0,7 +.long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 +.long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,.Ltweak_xts_dec + + vperm 1,1,0,7 +.long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 +.long 0x11080509 + + li 8,0 + b .Lxts_dec + +.Lxts_dec_no_key2: + neg 3,5 + andi. 3,3,15 + add 5,5,3 + + +.Lxts_dec: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_decrypt6x + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + + cmpldi 5,16 + blt .Ltail_xts_dec + b .Loop_xts_dec + +.align 5 +.Loop_xts_dec: + vperm 1,1,0,7 +.long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 +.long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,.Loop_xts_dec + + vperm 1,1,0,7 +.long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 +.long 0x10620549 + + + nop + +.long 0x7C602799 + addi 4,4,16 + + subic. 
5,5,16 + beq .Lxts_dec_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge .Loop_xts_dec + +.Ltail_xts_dec: + vsrab 11,8,9 + vaddubm 12,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 12,12,11 + + subi 10,10,16 + add 10,10,5 + + vxor 2,2,8 + vxor 2,2,12 + +.Loop_xts_dec_short: + vperm 1,1,0,7 +.long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 +.long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,.Loop_xts_dec_short + + vperm 1,1,0,7 +.long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,12 +.long 0x10620549 + + + nop + +.long 0x7C602799 + + vor 2,4,4 + lvx 4,0,10 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + vxor 0,0,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + subi 11,4,1 + mtctr 5 + li 5,16 +.Loop_xts_dec_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,.Loop_xts_dec_steal + + mtctr 9 + b .Loop_xts_dec + +.Lxts_dec_done: + cmpldi 8,0 + beq .Lxts_dec_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + +.long 0x7D004799 + +.Lxts_dec_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_encrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 
30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,64+15 + mtctr 9 + +.Load_xts_enc_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,.Load_xts_enc_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + +.long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + +.long 0x7C5A5699 + andi. 31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + +.long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + +.long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + +.long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b .Loop_xts_enc6x + +.align 5 +.Loop_xts_enc6x: +.long 0x10E7C508 +.long 0x118CC508 +.long 0x11ADC508 +.long 0x11CEC508 +.long 0x11EFC508 +.long 0x1210C508 + lvx 24,26,7 + addi 7,7,0x20 + +.long 0x10E7CD08 +.long 0x118CCD08 +.long 0x11ADCD08 +.long 0x11CECD08 +.long 0x11EFCD08 +.long 0x1210CD08 + lvx 25,3,7 + bc 16,0,.Loop_xts_enc6x + + subic 5,5,96 + vxor 0,17,31 +.long 0x10E7C508 +.long 0x118CC508 + vsrab 
11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 +.long 0x11ADC508 +.long 0x11CEC508 + vsldoi 11,11,11,15 +.long 0x11EFC508 +.long 0x1210C508 + + subfe. 0,0,0 + vand 11,11,10 +.long 0x10E7CD08 +.long 0x118CCD08 + vxor 8,8,11 +.long 0x11ADCD08 +.long 0x11CECD08 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 +.long 0x11EFCD08 +.long 0x1210CD08 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 +.long 0x10E7D508 +.long 0x118CD508 + vand 11,11,10 +.long 0x11ADD508 +.long 0x11CED508 + vxor 8,8,11 +.long 0x11EFD508 +.long 0x1210D508 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 +.long 0x10E7DD08 +.long 0x118CDD08 + vsldoi 11,11,11,15 +.long 0x11ADDD08 +.long 0x11CEDD08 + vand 11,11,10 +.long 0x11EFDD08 +.long 0x1210DD08 + + addi 7,1,64+15 + vxor 8,8,11 +.long 0x10E7E508 +.long 0x118CE508 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 +.long 0x11ADE508 +.long 0x11CEE508 + vaddubm 8,8,8 + vsldoi 11,11,11,15 +.long 0x11EFE508 +.long 0x1210E508 + lvx 24,0,7 + vand 11,11,10 + +.long 0x10E7ED08 +.long 0x118CED08 + vxor 8,8,11 +.long 0x11ADED08 +.long 0x11CEED08 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 +.long 0x11EFED08 +.long 0x1210ED08 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + +.long 0x10E7F508 +.long 0x118CF508 + vand 11,11,10 +.long 0x11ADF508 +.long 0x11CEF508 + vxor 8,8,11 +.long 0x11EFF508 +.long 0x1210F508 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + +.long 0x10E70509 +.long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 +.long 0x118C0D09 +.long 0x7C235699 +.long 0x11AD1509 + +.long 0x7C5A5699 + vand 11,11,10 +.long 0x11CE1D09 + +.long 0x7C7B5699 +.long 0x11EF2509 + +.long 0x7C9C5699 + vxor 8,8,11 +.long 0x11702D09 + + +.long 0x7CBD5699 + addi 10,10,0x60 + + + + + +.long 0x7CE02799 + vxor 7,0,17 + +.long 0x7D832799 + vxor 12,1,18 + +.long 0x7DBA2799 + vxor 13,2,19 + +.long 0x7DDB2799 + vxor 14,3,20 + +.long 0x7DFC2799 + vxor 15,4,21 + +.long 0x7D7D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq .Loop_xts_enc6x + + addic. 
5,5,0x60 + beq .Lxts_enc6x_zero + cmpwi 5,0x20 + blt .Lxts_enc6x_one + nop + beq .Lxts_enc6x_two + cmpwi 5,0x40 + blt .Lxts_enc6x_three + nop + beq .Lxts_enc6x_four + +.Lxts_enc6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_enc5x + + + vor 17,22,22 + +.long 0x7CE02799 + +.long 0x7D832799 + +.long 0x7DBA2799 + vxor 11,15,22 + +.long 0x7DDB2799 +.long 0x7DFC2799 + addi 4,4,0x50 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,21,21 + +.long 0x7CE02799 + +.long 0x7D832799 + vxor 11,14,21 + +.long 0x7DBA2799 +.long 0x7DDB2799 + addi 4,4,0x40 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,20,20 + +.long 0x7CE02799 + vxor 11,13,20 + +.long 0x7D832799 +.long 0x7DBA2799 + addi 4,4,0x30 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,19,19 + vxor 11,12,19 + +.long 0x7CE02799 +.long 0x7D832799 + addi 4,4,0x20 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_one: + vxor 7,5,17 + nop +.Loop_xts_enc1x: +.long 0x10E7C508 + lvx 24,26,7 + addi 7,7,0x20 + +.long 0x10E7CD08 + lvx 25,3,7 + bc 16,0,.Loop_xts_enc1x + + add 10,10,31 + cmpwi 31,0 +.long 0x10E7C508 + + subi 10,10,16 +.long 0x10E7CD08 + + lvsr 5,0,31 +.long 0x10E7D508 + +.long 0x7C005699 +.long 0x10E7DD08 + + addi 7,1,64+15 +.long 0x10E7E508 + lvx 24,0,7 + +.long 0x10E7ED08 + lvx 25,3,7 + vxor 17,17,31 + + +.long 0x10E7F508 + + vperm 0,0,0,5 +.long 0x10E78D09 + + vor 17,18,18 + vxor 11,7,18 + +.long 0x7CE02799 + addi 4,4,0x10 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_zero: + cmpwi 31,0 + beq 
.Lxts_enc6x_done + + add 10,10,31 + subi 10,10,16 +.long 0x7C005699 + lvsr 5,0,31 + + vperm 0,0,0,5 + vxor 11,11,17 +.Lxts_enc6x_steal: + vxor 0,0,17 + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + + subi 30,4,17 + subi 4,4,16 + mtctr 31 +.Loop_xts_enc6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,.Loop_xts_enc6x_steal + + li 31,0 + mtctr 9 + b .Loop_xts_enc1x + +.align 4 +.Lxts_enc6x_done: + cmpldi 8,0 + beq .Lxts_enc6x_ret + + vxor 8,17,23 + +.long 0x7D004799 + +.Lxts_enc6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_enc5x: +.long 0x10E7C508 +.long 0x118CC508 +.long 0x11ADC508 +.long 0x11CEC508 +.long 0x11EFC508 + lvx 24,26,7 + addi 7,7,0x20 + +.long 0x10E7CD08 +.long 0x118CCD08 +.long 0x11ADCD08 +.long 0x11CECD08 +.long 0x11EFCD08 + lvx 25,3,7 + bc 16,0,_aesp8_xts_enc5x + + add 10,10,31 + cmpwi 31,0 +.long 0x10E7C508 +.long 0x118CC508 +.long 0x11ADC508 +.long 0x11CEC508 +.long 0x11EFC508 + + subi 10,10,16 +.long 0x10E7CD08 +.long 0x118CCD08 +.long 0x11ADCD08 +.long 0x11CECD08 +.long 0x11EFCD08 + vxor 17,17,31 + +.long 0x10E7D508 + lvsr 5,0,31 +.long 0x118CD508 +.long 0x11ADD508 +.long 0x11CED508 +.long 0x11EFD508 + vxor 1,18,31 + +.long 0x10E7DD08 +.long 0x7C005699 +.long 0x118CDD08 +.long 
0x11ADDD08 +.long 0x11CEDD08 +.long 0x11EFDD08 + vxor 2,19,31 + + addi 7,1,64+15 +.long 0x10E7E508 +.long 0x118CE508 +.long 0x11ADE508 +.long 0x11CEE508 +.long 0x11EFE508 + lvx 24,0,7 + vxor 3,20,31 + +.long 0x10E7ED08 + +.long 0x118CED08 +.long 0x11ADED08 +.long 0x11CEED08 +.long 0x11EFED08 + lvx 25,3,7 + vxor 4,21,31 + +.long 0x10E7F508 + vperm 0,0,0,5 +.long 0x118CF508 +.long 0x11ADF508 +.long 0x11CEF508 +.long 0x11EFF508 + +.long 0x10E78D09 +.long 0x118C0D09 +.long 0x11AD1509 +.long 0x11CE1D09 +.long 0x11EF2509 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,64+15 + mtctr 9 + +.Load_xts_dec_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,.Load_xts_dec_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + +.long 0x7C235699 + vxor 18,8,23 + vsrab 
11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + +.long 0x7C5A5699 + andi. 31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + +.long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + +.long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + +.long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b .Loop_xts_dec6x + +.align 5 +.Loop_xts_dec6x: +.long 0x10E7C548 +.long 0x118CC548 +.long 0x11ADC548 +.long 0x11CEC548 +.long 0x11EFC548 +.long 0x1210C548 + lvx 24,26,7 + addi 7,7,0x20 + +.long 0x10E7CD48 +.long 0x118CCD48 +.long 0x11ADCD48 +.long 0x11CECD48 +.long 0x11EFCD48 +.long 0x1210CD48 + lvx 25,3,7 + bc 16,0,.Loop_xts_dec6x + + subic 5,5,96 + vxor 0,17,31 +.long 0x10E7C548 +.long 0x118CC548 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 +.long 0x11ADC548 +.long 0x11CEC548 + vsldoi 11,11,11,15 +.long 0x11EFC548 +.long 0x1210C548 + + subfe. 
0,0,0 + vand 11,11,10 +.long 0x10E7CD48 +.long 0x118CCD48 + vxor 8,8,11 +.long 0x11ADCD48 +.long 0x11CECD48 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 +.long 0x11EFCD48 +.long 0x1210CD48 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 +.long 0x10E7D548 +.long 0x118CD548 + vand 11,11,10 +.long 0x11ADD548 +.long 0x11CED548 + vxor 8,8,11 +.long 0x11EFD548 +.long 0x1210D548 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 +.long 0x10E7DD48 +.long 0x118CDD48 + vsldoi 11,11,11,15 +.long 0x11ADDD48 +.long 0x11CEDD48 + vand 11,11,10 +.long 0x11EFDD48 +.long 0x1210DD48 + + addi 7,1,64+15 + vxor 8,8,11 +.long 0x10E7E548 +.long 0x118CE548 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 +.long 0x11ADE548 +.long 0x11CEE548 + vaddubm 8,8,8 + vsldoi 11,11,11,15 +.long 0x11EFE548 +.long 0x1210E548 + lvx 24,0,7 + vand 11,11,10 + +.long 0x10E7ED48 +.long 0x118CED48 + vxor 8,8,11 +.long 0x11ADED48 +.long 0x11CEED48 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 +.long 0x11EFED48 +.long 0x1210ED48 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + +.long 0x10E7F548 +.long 0x118CF548 + vand 11,11,10 +.long 0x11ADF548 +.long 0x11CEF548 + vxor 8,8,11 +.long 0x11EFF548 +.long 0x1210F548 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + +.long 0x10E70549 +.long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 +.long 0x118C0D49 +.long 0x7C235699 +.long 0x11AD1549 + +.long 0x7C5A5699 + vand 11,11,10 +.long 0x11CE1D49 + +.long 0x7C7B5699 +.long 0x11EF2549 + +.long 0x7C9C5699 + vxor 8,8,11 +.long 0x12102D49 + +.long 0x7CBD5699 + addi 10,10,0x60 + + + + + +.long 0x7CE02799 + vxor 7,0,17 + +.long 0x7D832799 + vxor 12,1,18 + +.long 0x7DBA2799 + vxor 13,2,19 + +.long 0x7DDB2799 + vxor 14,3,20 + +.long 0x7DFC2799 + vxor 15,4,21 +.long 0x7E1D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq .Loop_xts_dec6x + + addic. 
5,5,0x60 + beq .Lxts_dec6x_zero + cmpwi 5,0x20 + blt .Lxts_dec6x_one + nop + beq .Lxts_dec6x_two + cmpwi 5,0x40 + blt .Lxts_dec6x_three + nop + beq .Lxts_dec6x_four + +.Lxts_dec6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_dec5x + + + vor 17,22,22 + vxor 18,8,23 + +.long 0x7CE02799 + vxor 7,0,18 + +.long 0x7D832799 + +.long 0x7DBA2799 + +.long 0x7DDB2799 +.long 0x7DFC2799 + addi 4,4,0x50 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,21,21 + vor 18,22,22 + +.long 0x7CE02799 + vxor 7,0,22 + +.long 0x7D832799 + +.long 0x7DBA2799 +.long 0x7DDB2799 + addi 4,4,0x40 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,20,20 + vor 18,21,21 + +.long 0x7CE02799 + vxor 7,0,21 + +.long 0x7D832799 +.long 0x7DBA2799 + addi 4,4,0x30 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,19,19 + vor 18,20,20 + +.long 0x7CE02799 + vxor 7,0,20 +.long 0x7D832799 + addi 4,4,0x20 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_one: + vxor 7,5,17 + nop +.Loop_xts_dec1x: +.long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + +.long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,.Loop_xts_dec1x + + subi 0,31,1 +.long 0x10E7C548 + + andi. 
0,0,16 + cmpwi 31,0 +.long 0x10E7CD48 + + sub 10,10,0 +.long 0x10E7D548 + +.long 0x7C005699 +.long 0x10E7DD48 + + addi 7,1,64+15 +.long 0x10E7E548 + lvx 24,0,7 + +.long 0x10E7ED48 + lvx 25,3,7 + vxor 17,17,31 + + +.long 0x10E7F548 + + mtctr 9 +.long 0x10E78D49 + + vor 17,18,18 + vor 18,19,19 + +.long 0x7CE02799 + addi 4,4,0x10 + vxor 7,0,19 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_zero: + cmpwi 31,0 + beq .Lxts_dec6x_done + +.long 0x7C005699 + + vxor 7,0,18 +.Lxts_dec6x_steal: +.long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + +.long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,.Lxts_dec6x_steal + + add 10,10,31 +.long 0x10E7C548 + + cmpwi 31,0 +.long 0x10E7CD48 + +.long 0x7C005699 +.long 0x10E7D548 + + lvsr 5,0,31 +.long 0x10E7DD48 + + addi 7,1,64+15 +.long 0x10E7E548 + lvx 24,0,7 + +.long 0x10E7ED48 + lvx 25,3,7 + vxor 18,18,31 + + +.long 0x10E7F548 + + vperm 0,0,0,5 +.long 0x11679549 + + + +.long 0x7D602799 + + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + vxor 7,7,17 + + subi 30,4,1 + mtctr 31 +.Loop_xts_dec6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,.Loop_xts_dec6x_steal + + li 31,0 + mtctr 9 + b .Loop_xts_dec1x + +.align 4 +.Lxts_dec6x_done: + cmpldi 8,0 + beq .Lxts_dec6x_ret + + vxor 8,17,23 + +.long 0x7D004799 + +.Lxts_dec6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 
31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_dec5x: +.long 0x10E7C548 +.long 0x118CC548 +.long 0x11ADC548 +.long 0x11CEC548 +.long 0x11EFC548 + lvx 24,26,7 + addi 7,7,0x20 + +.long 0x10E7CD48 +.long 0x118CCD48 +.long 0x11ADCD48 +.long 0x11CECD48 +.long 0x11EFCD48 + lvx 25,3,7 + bc 16,0,_aesp8_xts_dec5x + + subi 0,31,1 +.long 0x10E7C548 +.long 0x118CC548 +.long 0x11ADC548 +.long 0x11CEC548 +.long 0x11EFC548 + + andi. 0,0,16 + cmpwi 31,0 +.long 0x10E7CD48 +.long 0x118CCD48 +.long 0x11ADCD48 +.long 0x11CECD48 +.long 0x11EFCD48 + vxor 17,17,31 + + sub 10,10,0 +.long 0x10E7D548 +.long 0x118CD548 +.long 0x11ADD548 +.long 0x11CED548 +.long 0x11EFD548 + vxor 1,18,31 + +.long 0x10E7DD48 +.long 0x7C005699 +.long 0x118CDD48 +.long 0x11ADDD48 +.long 0x11CEDD48 +.long 0x11EFDD48 + vxor 2,19,31 + + addi 7,1,64+15 +.long 0x10E7E548 +.long 0x118CE548 +.long 0x11ADE548 +.long 0x11CEE548 +.long 0x11EFE548 + lvx 24,0,7 + vxor 3,20,31 + +.long 0x10E7ED48 + +.long 0x118CED48 +.long 0x11ADED48 +.long 0x11CEED48 +.long 0x11EFED48 + lvx 25,3,7 + vxor 4,21,31 + +.long 0x10E7F548 +.long 0x118CF548 +.long 0x11ADF548 +.long 0x11CEF548 +.long 0x11EFF548 + +.long 0x10E78D49 +.long 0x118C0D49 +.long 0x11AD1549 +.long 0x11CE1D49 +.long 0x11EF2549 + mtctr 9 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ghashp8-ppc.S diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ghashp8-ppc.S:1.1 src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ghashp8-ppc.S:1.2 --- src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ghashp8-ppc.S:1.1 Sat Oct 15 08:19:02 2016 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ghashp8-ppc.S Sun Mar 4 11:45:12 2018 @@ -12,7 +12,7 @@ gcm_init_p8: .align 5 .gcm_init_p8: - lis 0,0xfff0 + li 0,-4096 li 8,0x10 mfspr 12,256 li 9,0x20 @@ -34,17 +34,99 @@ gcm_init_p8: vsl 9,9,5 vsrab 6,6,7 vand 6,6,8 - vxor 9,9,6 + 
vxor 3,9,6 - vsldoi 9,9,9,8 + vsldoi 9,3,3,8 vsldoi 8,4,8,8 vsldoi 11,4,9,8 vsldoi 10,9,4,8 .long 0x7D001F99 .long 0x7D681F99 + li 8,0x40 .long 0x7D291F99 + li 9,0x50 .long 0x7D4A1F99 + li 10,0x60 + +.long 0x10035CC8 +.long 0x10234CC8 +.long 0x104354C8 + +.long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 +.long 0x100044C8 + vxor 6,6,2 + vxor 16,0,6 + + vsldoi 17,16,16,8 + vsldoi 19,4,17,8 + vsldoi 18,17,4,8 + +.long 0x7E681F99 + li 8,0x70 +.long 0x7E291F99 + li 9,0x80 +.long 0x7E4A1F99 + li 10,0x90 +.long 0x10039CC8 +.long 0x11B09CC8 +.long 0x10238CC8 +.long 0x11D08CC8 +.long 0x104394C8 +.long 0x11F094C8 + +.long 0x10E044C8 +.long 0x114D44C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vsldoi 11,14,4,8 + vsldoi 9,4,14,8 + vxor 0,0,5 + vxor 2,2,6 + vxor 13,13,11 + vxor 15,15,9 + + vsldoi 0,0,0,8 + vsldoi 13,13,13,8 + vxor 0,0,7 + vxor 13,13,10 + + vsldoi 6,0,0,8 + vsldoi 9,13,13,8 +.long 0x100044C8 +.long 0x11AD44C8 + vxor 6,6,2 + vxor 9,9,15 + vxor 0,0,6 + vxor 13,13,9 + + vsldoi 9,0,0,8 + vsldoi 17,13,13,8 + vsldoi 11,4,9,8 + vsldoi 10,9,4,8 + vsldoi 19,4,17,8 + vsldoi 18,17,4,8 + +.long 0x7D681F99 + li 8,0xa0 +.long 0x7D291F99 + li 9,0xb0 +.long 0x7D4A1F99 + li 10,0xc0 +.long 0x7E681F99 +.long 0x7E291F99 +.long 0x7E4A1F99 mtspr 256,12 blr @@ -52,7 +134,6 @@ gcm_init_p8: .byte 0,12,0x14,0,0,0,2,0 .long 0 - .globl gcm_gmult_p8 .type gcm_gmult_p8,@function .section ".opd","aw" @@ -120,7 +201,7 @@ gcm_ghash_p8: .align 5 .gcm_ghash_p8: - lis 0,0xfff8 + li 0,-4096 li 8,0x10 mfspr 12,256 li 9,0x20 @@ -129,43 +210,66 @@ gcm_ghash_p8: .long 0x7C001E99 .long 0x7D682699 + li 8,0x40 .long 0x7D292699 + li 9,0x50 .long 0x7D4A2699 + li 10,0x60 .long 0x7D002699 vxor 4,4,4 + cmpldi 6,64 + bge .Lgcm_ghash_p8_4x + .long 0x7C602E99 addi 5,5,16 - subi 6,6,16 + subic. 
6,6,16 vxor 3,3,0 - b .Loop + beq .Lshort + +.long 0x7E682699 + li 8,16 +.long 0x7E292699 + add 9,5,6 +.long 0x7E4A2699 + b .Loop_2x .align 5 -.Loop: - subic 6,6,16 -.long 0x10035CC8 - subfe. 0,0,0 -.long 0x10234CC8 +.Loop_2x: +.long 0x7E002E99 + + + subic 6,6,32 +.long 0x10039CC8 +.long 0x11B05CC8 + subfe 0,0,0 +.long 0x10238CC8 +.long 0x11D04CC8 and 0,0,6 -.long 0x104354C8 +.long 0x104394C8 +.long 0x11F054C8 add 5,5,0 + vxor 0,0,13 + vxor 1,1,14 + .long 0x10E044C8 vsldoi 5,1,4,8 vsldoi 6,4,1,8 + vxor 2,2,15 vxor 0,0,5 vxor 2,2,6 vsldoi 0,0,0,8 vxor 0,0,7 -.long 0x7C602E99 - addi 5,5,16 +.long 0x7C682E99 + addi 5,5,32 vsldoi 6,0,0,8 .long 0x100044C8 @@ -173,8 +277,32 @@ gcm_ghash_p8: vxor 6,6,2 vxor 3,3,6 vxor 3,3,0 - beq .Loop + cmpld 9,5 + bgt .Loop_2x + + cmplwi 6,0 + bne .Leven + +.Lshort: +.long 0x10035CC8 +.long 0x10234CC8 +.long 0x104354C8 + +.long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 +.long 0x100044C8 + vxor 6,6,2 +.Leven: vxor 0,0,6 .long 0x7C001F99 @@ -184,6 +312,273 @@ gcm_ghash_p8: .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 +.align 5 +.gcm_ghash_p8_4x: +.Lgcm_ghash_p8_4x: + stdu 1,-256(1) + li 10,63 + li 11,79 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + li 10,0x60 + stvx 31,11,1 + li 0,-1 + stw 12,252(1) + mtspr 256,0 + + lvsl 5,0,8 + + li 8,0x70 +.long 0x7E292699 + li 9,0x80 + vspltisb 6,8 + + li 10,0x90 +.long 0x7EE82699 + li 8,0xa0 +.long 0x7F092699 + li 9,0xb0 +.long 0x7F2A2699 + li 10,0xc0 +.long 0x7FA82699 + li 8,0x10 +.long 0x7FC92699 + li 9,0x20 +.long 0x7FEA2699 + li 10,0x30 + + vsldoi 7,4,6,8 + vaddubm 18,5,7 + vaddubm 19,6,18 + + srdi 6,6,4 + +.long 0x7C602E99 +.long 0x7E082E99 + 
subic. 6,6,8 +.long 0x7EC92E99 +.long 0x7F8A2E99 + addi 5,5,0x40 + + + + + + vxor 2,3,0 + +.long 0x11B0BCC8 +.long 0x11D0C4C8 +.long 0x11F0CCC8 + + vperm 11,17,9,18 + vperm 5,22,28,19 + vperm 10,17,9,19 + vperm 6,22,28,18 +.long 0x12B68CC8 +.long 0x12855CC8 +.long 0x137C4CC8 +.long 0x134654C8 + + vxor 21,21,14 + vxor 20,20,13 + vxor 27,27,21 + vxor 26,26,15 + + blt .Ltail_4x + +.Loop_4x: +.long 0x7C602E99 +.long 0x7E082E99 + subic. 6,6,4 +.long 0x7EC92E99 +.long 0x7F8A2E99 + addi 5,5,0x40 + + + + + +.long 0x1002ECC8 +.long 0x1022F4C8 +.long 0x1042FCC8 +.long 0x11B0BCC8 +.long 0x11D0C4C8 +.long 0x11F0CCC8 + + vxor 0,0,20 + vxor 1,1,27 + vxor 2,2,26 + vperm 5,22,28,19 + vperm 6,22,28,18 + +.long 0x10E044C8 +.long 0x12855CC8 +.long 0x134654C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 +.long 0x12B68CC8 +.long 0x137C4CC8 +.long 0x100044C8 + + vxor 20,20,13 + vxor 26,26,15 + vxor 2,2,3 + vxor 21,21,14 + vxor 2,2,6 + vxor 27,27,21 + vxor 2,2,0 + bge .Loop_4x + +.Ltail_4x: +.long 0x1002ECC8 +.long 0x1022F4C8 +.long 0x1042FCC8 + + vxor 0,0,20 + vxor 1,1,27 + +.long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 2,2,26 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 +.long 0x100044C8 + vxor 6,6,2 + vxor 0,0,6 + + addic. 
6,6,4 + beq .Ldone_4x + +.long 0x7C602E99 + cmpldi 6,2 + li 6,-4 + blt .Lone +.long 0x7E082E99 + beq .Ltwo + +.Lthree: +.long 0x7EC92E99 + + + + + vxor 2,3,0 + vor 29,23,23 + vor 30, 24, 24 + vor 31,25,25 + + vperm 5,16,22,19 + vperm 6,16,22,18 +.long 0x12B08CC8 +.long 0x13764CC8 +.long 0x12855CC8 +.long 0x134654C8 + + vxor 27,27,21 + b .Ltail_4x + +.align 4 +.Ltwo: + + + + vxor 2,3,0 + vperm 5,4,16,19 + vperm 6,4,16,18 + + vsldoi 29,4,17,8 + vor 30, 17, 17 + vsldoi 31,17,4,8 + +.long 0x12855CC8 +.long 0x13704CC8 +.long 0x134654C8 + + b .Ltail_4x + +.align 4 +.Lone: + + + vsldoi 29,4,9,8 + vor 30, 9, 9 + vsldoi 31,9,4,8 + + vxor 2,3,0 + vxor 20,20,20 + vxor 27,27,27 + vxor 26,26,26 + + b .Ltail_4x + +.Ldone_4x: + +.long 0x7C001F99 + + li 10,63 + li 11,79 + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + addi 1,1,256 + blr +.long 0 +.byte 0,12,0x04,0,0x80,0,4,0 +.long 0 .byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ppccpuid.S diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ppccpuid.S:1.1 src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ppccpuid.S:1.2 --- src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ppccpuid.S:1.1 Sat Oct 15 08:19:02 2016 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ppccpuid.S Sun Mar 4 11:45:12 2018 @@ -1,6 +1,21 @@ .machine "any" .text +.globl OPENSSL_fpu_probe +.type OPENSSL_fpu_probe,@function +.section ".opd","aw" +.align 3 +OPENSSL_fpu_probe: +.quad 
.OPENSSL_fpu_probe,.TOC.@tocbase,0 +.previous + +.align 4 +.OPENSSL_fpu_probe: + fmr 0,0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + .globl OPENSSL_ppc64_probe .type OPENSSL_ppc64_probe,@function .section ".opd","aw" @@ -51,6 +66,23 @@ OPENSSL_crypto207_probe: .byte 0,12,0x14,0,0,0,0,0 +.globl OPENSSL_madd300_probe +.type OPENSSL_madd300_probe,@function +.section ".opd","aw" +.align 3 +OPENSSL_madd300_probe: +.quad .OPENSSL_madd300_probe,.TOC.@tocbase,0 +.previous + +.align 4 +.OPENSSL_madd300_probe: + xor 0,0,0 +.long 0x10600033 +.long 0x10600031 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + .globl OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,@function .section ".opd","aw" @@ -122,12 +154,7 @@ OPENSSL_rdtsc: .align 4 .OPENSSL_rdtsc: -.Lrdtsc: - mftb 5 - mftbu 4 mftb 3 - cmplw 0,3,5 - bne .Lrdtsc blr .long 0 .byte 0,12,0x14,0,0,0,0,0 @@ -172,3 +199,133 @@ OPENSSL_cleanse: .byte 0,12,0x14,0,0,0,2,0 .long 0 + +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,@function +.section ".opd","aw" +.align 3 +CRYPTO_memcmp: +.quad .CRYPTO_memcmp,.TOC.@tocbase,0 +.previous + +.align 4 +.CRYPTO_memcmp: + cmpldi 5,0 + li 0,0 + beq .Lno_data + mtctr 5 +.Loop_cmp: + lbz 6,0(3) + addi 3,3,1 + lbz 7,0(4) + addi 4,4,1 + xor 6,6,7 + or 0,0,6 + bc 16,0,.Loop_cmp + +.Lno_data: + li 3,0 + sub 3,3,0 + extrwi 3,3,1,0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + +.globl OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,@function +.section ".opd","aw" +.align 3 +OPENSSL_instrument_bus: +.quad .OPENSSL_instrument_bus,.TOC.@tocbase,0 +.previous + +.align 4 +.OPENSSL_instrument_bus: + mtctr 4 + + mftb 7 + li 8,0 + + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + +.Loop: mftb 6 + sub 8,6,7 + mr 7,6 + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 
6,0,3 + stwx 6,0,3 + addi 3,3,4 + bc 16,0,.Loop + + mr 3,4 + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 +.long 0 + + +.globl OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,@function +.section ".opd","aw" +.align 3 +OPENSSL_instrument_bus2: +.quad .OPENSSL_instrument_bus2,.TOC.@tocbase,0 +.previous + +.align 4 +.OPENSSL_instrument_bus2: + mr 0,4 + slwi 4,4,2 + + mftb 7 + li 8,0 + + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + + mftb 6 + sub 8,6,7 + mr 7,6 + mr 9,8 +.Loop2: + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + + addic. 5,5,-1 + beq .Ldone2 + + mftb 6 + sub 8,6,7 + mr 7,6 + cmplw 7,8,9 + mr 9,8 + + mfcr 6 + not 6,6 + rlwinm 6,6,1,29,29 + + sub. 4,4,6 + add 3,3,6 + bne .Loop2 + +.Ldone2: + srwi 4,4,2 + sub 3,0,4 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + Added files: Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha-ppc.S diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha-ppc.S:1.1 --- /dev/null Sun Mar 4 11:45:12 2018 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha-ppc.S Sun Mar 4 11:45:12 2018 @@ -0,0 +1,1142 @@ +.machine "any" +.text + +.globl ChaCha20_ctr32_int +.type ChaCha20_ctr32_int,@function +.section ".opd","aw" +.align 3 +ChaCha20_ctr32_int: +.quad .ChaCha20_ctr32_int,.TOC.@tocbase,0 +.previous + +.align 5 +.ChaCha20_ctr32_int: +__ChaCha20_ctr32_int: + cmpldi 5,0 + beqlr + + stdu 1,-256(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + std 22,176(1) + std 23,184(1) + std 24,192(1) + std 25,200(1) + std 26,208(1) + std 27,216(1) + std 28,224(1) + std 29,232(1) + std 30,240(1) + std 31,248(1) + std 0,272(1) + + lwz 11,0(7) + lwz 12,4(7) + lwz 14,8(7) + lwz 15,12(7) + + bl __ChaCha20_1x + + ld 0,272(1) + ld 14,112(1) + ld 15,120(1) + ld 16,128(1) + ld 17,136(1) + ld 18,144(1) + ld 19,152(1) + ld 20,160(1) + ld 21,168(1) + 
ld 22,176(1) + ld 23,184(1) + ld 24,192(1) + ld 25,200(1) + ld 26,208(1) + ld 27,216(1) + ld 28,224(1) + ld 29,232(1) + ld 30,240(1) + ld 31,248(1) + mtlr 0 + addi 1,1,256 + blr +.long 0 +.byte 0,12,4,1,0x80,18,5,0 +.long 0 + + +.align 5 +__ChaCha20_1x: +.Loop_outer: + lis 16,0x6170 + lis 17,0x3320 + lis 18,0x7962 + lis 19,0x6b20 + ori 16,16,0x7865 + ori 17,17,0x646e + ori 18,18,0x2d32 + ori 19,19,0x6574 + + li 0,10 + lwz 20,0(6) + lwz 21,4(6) + lwz 22,8(6) + lwz 23,12(6) + lwz 24,16(6) + mr 28,11 + lwz 25,20(6) + mr 29,12 + lwz 26,24(6) + mr 30,14 + lwz 27,28(6) + mr 31,15 + + mr 7,20 + mr 8,21 + mr 9,22 + mr 10,23 + + mtctr 0 +.Loop: + add 16,16,20 + add 17,17,21 + add 18,18,22 + add 19,19,23 + xor 28,28,16 + xor 29,29,17 + xor 30,30,18 + xor 31,31,19 + rotlwi 28,28,16 + rotlwi 29,29,16 + rotlwi 30,30,16 + rotlwi 31,31,16 + add 24,24,28 + add 25,25,29 + add 26,26,30 + add 27,27,31 + xor 20,20,24 + xor 21,21,25 + xor 22,22,26 + xor 23,23,27 + rotlwi 20,20,12 + rotlwi 21,21,12 + rotlwi 22,22,12 + rotlwi 23,23,12 + add 16,16,20 + add 17,17,21 + add 18,18,22 + add 19,19,23 + xor 28,28,16 + xor 29,29,17 + xor 30,30,18 + xor 31,31,19 + rotlwi 28,28,8 + rotlwi 29,29,8 + rotlwi 30,30,8 + rotlwi 31,31,8 + add 24,24,28 + add 25,25,29 + add 26,26,30 + add 27,27,31 + xor 20,20,24 + xor 21,21,25 + xor 22,22,26 + xor 23,23,27 + rotlwi 20,20,7 + rotlwi 21,21,7 + rotlwi 22,22,7 + rotlwi 23,23,7 + add 16,16,21 + add 17,17,22 + add 18,18,23 + add 19,19,20 + xor 31,31,16 + xor 28,28,17 + xor 29,29,18 + xor 30,30,19 + rotlwi 31,31,16 + rotlwi 28,28,16 + rotlwi 29,29,16 + rotlwi 30,30,16 + add 26,26,31 + add 27,27,28 + add 24,24,29 + add 25,25,30 + xor 21,21,26 + xor 22,22,27 + xor 23,23,24 + xor 20,20,25 + rotlwi 21,21,12 + rotlwi 22,22,12 + rotlwi 23,23,12 + rotlwi 20,20,12 + add 16,16,21 + add 17,17,22 + add 18,18,23 + add 19,19,20 + xor 31,31,16 + xor 28,28,17 + xor 29,29,18 + xor 30,30,19 + rotlwi 31,31,8 + rotlwi 28,28,8 + rotlwi 29,29,8 + rotlwi 30,30,8 + add 26,26,31 + add 
27,27,28 + add 24,24,29 + add 25,25,30 + xor 21,21,26 + xor 22,22,27 + xor 23,23,24 + xor 20,20,25 + rotlwi 21,21,7 + rotlwi 22,22,7 + rotlwi 23,23,7 + rotlwi 20,20,7 + bc 16,0,.Loop + + subic 5,5,64 + addi 16,16,0x7865 + addi 17,17,0x646e + addi 18,18,0x2d32 + addi 19,19,0x6574 + addis 16,16,0x6170 + addis 17,17,0x3320 + addis 18,18,0x7962 + addis 19,19,0x6b20 + + subfe. 0,0,0 + add 20,20,7 + lwz 7,16(6) + add 21,21,8 + lwz 8,20(6) + add 22,22,9 + lwz 9,24(6) + add 23,23,10 + lwz 10,28(6) + add 24,24,7 + add 25,25,8 + add 26,26,9 + add 27,27,10 + + add 28,28,11 + add 29,29,12 + add 30,30,14 + add 31,31,15 + addi 11,11,1 + mr 7,16 + rotlwi 16,16,8 + rlwimi 16,7,24,0,7 + rlwimi 16,7,24,16,23 + mr 8,17 + rotlwi 17,17,8 + rlwimi 17,8,24,0,7 + rlwimi 17,8,24,16,23 + mr 9,18 + rotlwi 18,18,8 + rlwimi 18,9,24,0,7 + rlwimi 18,9,24,16,23 + mr 10,19 + rotlwi 19,19,8 + rlwimi 19,10,24,0,7 + rlwimi 19,10,24,16,23 + mr 7,20 + rotlwi 20,20,8 + rlwimi 20,7,24,0,7 + rlwimi 20,7,24,16,23 + mr 8,21 + rotlwi 21,21,8 + rlwimi 21,8,24,0,7 + rlwimi 21,8,24,16,23 + mr 9,22 + rotlwi 22,22,8 + rlwimi 22,9,24,0,7 + rlwimi 22,9,24,16,23 + mr 10,23 + rotlwi 23,23,8 + rlwimi 23,10,24,0,7 + rlwimi 23,10,24,16,23 + mr 7,24 + rotlwi 24,24,8 + rlwimi 24,7,24,0,7 + rlwimi 24,7,24,16,23 + mr 8,25 + rotlwi 25,25,8 + rlwimi 25,8,24,0,7 + rlwimi 25,8,24,16,23 + mr 9,26 + rotlwi 26,26,8 + rlwimi 26,9,24,0,7 + rlwimi 26,9,24,16,23 + mr 10,27 + rotlwi 27,27,8 + rlwimi 27,10,24,0,7 + rlwimi 27,10,24,16,23 + mr 7,28 + rotlwi 28,28,8 + rlwimi 28,7,24,0,7 + rlwimi 28,7,24,16,23 + mr 8,29 + rotlwi 29,29,8 + rlwimi 29,8,24,0,7 + rlwimi 29,8,24,16,23 + mr 9,30 + rotlwi 30,30,8 + rlwimi 30,9,24,0,7 + rlwimi 30,9,24,16,23 + mr 10,31 + rotlwi 31,31,8 + rlwimi 31,10,24,0,7 + rlwimi 31,10,24,16,23 + bne .Ltail + + lwz 7,0(4) + lwz 8,4(4) + cmpldi 5,0 + lwz 9,8(4) + lwz 10,12(4) + xor 16,16,7 + lwz 7,16(4) + xor 17,17,8 + lwz 8,20(4) + xor 18,18,9 + lwz 9,24(4) + xor 19,19,10 + lwz 10,28(4) + xor 20,20,7 + lwz 
7,32(4) + xor 21,21,8 + lwz 8,36(4) + xor 22,22,9 + lwz 9,40(4) + xor 23,23,10 + lwz 10,44(4) + xor 24,24,7 + lwz 7,48(4) + xor 25,25,8 + lwz 8,52(4) + xor 26,26,9 + lwz 9,56(4) + xor 27,27,10 + lwz 10,60(4) + xor 28,28,7 + stw 16,0(3) + xor 29,29,8 + stw 17,4(3) + xor 30,30,9 + stw 18,8(3) + xor 31,31,10 + stw 19,12(3) + stw 20,16(3) + stw 21,20(3) + stw 22,24(3) + stw 23,28(3) + stw 24,32(3) + stw 25,36(3) + stw 26,40(3) + stw 27,44(3) + stw 28,48(3) + stw 29,52(3) + stw 30,56(3) + addi 4,4,64 + stw 31,60(3) + addi 3,3,64 + + bne .Loop_outer + + blr + +.align 4 +.Ltail: + addi 5,5,64 + subi 4,4,1 + subi 3,3,1 + addi 7,1,48-1 + mtctr 5 + + stw 16,48(1) + stw 17,52(1) + stw 18,56(1) + stw 19,60(1) + stw 20,64(1) + stw 21,68(1) + stw 22,72(1) + stw 23,76(1) + stw 24,80(1) + stw 25,84(1) + stw 26,88(1) + stw 27,92(1) + stw 28,96(1) + stw 29,100(1) + stw 30,104(1) + stw 31,108(1) + +.Loop_tail: + lbzu 11,1(4) + lbzu 16,1(7) + xor 12,11,16 + stbu 12,1(3) + bc 16,0,.Loop_tail + + stw 1,48(1) + stw 1,52(1) + stw 1,56(1) + stw 1,60(1) + stw 1,64(1) + stw 1,68(1) + stw 1,72(1) + stw 1,76(1) + stw 1,80(1) + stw 1,84(1) + stw 1,88(1) + stw 1,92(1) + stw 1,96(1) + stw 1,100(1) + stw 1,104(1) + stw 1,108(1) + + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.globl ChaCha20_ctr32_vmx +.type ChaCha20_ctr32_vmx,@function +.section ".opd","aw" +.align 3 +ChaCha20_ctr32_vmx: +.quad .ChaCha20_ctr32_vmx,.TOC.@tocbase,0 +.previous + +.align 5 +.ChaCha20_ctr32_vmx: + cmpldi 5,256 + blt __ChaCha20_ctr32_int + + stdu 1,-464(1) + mflr 0 + li 10,127 + li 11,143 + mfspr 12,256 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + stw 12,316(1) + std 14,320(1) + std 15,328(1) + std 16,336(1) + std 17,344(1) + std 
18,352(1) + std 19,360(1) + std 20,368(1) + std 21,376(1) + std 22,384(1) + std 23,392(1) + std 24,400(1) + std 25,408(1) + std 26,416(1) + std 27,424(1) + std 28,432(1) + std 29,440(1) + std 30,448(1) + std 31,456(1) + li 12,-1 + std 0, 480(1) + mtspr 256,12 + + bl .Lconsts + li 16,16 + li 17,32 + li 18,48 + li 19,64 + li 20,31 + li 21,15 + + lvx 16,0,6 + lvsl 12,0,6 + lvx 17,16,6 + lvx 31,20,6 + + lvx 18,0,7 + lvsl 13,0,7 + lvx 27,21,7 + + lvx 15,0,12 + lvx 20,16,12 + lvx 21,17,12 + lvx 22,18,12 + lvx 23,19,12 + + vperm 16,16,17,12 + vperm 17,17,31,12 + vperm 18,18,27,13 + + lwz 11,0(7) + lwz 12,4(7) + vadduwm 18,18,20 + lwz 14,8(7) + vadduwm 19,18,20 + lwz 15,12(7) + vadduwm 20,19,20 + + vspltisw 24,-12 + vspltisw 25,12 + vspltisw 26,-7 + + + vxor 12,12,12 + vspltisw 30,-1 + lvsl 28,0,4 + lvsr 29,0,3 + vperm 30,12,30,29 + + lvsl 12,0,16 + vspltisb 13,3 + vxor 12,12,13 + vxor 29,29,13 + vperm 28,28,28,12 + + b .Loop_outer_vmx + +.align 4 +.Loop_outer_vmx: + lis 16,0x6170 + lis 17,0x3320 + vor 0,15,15 + lis 18,0x7962 + lis 19,0x6b20 + vor 4,15,15 + ori 16,16,0x7865 + ori 17,17,0x646e + vor 8,15,15 + ori 18,18,0x2d32 + ori 19,19,0x6574 + vor 1,16,16 + + li 0,10 + lwz 20,0(6) + vor 5,16,16 + lwz 21,4(6) + vor 9,16,16 + lwz 22,8(6) + vor 2,17,17 + lwz 23,12(6) + vor 6,17,17 + lwz 24,16(6) + vor 10,17,17 + mr 28,11 + lwz 25,20(6) + vor 3,18,18 + mr 29,12 + lwz 26,24(6) + vor 7,19,19 + mr 30,14 + lwz 27,28(6) + vor 11,20,20 + mr 31,15 + + mr 7,20 + mr 8,21 + mr 9,22 + mr 10,23 + vspltisw 27,7 + + mtctr 0 + nop +.Loop_vmx: + vadduwm 0,0,1 + add 16,16,20 + vadduwm 4,4,5 + add 17,17,21 + vadduwm 8,8,9 + add 18,18,22 + vxor 3,3,0 + add 19,19,23 + vxor 7,7,4 + xor 28,28,16 + vxor 11,11,8 + xor 29,29,17 + vperm 3,3,3,22 + xor 30,30,18 + vperm 7,7,7,22 + xor 31,31,19 + vperm 11,11,11,22 + rotlwi 28,28,16 + vadduwm 2,2,3 + rotlwi 29,29,16 + vadduwm 6,6,7 + rotlwi 30,30,16 + vadduwm 10,10,11 + rotlwi 31,31,16 + vxor 12,1,2 + add 24,24,28 + vxor 13,5,6 + add 25,25,29 + vxor 
14,9,10 + add 26,26,30 + vsrw 1,12,24 + add 27,27,31 + vsrw 5,13,24 + xor 20,20,24 + vsrw 9,14,24 + xor 21,21,25 + vslw 12,12,25 + xor 22,22,26 + vslw 13,13,25 + xor 23,23,27 + vslw 14,14,25 + rotlwi 20,20,12 + vor 1,1,12 + rotlwi 21,21,12 + vor 5,5,13 + rotlwi 22,22,12 + vor 9,9,14 + rotlwi 23,23,12 + vadduwm 0,0,1 + add 16,16,20 + vadduwm 4,4,5 + add 17,17,21 + vadduwm 8,8,9 + add 18,18,22 + vxor 3,3,0 + add 19,19,23 + vxor 7,7,4 + xor 28,28,16 + vxor 11,11,8 + xor 29,29,17 + vperm 3,3,3,23 + xor 30,30,18 + vperm 7,7,7,23 + xor 31,31,19 + vperm 11,11,11,23 + rotlwi 28,28,8 + vadduwm 2,2,3 + rotlwi 29,29,8 + vadduwm 6,6,7 + rotlwi 30,30,8 + vadduwm 10,10,11 + rotlwi 31,31,8 + vxor 12,1,2 + add 24,24,28 + vxor 13,5,6 + add 25,25,29 + vxor 14,9,10 + add 26,26,30 + vsrw 1,12,26 + add 27,27,31 + vsrw 5,13,26 + xor 20,20,24 + vsrw 9,14,26 + xor 21,21,25 + vslw 12,12,27 + xor 22,22,26 + vslw 13,13,27 + xor 23,23,27 + vslw 14,14,27 + rotlwi 20,20,7 + vor 1,1,12 + rotlwi 21,21,7 + vor 5,5,13 + rotlwi 22,22,7 + vor 9,9,14 + rotlwi 23,23,7 + vsldoi 2,2,2, 16-8 + vsldoi 6,6,6, 16-8 + vsldoi 10,10,10, 16-8 + vsldoi 1,1,1, 16-12 + vsldoi 5,5,5, 16-12 + vsldoi 9,9,9, 16-12 + vsldoi 3,3,3, 16-4 + vsldoi 7,7,7, 16-4 + vsldoi 11,11,11, 16-4 + vadduwm 0,0,1 + add 16,16,21 + vadduwm 4,4,5 + add 17,17,22 + vadduwm 8,8,9 + add 18,18,23 + vxor 3,3,0 + add 19,19,20 + vxor 7,7,4 + xor 31,31,16 + vxor 11,11,8 + xor 28,28,17 + vperm 3,3,3,22 + xor 29,29,18 + vperm 7,7,7,22 + xor 30,30,19 + vperm 11,11,11,22 + rotlwi 31,31,16 + vadduwm 2,2,3 + rotlwi 28,28,16 + vadduwm 6,6,7 + rotlwi 29,29,16 + vadduwm 10,10,11 + rotlwi 30,30,16 + vxor 12,1,2 + add 26,26,31 + vxor 13,5,6 + add 27,27,28 + vxor 14,9,10 + add 24,24,29 + vsrw 1,12,24 + add 25,25,30 + vsrw 5,13,24 + xor 21,21,26 + vsrw 9,14,24 + xor 22,22,27 + vslw 12,12,25 + xor 23,23,24 + vslw 13,13,25 + xor 20,20,25 + vslw 14,14,25 + rotlwi 21,21,12 + vor 1,1,12 + rotlwi 22,22,12 + vor 5,5,13 + rotlwi 23,23,12 + vor 9,9,14 + rotlwi 20,20,12 + 
vadduwm 0,0,1 + add 16,16,21 + vadduwm 4,4,5 + add 17,17,22 + vadduwm 8,8,9 + add 18,18,23 + vxor 3,3,0 + add 19,19,20 + vxor 7,7,4 + xor 31,31,16 + vxor 11,11,8 + xor 28,28,17 + vperm 3,3,3,23 + xor 29,29,18 + vperm 7,7,7,23 + xor 30,30,19 + vperm 11,11,11,23 + rotlwi 31,31,8 + vadduwm 2,2,3 + rotlwi 28,28,8 + vadduwm 6,6,7 + rotlwi 29,29,8 + vadduwm 10,10,11 + rotlwi 30,30,8 + vxor 12,1,2 + add 26,26,31 + vxor 13,5,6 + add 27,27,28 + vxor 14,9,10 + add 24,24,29 + vsrw 1,12,26 + add 25,25,30 + vsrw 5,13,26 + xor 21,21,26 + vsrw 9,14,26 + xor 22,22,27 + vslw 12,12,27 + xor 23,23,24 + vslw 13,13,27 + xor 20,20,25 + vslw 14,14,27 + rotlwi 21,21,7 + vor 1,1,12 + rotlwi 22,22,7 + vor 5,5,13 + rotlwi 23,23,7 + vor 9,9,14 + rotlwi 20,20,7 + vsldoi 2,2,2, 16-8 + vsldoi 6,6,6, 16-8 + vsldoi 10,10,10, 16-8 + vsldoi 1,1,1, 16-4 + vsldoi 5,5,5, 16-4 + vsldoi 9,9,9, 16-4 + vsldoi 3,3,3, 16-12 + vsldoi 7,7,7, 16-12 + vsldoi 11,11,11, 16-12 + bc 16,0,.Loop_vmx + + subi 5,5,256 + addi 16,16,0x7865 + addi 17,17,0x646e + addi 18,18,0x2d32 + addi 19,19,0x6574 + addis 16,16,0x6170 + addis 17,17,0x3320 + addis 18,18,0x7962 + addis 19,19,0x6b20 + add 20,20,7 + lwz 7,16(6) + add 21,21,8 + lwz 8,20(6) + add 22,22,9 + lwz 9,24(6) + add 23,23,10 + lwz 10,28(6) + add 24,24,7 + add 25,25,8 + add 26,26,9 + add 27,27,10 + add 28,28,11 + add 29,29,12 + add 30,30,14 + add 31,31,15 + + vadduwm 0,0,15 + vadduwm 4,4,15 + vadduwm 8,8,15 + vadduwm 1,1,16 + vadduwm 5,5,16 + vadduwm 9,9,16 + vadduwm 2,2,17 + vadduwm 6,6,17 + vadduwm 10,10,17 + vadduwm 3,3,18 + vadduwm 7,7,19 + vadduwm 11,11,20 + + addi 11,11,4 + vadduwm 18,18,21 + vadduwm 19,19,21 + vadduwm 20,20,21 + + mr 7,16 + rotlwi 16,16,8 + rlwimi 16,7,24,0,7 + rlwimi 16,7,24,16,23 + mr 8,17 + rotlwi 17,17,8 + rlwimi 17,8,24,0,7 + rlwimi 17,8,24,16,23 + mr 9,18 + rotlwi 18,18,8 + rlwimi 18,9,24,0,7 + rlwimi 18,9,24,16,23 + mr 10,19 + rotlwi 19,19,8 + rlwimi 19,10,24,0,7 + rlwimi 19,10,24,16,23 + mr 7,20 + rotlwi 20,20,8 + rlwimi 20,7,24,0,7 + 
rlwimi 20,7,24,16,23 + mr 8,21 + rotlwi 21,21,8 + rlwimi 21,8,24,0,7 + rlwimi 21,8,24,16,23 + mr 9,22 + rotlwi 22,22,8 + rlwimi 22,9,24,0,7 + rlwimi 22,9,24,16,23 + mr 10,23 + rotlwi 23,23,8 + rlwimi 23,10,24,0,7 + rlwimi 23,10,24,16,23 + mr 7,24 + rotlwi 24,24,8 + rlwimi 24,7,24,0,7 + rlwimi 24,7,24,16,23 + mr 8,25 + rotlwi 25,25,8 + rlwimi 25,8,24,0,7 + rlwimi 25,8,24,16,23 + mr 9,26 + rotlwi 26,26,8 + rlwimi 26,9,24,0,7 + rlwimi 26,9,24,16,23 + mr 10,27 + rotlwi 27,27,8 + rlwimi 27,10,24,0,7 + rlwimi 27,10,24,16,23 + mr 7,28 + rotlwi 28,28,8 + rlwimi 28,7,24,0,7 + rlwimi 28,7,24,16,23 + mr 8,29 + rotlwi 29,29,8 + rlwimi 29,8,24,0,7 + rlwimi 29,8,24,16,23 + mr 9,30 + rotlwi 30,30,8 + rlwimi 30,9,24,0,7 + rlwimi 30,9,24,16,23 + mr 10,31 + rotlwi 31,31,8 + rlwimi 31,10,24,0,7 + rlwimi 31,10,24,16,23 + lwz 7,0(4) + lwz 8,4(4) + lwz 9,8(4) + lwz 10,12(4) + xor 16,16,7 + lwz 7,16(4) + xor 17,17,8 + lwz 8,20(4) + xor 18,18,9 + lwz 9,24(4) + xor 19,19,10 + lwz 10,28(4) + xor 20,20,7 + lwz 7,32(4) + xor 21,21,8 + lwz 8,36(4) + xor 22,22,9 + lwz 9,40(4) + xor 23,23,10 + lwz 10,44(4) + xor 24,24,7 + lwz 7,48(4) + xor 25,25,8 + lwz 8,52(4) + xor 26,26,9 + lwz 9,56(4) + xor 27,27,10 + lwz 10,60(4) + xor 28,28,7 + stw 16,0(3) + xor 29,29,8 + stw 17,4(3) + xor 30,30,9 + stw 18,8(3) + xor 31,31,10 + stw 19,12(3) + addi 4,4,64 + stw 20,16(3) + li 7,16 + stw 21,20(3) + li 8,32 + stw 22,24(3) + li 9,48 + stw 23,28(3) + li 10,64 + stw 24,32(3) + stw 25,36(3) + stw 26,40(3) + stw 27,44(3) + stw 28,48(3) + stw 29,52(3) + stw 30,56(3) + stw 31,60(3) + addi 3,3,64 + + lvx 31,0,4 + lvx 27,7,4 + lvx 12,8,4 + lvx 13,9,4 + lvx 14,10,4 + addi 4,4,64 + + vperm 31,31,27,28 + vperm 27,27,12,28 + vperm 12,12,13,28 + vperm 13,13,14,28 + vxor 0,0,31 + vxor 1,1,27 + lvx 27,7,4 + vxor 2,2,12 + lvx 12,8,4 + vxor 3,3,13 + lvx 13,9,4 + lvx 31,10,4 + addi 4,4,64 + li 10,63 + vperm 0,0,0,29 + vperm 1,1,1,29 + vperm 2,2,2,29 + vperm 3,3,3,29 + + vperm 14,14,27,28 + vperm 27,27,12,28 + vperm 12,12,13,28 + 
vperm 13,13,31,28 + vxor 4,4,14 + vxor 5,5,27 + lvx 27,7,4 + vxor 6,6,12 + lvx 12,8,4 + vxor 7,7,13 + lvx 13,9,4 + lvx 14,10,4 + addi 4,4,64 + vperm 4,4,4,29 + vperm 5,5,5,29 + vperm 6,6,6,29 + vperm 7,7,7,29 + + vperm 31,31,27,28 + vperm 27,27,12,28 + vperm 12,12,13,28 + vperm 13,13,14,28 + vxor 8,8,31 + vxor 9,9,27 + vxor 10,10,12 + vxor 11,11,13 + vperm 8,8,8,29 + vperm 9,9,9,29 + vperm 10,10,10,29 + vperm 11,11,11,29 + + andi. 17,3,15 + mr 16,3 + + vsel 31,0,1,30 + vsel 27,1,2,30 + vsel 12,2,3,30 + vsel 13,3,4,30 + vsel 1,4,5,30 + vsel 2,5,6,30 + vsel 3,6,7,30 + vsel 4,7,8,30 + vsel 5,8,9,30 + vsel 6,9,10,30 + vsel 7,10,11,30 + + + stvx 31,7,3 + stvx 27,8,3 + stvx 12,9,3 + addi 3,3,64 + stvx 13,0,3 + stvx 1,7,3 + stvx 2,8,3 + stvx 3,9,3 + addi 3,3,64 + stvx 4,0,3 + stvx 5,7,3 + stvx 6,8,3 + stvx 7,9,3 + addi 3,3,64 + + beq .Laligned_vmx + + sub 18,3,17 + li 19,0 +.Lunaligned_tail_vmx: + stvebx 11,19,18 + addi 19,19,1 + cmpw 19,17 + bne .Lunaligned_tail_vmx + + sub 18,16,17 +.Lunaligned_head_vmx: + stvebx 0,17,18 + cmpwi 17,15 + addi 17,17,1 + bne .Lunaligned_head_vmx + + cmpldi 5,255 + bgt .Loop_outer_vmx + + b .Ldone_vmx + +.align 4 +.Laligned_vmx: + stvx 0,0,16 + + cmpldi 5,255 + bgt .Loop_outer_vmx + nop + +.Ldone_vmx: + cmpldi 5,0 + bnel __ChaCha20_1x + + lwz 12,316(1) + li 10,127 + li 11,143 + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 0, 480(1) + ld 14,320(1) + ld 15,328(1) + ld 16,336(1) + ld 17,344(1) + ld 18,352(1) + ld 19,360(1) + ld 20,368(1) + ld 21,376(1) + ld 22,384(1) + ld 23,392(1) + ld 24,400(1) + ld 25,408(1) + ld 26,416(1) + ld 27,424(1) + ld 28,432(1) + ld 29,440(1) + ld 30,448(1) + ld 31,456(1) + mtlr 0 + addi 1,1,464 + blr +.long 0 +.byte 
0,12,0x04,1,0x80,18,5,0 +.long 0 + + +.align 5 +.Lconsts: + mflr 0 + bcl 20,31,$+4 + mflr 12 + addi 12,12,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.Lsigma: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 +.long 1,0,0,0 +.long 4,0,0,0 +.long 0x02030001,0x06070405,0x0a0b0809,0x0e0f0c0d +.long 0x01020300,0x05060704,0x090a0b08,0x0d0e0f0c +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha.inc diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha.inc:1.1 --- /dev/null Sun Mar 4 11:45:12 2018 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/chacha.inc Sun Mar 4 11:45:12 2018 @@ -0,0 +1,5 @@ +.PATH.S: ${.PARSEDIR} + +CHACHA_SRCS = chacha-ppc.S + +.include "../../chacha.inc" Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppc.S diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppc.S:1.1 --- /dev/null Sun Mar 4 11:45:12 2018 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppc.S Sun Mar 4 11:45:12 2018 @@ -0,0 +1,195 @@ +.machine "any" +.text +.globl poly1305_init_int +.type poly1305_init_int,@function +.section ".opd","aw" +.align 3 +poly1305_init_int: +.quad .poly1305_init_int,.TOC.@tocbase,0 +.previous + +.align 4 +.poly1305_init_int: + xor 0,0,0 + std 0,0(3) + std 0,8(3) + std 0,16(3) + + cmpld 4,0 + beq- .Lno_key + li 7,4 + lwbrx 10,0,4 + li 11,8 + lwbrx 7,7,4 + li 8,12 + lwbrx 11,11,4 + lwbrx 8,8,4 + insrdi 10,7,32,0 + insrdi 11,8,32,0 + lis 8,0xfff + ori 8,8,0xfffc + insrdi 8,8,32,0 + ori 7,8,3 + + and 10,10,7 + and 11,11,8 + + std 10,32(3) + std 11,40(3) + +.Lno_key: + xor 3,3,3 + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 + + +.globl 
poly1305_blocks +.type poly1305_blocks,@function +.section ".opd","aw" +.align 3 +poly1305_blocks: +.quad .poly1305_blocks,.TOC.@tocbase,0 +.previous + +.align 4 +.poly1305_blocks: + srdi. 5,5,4 + beq- .Labort + + stdu 1,-192(1) + mflr 0 + std 27,152(1) + std 28,160(1) + std 29,168(1) + std 30,176(1) + std 31,184(1) + std 0,208(1) + + ld 27,32(3) + ld 28,40(3) + + ld 7,0(3) + ld 8,8(3) + ld 9,16(3) + + srdi 29,28,2 + mtctr 5 + add 29,29,28 + li 0,3 + b .Loop + +.align 4 +.Loop: + li 10,4 + lwbrx 30,0,4 + li 31,8 + lwbrx 10,10,4 + li 11,12 + lwbrx 31,31,4 + lwbrx 11,11,4 + insrdi 30,10,32,0 + insrdi 31,11,32,0 + addi 4,4,16 + + addc 7,7,30 + adde 8,8,31 + + mulld 10,7,27 + mulhdu 11,7,27 + adde 9,9,6 + + mulld 30,8,29 + mulhdu 31,8,29 + addc 10,10,30 + adde 11,11,31 + + mulld 30,7,28 + mulhdu 12,7,28 + addc 11,11,30 + addze 12,12 + + mulld 30,8,27 + mulhdu 31,8,27 + addc 11,11,30 + adde 12,12,31 + + mulld 30,9,29 + mulld 31,9,27 + addc 11,11,30 + adde 12,12,31 + + andc 30,12,0 + and 9,12,0 + srdi 31,30,2 + add 30,30,31 + addc 7,10,30 + addze 8,11 + addze 9,9 + + bc 16,0,.Loop + + std 7,0(3) + std 8,8(3) + std 9,16(3) + + ld 27,152(1) + ld 28,160(1) + ld 29,168(1) + ld 30,176(1) + ld 31,184(1) + addi 1,1,192 +.Labort: + blr +.long 0 +.byte 0,12,4,1,0x80,5,4,0 + + +.globl poly1305_emit +.type poly1305_emit,@function +.section ".opd","aw" +.align 3 +poly1305_emit: +.quad .poly1305_emit,.TOC.@tocbase,0 +.previous + +.align 4 +.poly1305_emit: + ld 7,0(3) + ld 8,8(3) + ld 9,16(3) + ld 6,0(5) + ld 5,8(5) + + addic 10,7,5 + addze 11,8 + addze 12,9 + + srdi 0,12,2 + neg 0,0 + + andc 7,7,0 + and 10,10,0 + andc 8,8,0 + and 11,11,0 + or 7,7,10 + or 8,8,11 + rotldi 6,6,32 + rotldi 5,5,32 + addc 7,7,6 + adde 8,8,5 + rldicl 0,7,32,32 + li 10,4 + stwbrx 7,0,4 + rldicl 7,8,32,32 + li 11,8 + stwbrx 0,10,4 + li 12,12 + stwbrx 8,11,4 + stwbrx 7,12,4 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 + +.byte 
80,111,108,121,49,51,48,53,32,102,111,114,32,80,80,67,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppcfp.S diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppcfp.S:1.1 --- /dev/null Sun Mar 4 11:45:12 2018 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305-ppcfp.S Sun Mar 4 11:45:12 2018 @@ -0,0 +1,606 @@ +.machine "any" +.text + +.globl poly1305_init_fpu +.type poly1305_init_fpu,@function +.section ".opd","aw" +.align 3 +poly1305_init_fpu: +.quad .poly1305_init_fpu,.TOC.@tocbase,0 +.previous + +.align 6 +.poly1305_init_fpu: + stdu 1,-48(1) + mflr 6 + std 6,64(1) + + bl .LPICmeup + + xor 0,0,0 + mtlr 6 + + lfd 8,8*0(5) + lfd 9,8*1(5) + lfd 10,8*2(5) + lfd 11,8*3(5) + lfd 12,8*4(5) + lfd 13,8*5(5) + + stfd 8,8*0(3) + stfd 9,8*1(3) + stfd 10,8*2(3) + stfd 11,8*3(3) + + cmpld 4,0 + beq- .Lno_key + + lfd 6,8*13(5) + mffs 7 + + stfd 8,8*4(3) + stfd 9,8*5(3) + stfd 10,8*6(3) + stfd 11,8*7(3) + + li 8,4 + li 9,8 + li 10,12 + lwbrx 7,0,4 + lwbrx 8,8,4 + lwbrx 9,9,4 + lwbrx 10,10,4 + + lis 11,0xf000 + ori 12,11,3 + andc 7,7,11 + andc 8,8,12 + andc 9,9,12 + andc 10,10,12 + + stw 7,36(3) + stw 8,44(3) + stw 9,52(3) + stw 10,60(3) + + mtfsf 255,6 + stfd 8,8*18(3) + stfd 9,8*19(3) + stfd 10,8*20(3) + stfd 11,8*21(3) + stfd 12,8*22(3) + stfd 13,8*23(3) + + lfd 0,8*4(3) + lfd 2,8*5(3) + lfd 4,8*6(3) + lfd 6,8*7(3) + + fsub 0,0,8 + fsub 2,2,9 + fsub 4,4,10 + fsub 6,6,11 + + lfd 8,8*6(5) + lfd 9,8*7(5) + lfd 10,8*8(5) + lfd 11,8*9(5) + + fmul 3,2,13 + fmul 5,4,13 + stfd 7,8*15(3) + fmul 7,6,13 + + fadd 1,0,8 + stfd 3,8*12(3) + fadd 3,2,9 + stfd 5,8*13(3) + fadd 5,4,10 + stfd 7,8*14(3) + fadd 7,6,11 + + fsub 1,1,8 + fsub 3,3,9 + fsub 5,5,10 + fsub 7,7,11 + + lfd 8,8*10(5) + lfd 9,8*11(5) + lfd 10,8*12(5) + + fsub 0,0,1 + fsub 2,2,3 + fsub 4,4,5 + fsub 6,6,7 + + 
stfd 1,8*5(3) + stfd 3,8*7(3) + stfd 5,8*9(3) + stfd 7,8*11(3) + + stfd 0,8*4(3) + stfd 2,8*6(3) + stfd 4,8*8(3) + stfd 6,8*10(3) + + lfd 2,8*12(3) + lfd 4,8*13(3) + lfd 6,8*14(3) + lfd 0,8*15(3) + + fadd 3,2,8 + fadd 5,4,9 + fadd 7,6,10 + + fsub 3,3,8 + fsub 5,5,9 + fsub 7,7,10 + + fsub 2,2,3 + fsub 4,4,5 + fsub 6,6,7 + + stfd 3,8*13(3) + stfd 5,8*15(3) + stfd 7,8*17(3) + + stfd 2,8*12(3) + stfd 4,8*14(3) + stfd 6,8*16(3) + + mtfsf 255,0 +.Lno_key: + xor 3,3,3 + addi 1,1,48 + blr +.long 0 +.byte 0,12,4,1,0x80,0,2,0 + + +.globl poly1305_blocks_fpu +.type poly1305_blocks_fpu,@function +.section ".opd","aw" +.align 3 +poly1305_blocks_fpu: +.quad .poly1305_blocks_fpu,.TOC.@tocbase,0 +.previous + +.align 4 +.poly1305_blocks_fpu: + srwi. 5,5,4 + beq- .Labort + + stdu 1,-240(1) + mflr 0 + stfd 14,96(1) + stfd 15,104(1) + stfd 16,112(1) + stfd 17,120(1) + stfd 18,128(1) + stfd 19,136(1) + stfd 20,144(1) + stfd 21,152(1) + stfd 22,160(1) + stfd 23,168(1) + stfd 24,176(1) + stfd 25,184(1) + stfd 26,192(1) + stfd 27,200(1) + stfd 28,208(1) + stfd 29,216(1) + stfd 30,224(1) + stfd 31,232(1) + std 0,256(1) + + xor 0,0,0 + li 10,1 + mtctr 5 + neg 5,5 + stw 0,80(1) + stw 10,84(1) + + lfd 8,8*18(3) + lfd 9,8*19(3) + lfd 10,8*20(3) + lfd 11,8*21(3) + lfd 12,8*22(3) + lfd 13,8*23(3) + + lfd 0,8*0(3) + lfd 2,8*1(3) + lfd 4,8*2(3) + lfd 6,8*3(3) + + stfd 8,48(1) + oris 10,6,18736 + stfd 9,56(1) + stfd 10,64(1) + stw 10,72(1) + + li 11,4 + li 12,8 + li 6,12 + lwbrx 7,0,4 + lwbrx 8,11,4 + lwbrx 9,12,4 + lwbrx 10,6,4 + addi 4,4,16 + + stw 7,52(1) + stw 8,60(1) + stw 9,68(1) + stw 10,76(1) + + mffs 28 + lfd 29,80(1) + lfd 14,8*4(3) + lfd 15,8*5(3) + lfd 16,8*6(3) + lfd 17,8*7(3) + lfd 18,8*8(3) + lfd 19,8*9(3) + lfd 24,8*10(3) + lfd 25,8*11(3) + lfd 26,8*12(3) + lfd 27,8*13(3) + lfd 20,8*14(3) + lfd 21,8*15(3) + lfd 22,8*16(3) + lfd 23,8*17(3) + + stfd 28,80(1) + mtfsf 255,29 + + addic 5,5,1 + addze 0,0 + slwi. 
0,0,4 + sub 4,4,0 + + lfd 28,48(1) + lfd 29,56(1) + lfd 30,64(1) + lfd 31,72(1) + + fsub 0,0,8 + lwbrx 7,0,4 + fsub 2,2,9 + lwbrx 8,11,4 + fsub 4,4,10 + lwbrx 9,12,4 + fsub 6,6,11 + lwbrx 10,6,4 + + fsub 28,28,8 + addi 4,4,16 + fsub 29,29,9 + fsub 30,30,10 + fsub 31,31,11 + + fadd 28,28,0 + stw 7,52(1) + fadd 29,29,2 + stw 8,60(1) + fadd 30,30,4 + stw 9,68(1) + fadd 31,31,6 + stw 10,76(1) + + b .Lentry + +.align 4 +.Loop: + fsub 30,30,8 + addic 5,5,1 + fsub 31,31,9 + addze 0,0 + fsub 26,26,10 + slwi. 0,0,4 + fsub 27,27,11 + sub 4,4,0 + + fadd 0,0,30 + fadd 1,1,31 + fadd 4,4,26 + fadd 5,5,27 + + + fadd 26,2,10 + lwbrx 7,0,4 + fadd 27,3,10 + lwbrx 8,11,4 + fadd 30,6,12 + lwbrx 9,12,4 + fadd 31,7,12 + lwbrx 10,6,4 + fadd 24,0,9 + addi 4,4,16 + fadd 25,1,9 + fadd 28,4,11 + fadd 29,5,11 + + fsub 26,26,10 + stw 7,52(1) + fsub 27,27,10 + stw 8,60(1) + fsub 30,30,12 + stw 9,68(1) + fsub 31,31,12 + stw 10,76(1) + fsub 24,24,9 + fsub 25,25,9 + fsub 28,28,11 + fsub 29,29,11 + + fsub 2,2,26 + fsub 3,3,27 + fsub 6,6,30 + fsub 7,7,31 + fsub 4,4,28 + fsub 5,5,29 + fsub 0,0,24 + fsub 1,1,25 + + fadd 2,2,24 + fadd 3,3,25 + fadd 6,6,28 + fadd 7,7,29 + fadd 4,4,26 + fadd 5,5,27 + fmadd 0,30,13,0 + fmadd 1,31,13,1 + + fadd 29,2,3 + lfd 26,8*12(3) + fadd 31,6,7 + lfd 27,8*13(3) + fadd 30,4,5 + lfd 24,8*10(3) + fadd 28,0,1 + lfd 25,8*11(3) +.Lentry: + fmul 0,22,29 + fmul 1,23,29 + fmul 4,16,29 + fmul 5,17,29 + fmul 2,14,29 + fmul 3,15,29 + fmul 6,18,29 + fmul 7,19,29 + + fmadd 0,26,31,0 + fmadd 1,27,31,1 + fmadd 4,22,31,4 + fmadd 5,23,31,5 + fmadd 2,20,31,2 + fmadd 3,21,31,3 + fmadd 6,14,31,6 + fmadd 7,15,31,7 + + fmadd 0,20,30,0 + fmadd 1,21,30,1 + fmadd 4,14,30,4 + fmadd 5,15,30,5 + fmadd 2,22,30,2 + fmadd 3,23,30,3 + fmadd 6,16,30,6 + fmadd 7,17,30,7 + + fmadd 0,14,28,0 + lfd 30,48(1) + fmadd 1,15,28,1 + lfd 31,56(1) + fmadd 4,18,28,4 + lfd 26,64(1) + fmadd 5,19,28,5 + lfd 27,72(1) + fmadd 2,16,28,2 + fmadd 3,17,28,3 + fmadd 6,24,28,6 + fmadd 7,25,28,7 + + bc 16,0,.Loop + + + fadd 
24,0,9 + fadd 25,1,9 + fadd 28,4,11 + fadd 29,5,11 + fadd 26,2,10 + fadd 27,3,10 + fadd 30,6,12 + fadd 31,7,12 + + fsub 24,24,9 + fsub 25,25,9 + fsub 28,28,11 + fsub 29,29,11 + fsub 26,26,10 + fsub 27,27,10 + fsub 30,30,12 + fsub 31,31,12 + + fsub 2,2,26 + fsub 3,3,27 + fsub 6,6,30 + fsub 7,7,31 + fsub 4,4,28 + fsub 5,5,29 + fsub 0,0,24 + fsub 1,1,25 + + fadd 2,2,24 + fadd 3,3,25 + fadd 6,6,28 + fadd 7,7,29 + fadd 4,4,26 + fadd 5,5,27 + fmadd 0,30,13,0 + fmadd 1,31,13,1 + + fadd 29,2,3 + fadd 31,6,7 + fadd 30,4,5 + fadd 28,0,1 + + lfd 0,80(1) + fadd 29,29,9 + fadd 31,31,11 + fadd 30,30,10 + fadd 28,28,8 + + stfd 29,8*1(3) + stfd 31,8*3(3) + stfd 30,8*2(3) + stfd 28,8*0(3) + + mtfsf 255,0 + lfd 14,96(1) + lfd 15,104(1) + lfd 16,112(1) + lfd 17,120(1) + lfd 18,128(1) + lfd 19,136(1) + lfd 20,144(1) + lfd 21,152(1) + lfd 22,160(1) + lfd 23,168(1) + lfd 24,176(1) + lfd 25,184(1) + lfd 26,192(1) + lfd 27,200(1) + lfd 28,208(1) + lfd 29,216(1) + lfd 30,224(1) + lfd 31,232(1) + addi 1,1,240 +.Labort: + blr +.long 0 +.byte 0,12,4,1,0x80,0,4,0 + +.globl poly1305_emit_fpu +.type poly1305_emit_fpu,@function +.section ".opd","aw" +.align 3 +poly1305_emit_fpu: +.quad .poly1305_emit_fpu,.TOC.@tocbase,0 +.previous + +.align 4 +.poly1305_emit_fpu: + stdu 1,-80(1) + mflr 0 + std 28,48(1) + std 29,56(1) + std 30,64(1) + std 31,72(1) + std 0,96(1) + + lwz 28,0(3) + lwz 7,4(3) + lwz 29,8(3) + lwz 8,12(3) + lwz 30,16(3) + lwz 9,20(3) + lwz 31,24(3) + lwz 10,28(3) + + lis 0,0xfff0 + andc 28,28,0 + andc 29,29,0 + andc 30,30,0 + andc 31,31,0 + li 0,3 + + srwi 6,31,2 + and 11,31,0 + andc 31,31,0 + add 31,31,6 + add 7,7,31 + add 8,8,28 + add 9,9,29 + add 10,10,30 + + srdi 28,7,32 + add 8,8,28 + srdi 29,8,32 + add 9,9,29 + srdi 30,9,32 + add 10,10,30 + srdi 31,10,32 + add 11,11,31 + + insrdi 7,8,32,0 + insrdi 9,10,32,0 + + addic 28,7,5 + addze 29,9 + addze 30,11 + + srdi 0,30,2 + neg 0,0 + sradi 0,0,63 + ld 30,0(5) + ld 31,8(5) + + andc 7,7,0 + and 28,28,0 + andc 9,9,0 + and 29,29,0 + or 
7,7,28 + or 9,9,29 + rotldi 30,30,32 + rotldi 31,31,32 + addc 7,7,30 + adde 9,9,31 + + srdi 8,7,32 + srdi 10,9,32 + li 29,4 + stwbrx 7,0,4 + li 30,8 + stwbrx 8,29,4 + li 31,12 + stwbrx 9,30,4 + stwbrx 10,31,4 + ld 28,48(1) + ld 29,56(1) + ld 30,64(1) + ld 31,72(1) + addi 1,1,80 + blr +.long 0 +.byte 0,12,4,1,0x80,4,3,0 + +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 5 + addi 5,5,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 + +.long 0x43300000,0x00000000 +.long 0x45300000,0x00000000 +.long 0x47300000,0x00000000 +.long 0x49300000,0x00000000 +.long 0x4b500000,0x00000000 + +.long 0x37f40000,0x00000000 + +.long 0x44300000,0x00000000 +.long 0x46300000,0x00000000 +.long 0x48300000,0x00000000 +.long 0x4a300000,0x00000000 +.long 0x3e300000,0x00000000 +.long 0x40300000,0x00000000 +.long 0x42300000,0x00000000 + +.long 0x00000000,0x00000001 +.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,80,80,67,32,70,80,85,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 4 Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305.inc diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305.inc:1.1 --- /dev/null Sun Mar 4 11:45:12 2018 +++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/poly1305.inc Sun Mar 4 11:45:12 2018 @@ -0,0 +1,6 @@ +.PATH.S: ${.PARSEDIR} + +POLY1305_SRCS = poly1305-ppc.S poly1305-ppcfp.S +POLY1305_CPPFLAGS+=-DPOLY1305_ASM + +.include "../../poly1305.inc"