This patch rearranges the instructions in the unrolled .Loop4_madd loop of bn_mul_add_words (crypto/bn/asm/s390x.S) in order to circumvent a performance degradation of more than 20%. Measurements with the fix applied showed performance improvements of the required magnitude on zEC12 and z13.

Signed-off-by: Leonidas Da Silva Barbosa <[email protected]>
---
 crypto/bn/asm/s390x.S | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S
index 43fcb79..c0e1fe9 100755
--- a/crypto/bn/asm/s390x.S
+++ b/crypto/bn/asm/s390x.S
@@ -33,35 +33,40 @@ bn_mul_add_words:

 .Loop4_madd:
 	lg	%r7,0(%r2,%r3)	// ap[i]
+	lg	%r9,8(%r2,%r3)
+
 	mlgr	%r6,%r5		// *=w
 	alcgr	%r7,%r8		// +=carry
-	alcgr	%r6,zero
+	alcgr	%r6,%r0
+	mlgr	%r8,%r5
+
 	alg	%r7,0(%r2,%r1)	// +=rp[i]
 	stg	%r7,0(%r2,%r1)	// rp[i]=
+	lg	%r7,0x10(%r2,%r3)

-	lg	%r9,8(%r2,%r3)
-	mlgr	%r8,%r5
 	alcgr	%r9,%r6
-	alcgr	%r8,zero
+	alcgr	%r8,%r0
+
 	alg	%r9,8(%r2,%r1)
 	stg	%r9,8(%r2,%r1)
+	lg	%r9,0x18(%r2,%r3)

-	lg	%r7,16(%r2,%r3)
 	mlgr	%r6,%r5
 	alcgr	%r7,%r8
-	alcgr	%r6,zero
-	alg	%r7,16(%r2,%r1)
-	stg	%r7,16(%r2,%r1)
+	alcgr	%r6,%r0
+	mlgr	%r8,%r5
+
+	alg	%r7,0x10(%r2,%r1)
+	stg	%r7,0x10(%r2,%r1)

-	lg	%r9,24(%r2,%r3)
-	mlgr	%r8,%r5
 	alcgr	%r9,%r6
-	alcgr	%r8,zero
-	alg	%r9,24(%r2,%r1)
-	stg	%r9,24(%r2,%r1)
+	alcgr	%r8,%r0

-	la	%r2,32(%r2)	// i+=4
-	brct	%r4,.Loop4_madd
+	alg	%r9,0x18(%r2,%r1)
+	stg	%r9,0x18(%r2,%r1)
+	la	%r2,0x20(%r0,%r2)	// i+=4
+
+	brct	%r4,.Loop4_madd

 	la	%r10,1(%r10)		// see if len%4 is zero ...
 	brct	%r10,.Loop1_madd	// without touching condition code:-)
--
1.8.3.1

_______________________________________________
openssl-bugs-mod mailing list
[email protected]
https://mta.openssl.org/mailman/listinfo/openssl-bugs-mod
_______________________________________________
openssl-dev mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-dev
