Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e2b21b5002a2bf21ca73c7448309a7288a984ddf
Commit:     e2b21b5002a2bf21ca73c7448309a7288a984ddf
Parent:     b7a30da61adc5f252ee97b2a4f3fc23c9d06a08a
Author:     Denys Vlasenko <[EMAIL PROTECTED]>
AuthorDate: Fri Oct 26 16:22:57 2007 +0800
Committer:  Herbert Xu <[EMAIL PROTECTED]>
CommitDate: Fri Jan 11 08:16:06 2008 +1100

    [CRYPTO] twofish: Do not unroll big stuff in twofish key setup
    
    Currently the twofish cipher key setup code
    has unrolled loops: approximately 70-100
    instructions are repeated 40 times.
    
    As a result, the twofish module is the biggest module
    in crypto/*.
    
    Unrolling produces 2.5x more code (+18k on i386) and speeds up key
    setup by only 7%:
    
        unrolled: twofish_setkey/sec: 41128
            loop: twofish_setkey/sec: 38148
        CALC_K256: ~100 insns each
        CALC_K192: ~90 insns
           CALC_K: ~70 insns
    
    The attached patch removes this unrolling.
    
    $ size */twofish_common.o
       text    data     bss     dec     hex filename
      37920       0       0   37920    9420 crypto.org/twofish_common.o
      13209       0       0   13209    3399 crypto/twofish_common.o
    
    Run tested (modprobe tcrypt reports ok). Please apply.
    
    Signed-off-by: Denys Vlasenko <[EMAIL PROTECTED]>
    Signed-off-by: Herbert Xu <[EMAIL PROTECTED]>
---
 crypto/twofish_common.c |   96 +++++++++++++++--------------------------------
 1 files changed, 30 insertions(+), 66 deletions(-)

diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
index b4b9c0c..0af216c 100644
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
@@ -655,84 +655,48 @@ int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len)
                        CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
                }
 
-               /* Calculate whitening and round subkeys.  The constants are
-                * indices of subkeys, preprocessed through q0 and q1. */
-               CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-               CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-               CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-               CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-               CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-               CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-               CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-               CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-               CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-               CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-               CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-               CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-               CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-               CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-               CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-               CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-               CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-               CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-               CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-               CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+               /* CALC_K256/CALC_K192/CALC_K loops were unrolled.
+                * Unrolling produced x2.5 more code (+18k on i386),
+                * and speeded up key setup by 7%:
+                * unrolled: twofish_setkey/sec: 41128
+                *     loop: twofish_setkey/sec: 38148
+                * CALC_K256: ~100 insns each
+                * CALC_K192: ~90 insns
+                *    CALC_K: ~70 insns
+                */
+               /* Calculate whitening and round subkeys */
+               for ( i = 0; i < 8; i += 2 ) {
+                       CALC_K256 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
+               }
+               for ( i = 0; i < 32; i += 2 ) {
+                       CALC_K256 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
+               }
        } else if (key_len == 24) { /* 192-bit key */
                /* Compute the S-boxes. */
                for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
                        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
                }
 
-               /* Calculate whitening and round subkeys.  The constants are
-                * indices of subkeys, preprocessed through q0 and q1. */
-               CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-               CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-               CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-               CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-               CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-               CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-               CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-               CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-               CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-               CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-               CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-               CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-               CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-               CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-               CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-               CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-               CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-               CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-               CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-               CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+               /* Calculate whitening and round subkeys */
+               for ( i = 0; i < 8; i += 2 ) {
+                       CALC_K192 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
+               }
+               for ( i = 0; i < 32; i += 2 ) {
+                       CALC_K192 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
+               }
        } else { /* 128-bit key */
                /* Compute the S-boxes. */
                for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
                        CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
                }
 
-               /* Calculate whitening and round subkeys.  The constants are
-                * indices of subkeys, preprocessed through q0 and q1. */
-               CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-               CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-               CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-               CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-               CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-               CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-               CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-               CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-               CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-               CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-               CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
-               CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-               CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
-               CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-               CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-               CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
-               CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-               CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-               CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
-               CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+               /* Calculate whitening and round subkeys */
+               for ( i = 0; i < 8; i += 2 ) {
+                       CALC_K (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
+               }
+               for ( i = 0; i < 32; i += 2 ) {
+                       CALC_K (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
+               }
        }
 
        return 0;
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to