Hi Herbert and others,

attached is a patch with support for VIA C7 crypto engine providing
SHA1/SHA256 digests. It compiles into a new module padlock-sha.ko.

Currently it allocates one page for its buffer, and if there is more data
to be hashed than fits into it, it falls back to a software SHA
implementation. By default it requests the sha1-generic and sha256-generic
modules as fallbacks. A patch that adds these aliases to sha1.ko and
sha256.ko is at
http://www.logix.cz/michal/devel/padlock/kernel-2.6.17-aliases.diff

I also use some accessors as discussed earlier on the linux-crypto list:
http://www.logix.cz/michal/devel/padlock/kernel-2.6.17-accessors.diff

The patch is based on cryptodev-2.6 GIT tree but you'll need to fetch
commit ID 224f611c1639cb6c134a934dae7f7b9f0ac3b540 from Linus' tree to
compile it (or #if 0/#endif the two checks for cpu_has_phe(_enabled) in
padlock_init() function).

Here are some benchmarks from tcrypt.ko (mode=303 for sha1 and mode=304
for sha256):

SHA1
 Block size   Software          PadLock
   16 bytes:  272 cycles/byte   43 cycles/byte
   64 bytes:  126 cycles/byte   15 cycles/byte
  256 bytes:   74 cycles/byte    7 cycles/byte
 1024 bytes:   61 cycles/byte    5 cycles/byte
 2048 bytes:   59 cycles/byte    4 cycles/byte
 4096 bytes:   58 cycles/byte    4 cycles/byte
 8192 bytes:   58 cycles/byte   58 cycles/byte

SHA256
 Block size   Software          PadLock
   16 bytes:  311 cycles/byte   48 cycles/byte
   64 bytes:  144 cycles/byte   16 cycles/byte
  256 bytes:   86 cycles/byte    7 cycles/byte
 1024 bytes:   72 cycles/byte    5 cycles/byte
 2048 bytes:   70 cycles/byte    5 cycles/byte
 4096 bytes:   68 cycles/byte    4 cycles/byte
 8192 bytes:   68 cycles/byte   70 cycles/byte


For 8k blocks the data no longer fits into the one-page buffer, so the
driver falls back to software - hence the significant slowdown. All
results are at
http://www.logix.cz/michal/devel/padlock/kernel-2.6.17-results.txt

Note - to compile this patch on vanilla 2.6.16 and 2.6.17 please apply
http://www.logix.cz/michal/devel/padlock/kernel-2.6.16-padlock-prereq.diff
first (it contains all the above mentioned diffs as well).

The attached patch is also available at
http://www.logix.cz/michal/devel/padlock/kernel-padlock-sha.diff
(just in case it gets wrapped in the email).

Please comment :-)

Michal Ludvig

---

Support for SHA1 / SHA256 algorithms in VIA C7 processors.

Signed-off-by: Michal Ludvig <[EMAIL PROTECTED]>

Index: linux-2.6.16.13-xenU/drivers/crypto/padlock-sha.c
===================================================================
--- /dev/null
+++ linux-2.6.16.13-xenU/drivers/crypto/padlock-sha.c
@@ -0,0 +1,366 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for VIA PadLock hardware crypto engine.
+ *
+ * Copyright (c) 2006  Michal Ludvig <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include <asm/byteorder.h>
+#include "padlock.h"
+
+#define PADLOCK_CRA_PRIORITY   300
+
+#define SHA1_DEFAULT_FALLBACK  "sha1-generic"
+#define SHA1_DIGEST_SIZE        20
+#define SHA1_HMAC_BLOCK_SIZE    64
+
+#define SHA256_DEFAULT_FALLBACK "sha256-generic"
+#define SHA256_DIGEST_SIZE      32
+#define SHA256_HMAC_BLOCK_SIZE  64
+
+/* Names of the software digest drivers to fall back to. Both are
+ * exposed as read-only (0444) module parameters. */
+static char *sha1_fallback = SHA1_DEFAULT_FALLBACK;
+static char *sha256_fallback = SHA256_DEFAULT_FALLBACK;
+
+module_param(sha1_fallback, charp, 0444);
+module_param(sha256_fallback, charp, 0444);
+
+MODULE_PARM_DESC(sha1_fallback,
+                "Fallback driver for SHA1. Default is " SHA1_DEFAULT_FALLBACK);
+MODULE_PARM_DESC(sha256_fallback,
+                "Fallback driver for SHA256. Default is " SHA256_DEFAULT_FALLBACK);
+
+struct padlock_sha_ctx {       /* per-tfm state, reached through CTX(tfm) */
+       char            *data;          /* buffer in which update() accumulates input */
+       size_t          used;           /* number of bytes currently stored in data */
+       size_t          data_len;       /* capacity of data in bytes */
+       int             bypass;         /* non-zero: hashing was handed over to fallback_tfm */
+       void (*f_sha_padlock)(const char *in, char *out, int count);   /* hardware digest routine */
+       const char      *fallback_driver_name;  /* name given to crypto_alloc_tfm() on bypass */
+       struct crypto_tfm *fallback_tfm;        /* software digest, allocated in padlock_sha_bypass() */
+};
+
+#define CTX(tfm)       ((struct padlock_sha_ctx*)(crypto_tfm_ctx(tfm)))        /* typed view of the tfm context */
+
+/* Round ptr up to the next 16-byte boundary; note that ptr is evaluated twice. */
+#define NEAREST_ALIGNED(ptr) ((unsigned char *)(ptr) + \
+       ((0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F))
+
+static struct crypto_tfm *tfm_sha1, *tfm_sha256;       /* fallback tfms: allocated in padlock_init_sha(), freed in padlock_fini_sha() */
+static struct crypto_alg sha1_alg, sha256_alg;         /* tentative definitions; initialised further down */
+
+/*
+ * Switch the transform over to the software fallback digest.
+ *
+ * Allocates the fallback tfm named by fallback_driver_name, initialises
+ * it and feeds it whatever has been buffered so far, so that later
+ * update()/final() calls can be forwarded to it. Does nothing when the
+ * transform is already in bypass mode.
+ *
+ * The fallback is looked up by driver name only; picking a better
+ * implementation (e.g. an optimised sha1 module) is left for later.
+ *
+ * This function cannot report an error, so a failed allocation of the
+ * fallback tfm is treated as fatal (BUG_ON).
+ */
+static void padlock_sha_bypass(struct crypto_tfm *tfm)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       if (ctx->bypass)
+               return;
+
+       ctx->fallback_tfm = crypto_alloc_tfm(ctx->fallback_driver_name, 0);
+       BUG_ON(!ctx->fallback_tfm);
+
+       crypto_digest_init(ctx->fallback_tfm);
+       if (ctx->data && ctx->used) {
+               /* Only a single entry is ever used, so don't put
+                * eight of them on the stack. */
+               struct scatterlist sg;
+
+               sg_set_buf(&sg, ctx->data, ctx->used);
+               crypto_digest_update(ctx->fallback_tfm, &sg, 1);
+       }
+
+       ctx->used = 0;
+       ctx->bypass = 1;
+}
+
+/*
+ * Transform constructor: allocate the one-page staging buffer.
+ *
+ * For now the buffer is exactly one page; later this should be
+ * configurable.
+ *
+ * If no page is available the buffer is simply left empty (data == NULL,
+ * data_len == 0); the capacity check in padlock_sha_update() then sends
+ * the first non-empty update to the software fallback. The fallback
+ * must not be set up from here: fallback_driver_name is only filled in
+ * by the dia_init handlers, so padlock_sha_bypass() would be called
+ * with a name that has not been set yet, and padlock_sha_init() would
+ * reset the bypass flag afterwards anyway.
+ *
+ * Always returns 0.
+ */
+static int padlock_cra_init(struct crypto_tfm *tfm)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       ctx->data = (char *)__get_free_page(GFP_KERNEL);
+       ctx->data_len = ctx->data ? PAGE_SIZE : 0;
+
+       return 0;
+}
+
+/*
+ * Transform destructor: release everything the context still owns.
+ *
+ * Besides the staging page this also frees a fallback tfm that is still
+ * around, which happens when the transform is destroyed in bypass mode
+ * without final() having been called.
+ */
+static void padlock_cra_exit(struct crypto_tfm *tfm)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       if (ctx->fallback_tfm) {
+               crypto_free_tfm(ctx->fallback_tfm);
+               ctx->fallback_tfm = NULL;
+       }
+
+       if (ctx->data) {
+               free_page((unsigned long)ctx->data);
+               ctx->data = NULL;
+       }
+}
+
+/*
+ * Reset the context for a new digest computation.
+ *
+ * If the previous computation went into bypass mode and was never
+ * finalised, its fallback tfm is still allocated; free it here so it is
+ * not orphaned when the bypass flag is cleared.
+ *
+ * NOTE(review): assumes fallback_tfm is NULL in a freshly created
+ * context, i.e. that the crypto core zeroes the context - the existing
+ * checks of ->bypass and ->data rely on the same thing. Confirm.
+ */
+static void padlock_sha_init(struct crypto_tfm *tfm)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       if (ctx->fallback_tfm) {
+               crypto_free_tfm(ctx->fallback_tfm);
+               ctx->fallback_tfm = NULL;
+       }
+
+       ctx->used = 0;
+       ctx->bypass = 0;
+}
+
+/*
+ * Add length bytes at data to the running digest.
+ *
+ * As long as the input fits into the staging buffer it is only copied
+ * there; the hardware is run over the whole buffer in final(). Once the
+ * buffer would overflow, the transform switches to the software
+ * fallback and this and all further input is passed to it.
+ */
+static void padlock_sha_update(struct crypto_tfm *tfm, const uint8_t *data,
+                              unsigned int length)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       /* used never exceeds data_len, so the subtraction cannot wrap,
+        * whereas "used + length" could for a huge length on 32-bit. */
+       if (unlikely(!ctx->bypass && length > ctx->data_len - ctx->used))
+               padlock_sha_bypass(tfm);
+
+       if (unlikely(ctx->bypass)) {
+               struct scatterlist sg;
+
+               BUG_ON(!ctx->fallback_tfm);
+               sg_set_buf(&sg, data, length);
+               crypto_digest_update(ctx->fallback_tfm, &sg, 1);
+               return;
+       }
+
+       memcpy(ctx->data + ctx->used, data, length);
+       ctx->used += length;
+}
+
+/* Reverse the byte order of count consecutive 32-bit words, in place. */
+static inline void
+padlock_htonl_block(uint32_t *data, size_t count)
+{
+       size_t i;
+
+       for (i = 0; i < count; i++)
+               asm volatile ("bswapl %0" : "+r"(data[i]));
+}
+
+/*
+ * Compute the SHA1 digest of count bytes at in with the PadLock
+ * "rep xsha1" instruction and store the 20-byte result at out.
+ *
+ * The hardware works on a 16-byte aligned scratch area on the stack,
+ * preloaded with the SHA1 initial hash value, because out itself
+ * doesn't have to be aligned. The five result words are then copied to
+ * out and byte-swapped.
+ */
+void padlock_do_sha1(const char *in, char *out, int count)
+{
+       char buf[128+16];
+       char *output = (char *)NEAREST_ALIGNED(buf);
+       uint32_t *state = (uint32_t *)output;
+
+       state[0] = 0x67452301;
+       state[1] = 0xEFCDAB89;
+       state[2] = 0x98BADCFE;
+       state[3] = 0x10325476;
+       state[4] = 0xC3D2E1F0;
+
+       /* The instruction reads the input and the state and writes the
+        * digest through the pointers only, which the compiler cannot
+        * see from the operands. The "memory" clobber keeps the stores
+        * above and the copy below on the correct side of the asm. */
+       asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"       /* rep xsha1 */
+                     : "+S"(in), "+D"(output)
+                     : "c"(count), "a"(0)
+                     : "memory");
+
+       memcpy(out, output, 5 * sizeof(uint32_t));
+
+       padlock_htonl_block((uint32_t*)out, 5);
+}
+
+/*
+ * Compute the SHA256 digest of count bytes at in with the PadLock
+ * "rep xsha256" instruction and store the 32-byte result at out.
+ *
+ * The hardware works on a 16-byte aligned scratch area on the stack,
+ * preloaded with the SHA256 initial hash value, because out itself
+ * doesn't have to be aligned. The eight result words are then copied
+ * to out and byte-swapped.
+ */
+void padlock_do_sha256(const char *in, char *out, int count)
+{
+       char buf[128+16];
+       char *output = (char *)NEAREST_ALIGNED(buf);
+       uint32_t *state = (uint32_t *)output;
+
+       state[0] = 0x6A09E667;
+       state[1] = 0xBB67AE85;
+       state[2] = 0x3C6EF372;
+       state[3] = 0xA54FF53A;
+       state[4] = 0x510E527F;
+       state[5] = 0x9B05688C;
+       state[6] = 0x1F83D9AB;
+       state[7] = 0x5BE0CD19;
+
+       /* The instruction reads the input and the state and writes the
+        * digest through the pointers only, which the compiler cannot
+        * see from the operands. The "memory" clobber keeps the stores
+        * above and the copy below on the correct side of the asm. */
+       asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"       /* rep xsha256 */
+                     : "+S"(in), "+D"(output)
+                     : "c"(count), "a"(0)
+                     : "memory");
+
+       memcpy(out, output, 8 * sizeof(uint32_t));
+
+       padlock_htonl_block((uint32_t*)out, 8);
+}
+
+/*
+ * Finish the digest and write it to out.
+ *
+ * In the normal case the buffered input is handed to the PadLock
+ * routine in one go. In bypass mode the fallback tfm produces the
+ * result and is released afterwards.
+ */
+static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       if (likely(!ctx->bypass)) {
+               /* Pass the input buffer to PadLock microcode... */
+               ctx->f_sha_padlock(ctx->data, out, ctx->used);
+               ctx->used = 0;
+               return;
+       }
+
+       BUG_ON(!ctx->fallback_tfm);
+       crypto_digest_final(ctx->fallback_tfm, out);
+       crypto_free_tfm(ctx->fallback_tfm);
+       ctx->fallback_tfm = NULL;
+       ctx->bypass = 0;
+}
+
+/* dia_init for "sha1": pick the SHA1 hardware routine and the name of
+ * the SHA1 fallback driver, then reset the buffering state. */
+static void padlock_sha1_init(struct crypto_tfm *tfm)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       ctx->fallback_driver_name = crypto_tfm_alg_driver_name(tfm_sha1);
+       ctx->f_sha_padlock = padlock_do_sha1;
+
+       padlock_sha_init(tfm);
+}
+
+/* dia_init for "sha256": pick the SHA256 hardware routine and the name
+ * of the SHA256 fallback driver, then reset the buffering state. */
+static void padlock_sha256_init(struct crypto_tfm *tfm)
+{
+       struct padlock_sha_ctx *ctx = CTX(tfm);
+
+       ctx->fallback_driver_name = crypto_tfm_alg_driver_name(tfm_sha256);
+       ctx->f_sha_padlock = padlock_do_sha256;
+
+       padlock_sha_init(tfm);
+}
+
+static struct crypto_alg sha1_alg = {  /* "sha1" digest registered by padlock_init_sha() */
+       .cra_name               =       "sha1",
+       .cra_driver_name        =       "sha1-padlock",
+       .cra_priority           =       PADLOCK_CRA_PRIORITY,
+       .cra_flags              =       CRYPTO_ALG_TYPE_DIGEST,
+       .cra_blocksize          =       SHA1_HMAC_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct padlock_sha_ctx),
+       .cra_alignmask          =       PADLOCK_ALIGNMENT - 1,
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(sha1_alg.cra_list),
+       .cra_init               =       padlock_cra_init,       /* allocates the staging page */
+       .cra_exit               =       padlock_cra_exit,       /* releases the staging page */
+       .cra_u                  =       {
+               .digest = {
+                       .dia_digestsize =       SHA1_DIGEST_SIZE,
+                       .dia_init       =       padlock_sha1_init,      /* selects padlock_do_sha1 */
+                       .dia_update     =       padlock_sha_update,     /* buffers input or feeds the fallback */
+                       .dia_final      =       padlock_sha_final,      /* runs the hardware or finishes the fallback */
+               }
+       }
+};
+
+static struct crypto_alg sha256_alg = {        /* "sha256" digest registered by padlock_init_sha() */
+       .cra_name               =       "sha256",
+       .cra_driver_name        =       "sha256-padlock",
+       .cra_priority           =       PADLOCK_CRA_PRIORITY,
+       .cra_flags              =       CRYPTO_ALG_TYPE_DIGEST,
+       .cra_blocksize          =       SHA256_HMAC_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct padlock_sha_ctx),
+       .cra_alignmask          =       PADLOCK_ALIGNMENT - 1,
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(sha256_alg.cra_list),
+       .cra_init               =       padlock_cra_init,       /* allocates the staging page */
+       .cra_exit               =       padlock_cra_exit,       /* releases the staging page */
+       .cra_u                  =       {
+               .digest = {
+                       .dia_digestsize =       SHA256_DIGEST_SIZE,
+                       .dia_init       =       padlock_sha256_init,    /* selects padlock_do_sha256 */
+                       .dia_update     =       padlock_sha_update,     /* buffers input or feeds the fallback */
+                       .dia_final      =       padlock_sha_final,      /* runs the hardware or finishes the fallback */
+               }
+       }
+};
+
+/*
+ * Allocate the fallback transforms and register both PadLock digests.
+ *
+ * One tfm per fallback is held for the lifetime of the module to make
+ * sure the fallback modules are loaded and available.
+ *
+ * Returns 0 on success. If a fallback cannot be allocated the result is
+ * -ENOENT; if registering an algorithm fails, the error code from
+ * crypto_register_alg() is returned. Everything set up before the
+ * failure is undone.
+ */
+int __init padlock_init_sha(void)
+{
+       int rc = -ENOENT;
+
+       tfm_sha1 = crypto_alloc_tfm(sha1_fallback, 0);
+       if (!tfm_sha1) {
+               printk(KERN_WARNING PFX "Couldn't load fallback module for "
+                      "'%s'. Tried '%s'.\n",
+                      sha1_alg.cra_name, sha1_fallback);
+               goto out;
+       }
+       printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n",
+              sha1_alg.cra_name,
+              crypto_tfm_alg_driver_name(tfm_sha1),
+              crypto_tfm_alg_priority(tfm_sha1));
+
+       tfm_sha256 = crypto_alloc_tfm(sha256_fallback, 0);
+       if (!tfm_sha256) {
+               printk(KERN_WARNING PFX "Couldn't load fallback module for "
+                      "'%s'. Tried '%s'.\n",
+                      sha256_alg.cra_name, sha256_fallback);
+               goto out_free1;
+       }
+       printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n",
+              sha256_alg.cra_name,
+              crypto_tfm_alg_driver_name(tfm_sha256),
+              crypto_tfm_alg_priority(tfm_sha256));
+
+       rc = crypto_register_alg(&sha1_alg);
+       if (rc)
+               goto out_free256;
+
+       rc = crypto_register_alg(&sha256_alg);
+       if (rc)
+               goto out_unreg1;
+
+       printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 "
+              "algorithms.\n");
+
+       return 0;
+
+out_unreg1:
+       crypto_unregister_alg(&sha1_alg);
+out_free256:
+       crypto_free_tfm(tfm_sha256);
+out_free1:
+       crypto_free_tfm(tfm_sha1);
+out:
+       /* Hand back the real reason instead of a blanket -ENOENT. */
+       return rc;
+}
+
+/*
+ * Undo padlock_init_sha() in reverse order.
+ *
+ * The algorithms are unregistered before the fallback tfms are freed,
+ * because the dia_init handlers read tfm_sha1/tfm_sha256 for as long as
+ * the algorithms can still be used.
+ */
+void __exit padlock_fini_sha(void)
+{
+       crypto_unregister_alg(&sha256_alg);
+       crypto_unregister_alg(&sha1_alg);
+       crypto_free_tfm(tfm_sha256);
+       crypto_free_tfm(tfm_sha1);
+}
+
+/*
+ * Module entry point.
+ *
+ * Returns -ENODEV when the PadLock Hash Engine is missing or not
+ * enabled, otherwise the result of padlock_init_sha().
+ */
+static int __init
+padlock_init(void)
+{
+       int ret;
+
+       if (!cpu_has_phe) {
+               printk(KERN_ERR PFX "VIA PadLock Hash Engine not detected.\n");
+               return -ENODEV;
+       }
+
+       if (!cpu_has_phe_enabled) {
+               printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. "
+                      "Hmm, strange...\n");
+               return -ENODEV;
+       }
+
+       ret = padlock_init_sha();
+       if (ret) {
+               printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization "
+                      "failed.\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static void __exit
+padlock_fini(void)
+{
+       padlock_fini_sha();     /* module exit: undo what padlock_init_sha() set up */
+}
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("VIA PadLock Hash Engine support.");
+/* The header of this file grants GPL (version 2 or later) only, so
+ * don't advertise a BSD option here. */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+
+MODULE_ALIAS("sha1-padlock");
+MODULE_ALIAS("sha256-padlock");
Index: linux-2.6.16.13-xenU/drivers/crypto/Kconfig
===================================================================
--- linux-2.6.16.13-xenU.orig/drivers/crypto/Kconfig
+++ linux-2.6.16.13-xenU/drivers/crypto/Kconfig
@@ -20,4 +20,13 @@ config CRYPTO_DEV_PADLOCK_AES
        help
          Use VIA PadLock for AES algorithm.
 
+config CRYPTO_DEV_PADLOCK_SHA
+       tristate "Support for SHA1/SHA256 in VIA PadLock"
+       depends on CRYPTO_DEV_PADLOCK
+       select CRYPTO_SHA1
+       select CRYPTO_SHA256
+       default m
+       help
+         Use VIA PadLock for SHA1/SHA256 algorithms.
+
 endmenu
Index: linux-2.6.16.13-xenU/drivers/crypto/Makefile
===================================================================
--- linux-2.6.16.13-xenU.orig/drivers/crypto/Makefile
+++ linux-2.6.16.13-xenU/drivers/crypto/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o
 
 padlock-objs-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
+obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
 
 padlock-objs := padlock-generic.o $(padlock-objs-y)
 
Index: linux-2.6.16.13-xenU/drivers/crypto/padlock-generic.c
===================================================================
--- linux-2.6.16.13-xenU.orig/drivers/crypto/padlock-generic.c
+++ linux-2.6.16.13-xenU/drivers/crypto/padlock-generic.c
@@ -3,7 +3,8 @@
  *
  * Support for VIA PadLock hardware crypto engine.
  *
- * Copyright (c) 2004  Michal Ludvig <[EMAIL PROTECTED]>
+ * Copyright (c) 2004-2006  Michal Ludvig <[EMAIL PROTECTED]>
+ *                          http://www.logix.cz/michal/devel/padlock
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,7 +24,7 @@ static int __init
 padlock_init(void)
 {
        int ret = -ENOSYS;
-       
+
        if (!cpu_has_xcrypt) {
                printk(KERN_ERR PFX "VIA PadLock not detected.\n");
                return -ENODEV;
Index: linux-2.6.16.13-xenU/drivers/crypto/padlock.h
===================================================================
--- linux-2.6.16.13-xenU.orig/drivers/crypto/padlock.h
+++ linux-2.6.16.13-xenU/drivers/crypto/padlock.h
@@ -33,4 +33,9 @@ int padlock_init_aes(void);
 void padlock_fini_aes(void);
 #endif
 
+#ifdef CONFIG_CRYPTO_DEV_PADLOCK_SHA
+int padlock_init_sha(void);
+void padlock_fini_sha(void);
+#endif
+
 #endif /* _CRYPTO_PADLOCK_H */

--


-
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to