Hi Andy!
I hope I addressed all your wishes w.r.t. the PadLock engine. Attached
is the interdiff since the last version, on my PadLock page is the full
patch (http://www.logix.cz/michal/devel/padlock).
Most recent changes are:
- Rewritten the aligner to use only a fixed-size buffer on the stack
instead of malloc()ed space.
- Control word, IV and KEY are now in ctx->cipher_data
- Extended key is computed in place.
- Using padlock_bswapl() instead of htonl().
Do you now like it more? :-)
Michal Ludvig
--
* A mouse is a device used to point at the xterm you want to type in.
* Personal homepage - http://www.logix.cz/michal
Index: crypto/engine/eng_padlock.c
===================================================================
--- crypto/engine/eng_padlock.c.orig
+++ crypto/engine/eng_padlock.c
@@ -65,8 +65,6 @@
#include <string.h>
#include <inttypes.h>
-#include <netinet/in.h> /* we need htonl() */
-
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
@@ -323,14 +321,10 @@
sizeof(padlock_cipher_nids[0]));
/* Function prototypes ... */
-static int padlock_aes_init_key_128(EVP_CIPHER_CTX *ctx, const unsigned char *key,
- const unsigned char *iv, int enc);
-static int padlock_aes_init_key_192(EVP_CIPHER_CTX *ctx, const unsigned char *key,
- const unsigned char *iv, int enc);
-static int padlock_aes_init_key_256(EVP_CIPHER_CTX *ctx, const unsigned char *key,
- const unsigned char *iv, int enc);
+static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
- const unsigned char *in, unsigned int inl);
+ const unsigned char *in, unsigned int nbytes);
/* Some AES-related constants */
#define AES_BLOCK_SIZE 16
@@ -341,14 +335,31 @@
#define AES_KEY_WORDS (4 * (AES_MAXNR + 1))
#define AES_KEY_BYTES (AES_KEY_WORDS * 4)
+/* Control word. */
+union cword {
+ uint32_t cword[4];
+ struct {
+ int rounds:4;
+ int algo:3;
+ int keygen:1;
+ int interm:1;
+ int encdec:1;
+ int ksize:2;
+ } b;
+};
+
/* Here we store the plain key for AES128
and the extended key for AES192/AES256 */
-struct padlock_aes_key
+struct padlock_cipher_data
{
- uint32_t aes_key[AES_KEY_WORDS];
- int extended;
+ uint8_t iv[AES_BLOCK_SIZE]; /* Initialization vector */
+ union cword cword; /* Control word */
+ AES_KEY ks; /* Encryption key */
+ uint8_t *key; /* Encryption key pointer */
};
+#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)(ctx->cipher_data + ((0x10 - ((size_t)(ctx->cipher_data) & 0x0F)) & 0x0F)))
+
/* Declaring so many ciphers by hand would be a pain.
Instead introduce a bit of preprocessor magic :-) */
#define DECLARE_AES_EVP(ksize,lmode,umode) \
@@ -358,10 +369,10 @@
AES_KEY_SIZE_##ksize, \
AES_BLOCK_SIZE, \
0 | EVP_CIPH_##umode##_MODE, \
- padlock_aes_init_key_##ksize, \
+ padlock_aes_init_key, \
padlock_aes_cipher, \
NULL, \
- sizeof(struct padlock_aes_key), \
+ sizeof(struct padlock_cipher_data) + 16, \
EVP_CIPHER_set_asn1_iv, \
EVP_CIPHER_get_asn1_iv, \
NULL, \
@@ -442,100 +453,72 @@
return 1;
}
-/* Generate an extended AES key in software. Needed for AES192/AES256 */
-static int
-padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
- const unsigned char *iv, int enc,
- int key_len)
+/* Our own htonl()/ntohl() */
+static inline void
+padlock_bswapl(uint32_t *arg)
{
- AES_KEY ks;
- struct padlock_aes_key *tmp_aes_key = NULL;
- int i;
-
- tmp_aes_key = (struct padlock_aes_key *) (ctx->cipher_data);
- memset(tmp_aes_key, 0, sizeof(struct padlock_aes_key));
- if (key) {
- if (enc)
- AES_set_encrypt_key(key, key_len, &ks);
- else
- AES_set_decrypt_key(key, key_len, &ks);
-
- /* OpenSSL internal functions use byte-swapped extended key. */
- for (i = 0; i < AES_KEY_WORDS; i++)
- tmp_aes_key->aes_key[i] = htonl(ks.rd_key[i]);
-
- tmp_aes_key->extended = 1;
- }
-
- return 1;
+ asm volatile ("bswapl %0" : "+r"(*arg));
}
-/* PadLock can generate an extended key for AES128 in hardware */
+/* Prepare the encryption key for PadLock usage */
static int
-padlock_aes_init_key_128(EVP_CIPHER_CTX *ctx, const unsigned char *key,
- const unsigned char *iv, int enc)
+padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ const unsigned char *iv, int enc)
{
- struct padlock_aes_key *tmp_aes_key = NULL;
+ struct padlock_cipher_data *cdata;
+ int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
+ int i;
- tmp_aes_key = (struct padlock_aes_key *) (ctx->cipher_data);
- memset(tmp_aes_key, 0, sizeof(struct padlock_aes_key));
+ cdata = ALIGNED_CIPHER_DATA(ctx);
+ memset(cdata, 0, sizeof(struct padlock_cipher_data));
+
+ /* Prepare Control word. */
+ cdata->cword.b.encdec = (ctx->encrypt == 0);
+ cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
+ cdata->cword.b.ksize = (key_len - 128) / 64;
+
+ cdata->key = (uint8_t *)(cdata->ks.rd_key);
+
if (key) {
- memcpy (tmp_aes_key->aes_key, key, AES_KEY_SIZE_128);
- tmp_aes_key->extended = 0;
+ switch(key_len) {
+ case 128:
+ /* PadLock can generate an extended key for
+ AES128 in hardware */
+ memcpy (cdata->key, key, AES_KEY_SIZE_128);
+ cdata->cword.b.keygen = 0;
+ break;
+
+ case 192:
+ case 256:
+ /* Generate an extended AES key in software.
+ Needed for AES192/AES256 */
+ if (enc)
+ AES_set_encrypt_key(key, key_len, &cdata->ks);
+ else
+ AES_set_decrypt_key(key, key_len, &cdata->ks);
+
+ /* OpenSSL internal functions use byte-swapped extended key. */
+ for (i = 0; i < AES_KEY_WORDS; i++)
+ padlock_bswapl((uint32_t *)&(cdata->ks.rd_key[i]));
+
+ cdata->cword.b.keygen = 1;
+ break;
+
+ default:
+ /* ERROR */
+ return 0;
+ }
}
return 1;
}
-static int
-padlock_aes_init_key_192(EVP_CIPHER_CTX *ctx, const unsigned char *key,
- const unsigned char *iv, int enc)
-{
- return padlock_aes_init_key(ctx, key, iv, enc, 192);
-}
-
-static int
-padlock_aes_init_key_256(EVP_CIPHER_CTX *ctx, const unsigned char *key,
- const unsigned char *iv, int enc)
-{
- return padlock_aes_init_key(ctx, key, iv, enc, 256);
-}
-
-/* Data for the VIA PadLock *must* be aligned on 16-Bytes boundaries! */
-static void *
-padlock_aligned_malloc(size_t size, size_t alignment, void **index)
-{
- char *ptr;
-
- ptr = malloc(size + alignment);
- *index = ptr;
- if (alignment > 1 && ((long)ptr & (alignment - 1))) {
- ptr += alignment - ((long)ptr & (alignment - 1));
- }
-
- return ptr;
-}
-
-/* Control word. */
-union cword {
- uint32_t cword[4];
- struct {
- int rounds:4;
- int algo:3;
- int keygen:1;
- int interm:1;
- int encdec:1;
- int ksize:2;
- } b;
-};
-
/* Template for all modes */
#define PADLOCK_XCRYPT_ASM(name,opcode) \
static inline void name(uint8_t *input, uint8_t *output, uint8_t *key, \
uint8_t **iv, void *control_word, uint32_t count) \
{ \
- asm volatile ("pushfl; popfl\n" \
- "pushl %%ebx\n" \
+ asm volatile ("pushl %%ebx\n" \
"movl %%eax, %%ebx\n" \
"movl %0, %%eax\n" \
"movl (%%eax), %%eax\n" \
@@ -555,126 +538,112 @@
/* Re-align the arguments to 16-Bytes boundaries and run the
encryption function itself. This function is not AES-specific. */
-static inline void
-padlock_aligner(uint8_t *out_arg, const uint8_t *in_arg, uint8_t *iv_arg,
- void *key, union cword *cword, size_t nbytes,
- size_t blocksize, int mode)
-{
- /* Don't blindly modify this structure - the items must
- be 16-Bytes aligned! */
- struct padlock_xcrypt_data {
- uint8_t iv[2*blocksize]; /* Initialization vector */
+static int
+padlock_aes_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out_arg, const uint8_t *in_arg,
+ unsigned int nbytes)
+{
+ struct padlock_realign_buffer {
+ #define REALIGN_SIZE 4096 /* Must be a multiple of 16! */
uint8_t buf[REALIGN_SIZE]; /* Buffer for data realignment */
};
+ struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx);
uint8_t *in, *out, *iv;
- void *index = NULL;
- char bigbuf[sizeof(struct padlock_xcrypt_data) + 16];
- struct padlock_xcrypt_data *data;
+ char bigbuf[sizeof(struct padlock_realign_buffer) + 16];
+ struct padlock_realign_buffer *realign;
+ int in_arg_aligned, out_arg_aligned, realign_in_loop = 0;
+ size_t realigned_bytes, blocksize = AES_BLOCK_SIZE;
- memset(bigbuf, 0, sizeof (bigbuf));
+ /* Force key reload.
+ TODO: ... only if the context is different from the last one. */
+ asm volatile ("pushf; popf");
- /* Place 'data' at the first 16-Bytes aligned address in 'bigbuf' */
- if (((long)bigbuf) & 0x0F)
- data = (void*)(bigbuf + 16 - ((long)bigbuf & 0x0F));
- else
- data = (void*)bigbuf;
+ /* Place 'realign' at the first 16-Bytes aligned address in 'bigbuf' */
+ realign = (void*)(bigbuf + ((16 - ((size_t)bigbuf & 0x0F)) & 0x0F));
/* Always make a local copy of IV - xcrypt may change it! */
- iv = data->iv;
- if (iv_arg)
- memcpy(iv, iv_arg, blocksize);
-
- /* Align 'in_arg' */
- if (((long)in_arg) & 0x0F) {
- in = padlock_aligned_malloc(nbytes, 16, &index);
- memcpy(in, in_arg, nbytes);
- }
- else
- in = (uint8_t*)in_arg;
+ iv = cdata->iv;
+ if (ctx->iv)
+ memcpy(iv, ctx->iv, blocksize);
+
+ /* Set in/out buffers depending on their alignment. */
+ in_arg_aligned = !(((size_t)in_arg) & 0x0F);
+ out_arg_aligned = !(((size_t)out_arg) & 0x0F);
- /* Align 'out_arg' */
- if (((long)out_arg) & 0x0F) {
- if (index)
- out = in; /* xcrypt can work "in place" */
+ if (in_arg_aligned) {
+ in = (uint8_t*)in_arg;
+ if (out_arg_aligned)
+ out = out_arg;
else
- out = padlock_aligned_malloc(nbytes, 16, &index);
- }
- else
- out = out_arg;
-
- /* Run xcrypt for a requested mode */
- switch (mode) {
- case EVP_CIPH_ECB_MODE:
- padlock_xcrypt_ecb(in, out, key, &iv, cword, nbytes/blocksize);
- break;
-
- case EVP_CIPH_CBC_MODE:
- padlock_xcrypt_cbc(in, out, key, &iv, cword, nbytes/blocksize);
- break;
-
- case EVP_CIPH_CFB_MODE:
- padlock_xcrypt_cfb(in, out, key, &iv, cword, nbytes/blocksize);
- break;
-
- case EVP_CIPH_OFB_MODE:
- padlock_xcrypt_ofb(in, out, key, &iv, cword, nbytes/blocksize);
- break;
-
- default:
- break;
- }
-
- /* Copy the 16-Byte aligned output to the caller's buffer. */
- if (out != out_arg)
- memcpy(out_arg, out, nbytes);
+ out = in;
+ } else {
+ if (out_arg_aligned) {
+ memcpy(out_arg, in_arg, nbytes);
+ in = out_arg; /* xcrypt "in place" */
+ out = in;
+ } else {
+ /* Tough but most common case - nothing is
+ aligned. We will re-align the input data
+ by parts in a loop. Each part should be
+ small enough to fit in the L1 D-cache
+ (hope that it helps a bit :-) */
+ realign_in_loop = 1;
+ realigned_bytes = 0;
+ in = realign->buf;
+ out = realign->buf;
+ }
+ }
+
+ /* Main encryption loop */
+ do {
+ size_t current_nbytes;
+ /* Realign the data if needed */
+ if (realign_in_loop) {
+ current_nbytes = nbytes < REALIGN_SIZE ? nbytes : REALIGN_SIZE;
+ memcpy(in, in_arg + realigned_bytes, current_nbytes);
+ } else
+ current_nbytes = nbytes;
+
+ /* Run xcrypt for a requested mode */
+ switch (EVP_CIPHER_CTX_mode(ctx)) {
+ case EVP_CIPH_ECB_MODE:
+ padlock_xcrypt_ecb(in, out, cdata->key, &iv, &cdata->cword, current_nbytes/blocksize);
+ break;
+
+ case EVP_CIPH_CBC_MODE:
+ padlock_xcrypt_cbc(in, out, cdata->key, &iv, &cdata->cword, current_nbytes/blocksize);
+ break;
+
+ case EVP_CIPH_CFB_MODE:
+ padlock_xcrypt_cfb(in, out, cdata->key, &iv, &cdata->cword, current_nbytes/blocksize);
+ break;
+
+ case EVP_CIPH_OFB_MODE:
+ padlock_xcrypt_ofb(in, out, cdata->key, &iv, &cdata->cword, current_nbytes/blocksize);
+ break;
+
+ default:
+ break;
+ }
+
+ /* Save the result */
+ if (realign_in_loop) {
+ memcpy(out_arg + realigned_bytes, out, current_nbytes);
+ nbytes -= current_nbytes;
+ realigned_bytes += current_nbytes;
+ } else {
+ /* Copy the 16-Byte aligned output to the caller's buffer. */
+ if (out != out_arg)
+ memcpy(out_arg, out, nbytes);
+ }
+ } while (realign_in_loop && nbytes);
+
+ /* Clean the realign buffer if it was used */
+ if (realign_in_loop)
+ memset(in, 0, REALIGN_SIZE);
/* Save modified IV */
- memcpy (iv_arg, iv, blocksize);
-
- if (index)
- free(index);
-
-}
-
-/* Entry point specific for AES cipher */
-static int
-padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
- const unsigned char *in, unsigned int in_len)
-{
- char bigbuf[sizeof(union cword) + 16];
- union cword *cword;
- void *key_arg, *key, *iv, *index_key=NULL;
- int mode = EVP_CIPHER_CTX_mode(ctx);
- int key_len = EVP_CIPHER_CTX_key_length(ctx);
- struct padlock_aes_key *aes_key = (struct padlock_aes_key *) (ctx->cipher_data);
-
- /* Place 'data' at the first 16-Bytes aligned address in 'bigbuf'. */
- if (((long)bigbuf) & 0x0F)
- cword = (void*)(bigbuf + 16 - ((long)bigbuf & 0x0F));
- else
- cword = (void*)bigbuf;
-
- /* Prepare Control word. */
- memset (cword, 0, sizeof(union cword));
- cword->b.encdec = (ctx->encrypt == 0);
- cword->b.rounds = 10 + (key_len - 16) / 4;
- cword->b.ksize = (key_len - 16) / 8;
- cword->b.keygen = aes_key->extended;
-
- key_arg = aes_key->aes_key;
- if ((long)key_arg & 0x0f) {
- key = padlock_aligned_malloc(AES_KEY_BYTES, 16, &index_key);
- memcpy (key, key_arg, AES_KEY_BYTES);
- } else
- key = key_arg;
-
- iv = ctx->iv;
-
- padlock_aligner(out, in, iv, key, cword,
- in_len, AES_BLOCK_SIZE, mode);
-
- if (index_key)
- free(index_key);
+ memcpy (ctx->iv, iv, blocksize);
return 1;
}