--- aes_cfb.c.prev	2006-05-25 15:34:04.000000000 +1000
+++ aes_cfb.c	2006-05-26 16:56:38.000000000 +1000
@@ -113,6 +113,7 @@
 #include <assert.h>
 
 #include <openssl/aes.h>
+#include <openssl/bn.h>
 #include "aes_locl.h"
 #include "e_os.h"
 
@@ -121,38 +122,103 @@
  * 128bit block we have used is contained in *num;
  */
 
+/*
+ * 128-bit block helpers.  u64 must be exactly 64 bits wide: BN_ULONG is used
+ * for both 64-bit configs since "unsigned long" can be 32 bits under SIXTY_FOUR_BIT.
+ */
+#undef u64
+#if defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)
+#define u64 BN_ULONG
+#elif defined(THIRTY_TWO_BIT) && defined(BN_ULLONG)
+#define u64 BN_ULLONG
+#endif
+
+#if defined(u64)
+typedef char aes_cfb_u64_is_8_bytes[(sizeof(u64) == 8) ? 1 : -1];
+/* r = v1 ^ v2 over 16 bytes; memcpy avoids misaligned and type-punned accesses. */
+#define xor128(v1, v2, r) do { \
+		u64 x_[2], y_[2]; \
+		memcpy(x_, (v1), 16); \
+		memcpy(y_, (v2), 16); \
+		x_[0] ^= y_[0]; x_[1] ^= y_[1]; \
+		memcpy((r), x_, 16); \
+	} while (0)
+#else
+/* Portable fallback: r = v1 ^ v2, one byte at a time, over 16 bytes. */
+#define xor128(v1, v2, r) do { \
+		unsigned int i_; \
+		for (i_ = 0; i_ < 16; i_++) \
+			*((r) + i_) = *((v1) + i_) ^ *((v2) + i_); \
+	} while (0)
+#endif
+
+/* Copy the 16-byte block at v to r; v and r must not overlap. */
+#define move128(v, r) memcpy((r), (v), 16)
+
 void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
 	const unsigned long length, const AES_KEY *key,
 	unsigned char *ivec, int *num, const int enc) {
 
 	unsigned int n;
-	unsigned long l = length;
+	unsigned long l = 0;	/* index of the next byte of in/out to process */
 	unsigned char c;
+	unsigned char cv[AES_BLOCK_SIZE];	/* copy of the input block that becomes ivec when decrypting */
 
 	assert(in && out && key && ivec && num);
 
 	n = *num;
 
 	if (enc) {
-		while (l--) {
-			if (n == 0) {
-				AES_encrypt(ivec, ivec, key);
+		if (n) {	/* n != 0: consume the rest of ivec before encrypting a new block */
+			for (; l < length; l++) {
+				ivec[n] = out[l] = ivec[n] ^ in[l];
+				if(!(n = (n + 1) % AES_BLOCK_SIZE)) {
+					l++;	/* break skips the loop increment, so count this byte here */
+					break;
+				}
 			}
-			ivec[n] = *(out++) = *(in++) ^ ivec[n];
-			n = (n+1) % AES_BLOCK_SIZE;
 		}
+		/* Whole blocks: encrypt ivec in place, XOR it with the input, then copy the output into ivec. */
+		for (; l + AES_BLOCK_SIZE <= length; l += AES_BLOCK_SIZE) {
+			AES_encrypt(ivec, ivec, key);
+			xor128(ivec, in + l, out + l);
+			move128(out + l, ivec);
+		}
+		/* Fewer than AES_BLOCK_SIZE bytes remain (n is 0 if any do): encrypt ivec once more and use its first bytes. */
+		if((length - l)) AES_encrypt(ivec, ivec, key);
+		for (; l < length; l++) {
+			ivec[n] = out[l] = ivec[n] ^ in[l];
+			n++;
+		}
+
 	} else {
-		while (l--) {
-			if (n == 0) {
-				AES_encrypt(ivec, ivec, key);
+		if (n) {	/* decrypt: same three stages, but ivec is refilled from the input bytes */
+			for (; l < length; l++) {
+				c = in[l];	/* saved before out[l] is written, so it survives even when in == out */
+				out[l] = ivec[n] ^ in[l];
+				ivec[n] = c;
+				if(!(n = (n + 1) % AES_BLOCK_SIZE)) {
+					l++;
+					break;
+				}
 			}
-			c = *(in);
-			*(out++) = *(in++) ^ ivec[n];
+		}
+		/* Whole blocks: cv copies the input block before out + l is written, then becomes the new ivec. */
+		for (; l + AES_BLOCK_SIZE <= length; l += AES_BLOCK_SIZE) {
+			AES_encrypt(ivec, ivec, key);
+			move128(in + l, cv);
+			xor128(ivec, in + l, out + l);
+			move128(cv, ivec);
+		}
+		/* Fewer than AES_BLOCK_SIZE bytes remain: same tail handling as the encrypt branch. */
+		if((length - l)) AES_encrypt(ivec, ivec, key);
+		for (; l < length; l++) {
+			c = in[l];
+			out[l] = ivec[n] ^ in[l];
 			ivec[n] = c;
-			n = (n+1) % AES_BLOCK_SIZE;
+			n++;
 		}
 	}
-
 	*num=n;
 }
 
