Hello, guys!

> > The other angle that comes to mind is if there are machines where
> > "unsigned int" is 64 bits. Seems like Crays are that way.
> > Will the code still work on such a machine?

Find attached a patch relative to the openssl-SNAP-19990421 snapshot.
This is my "final" proposal:-) Well, I call it "final" (i.e. in quotes)
because I still have some questions and comments.

1. crypto/sha/sha_locl.h says:

	#if defined(WIN32)
	#define ROTATE(a,n)	_lrotl(a,n)

   Isn't _lrotl Microsoft C specific? If it is, wouldn't #ifdef _MSC_VER
   (or whatever Microsoft compilers actually predefine) be more
   appropriate?

2. I've added some GNU C machine-code templates and guarded 'em with
   #if defined(__GNUC__). I'm not very well versed in gcc releases and
   wonder if anybody has a clue whether they should be shielded with an
   extra && __GNUC__>=2 or something.

3. In the Intel part of the mentioned section you'll find a template
   with the bswap instruction, which is not implemented on the 386. For
   this reason I've *preliminarily* shielded it with #ifndef I386_ONLY.
   I suggest that this macro (or *whatever* you find more appropriate)
   should get defined through opensslconf.h whenever one runs
   './Configure 386 ...'.

4. You'll also find ROTATE for PowerPC in this __GNUC__ section. In
   general (as far as I know) the instruction used is supported even by
   Power CPUs. What I'd like to figure out is which macros are
   predefined on those non-PowerPC boxes, so that they could be added
   too...

And finally an implementation comment. The original version did (in the
little-endian case:-) memcpy("here","there","much") and then
Endian_Reverse32("here") "much" times. As the motto of modern computing
is "memory i/o is bad, calculations are good," I've replaced the
memcpy("here",...)/Endian_Reverse32("here") pair with
{ tmp="there"; Endian_Reverse32(tmp); "here"=tmp; } performed "much"
times, thus reducing the number of writes. But this sequence is engaged
only if "there" is appropriately aligned (RISCs will suffer if it's
not). If it's not aligned, the code falls through to the portion where
data is picked up byte by byte and assembled into longs with shifts and
ORs.

Cheers. Andy.
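P.S. A few illustrations to make the questions above more concrete.
For question 1, the guard I have in mind would test the compiler
rather than the OS; treat this as a sketch until somebody checks the
Microsoft docs:

	/* sketch only: _lrotl is (I believe) a Microsoft C intrinsic,
	 * so test for the compiler, not for WIN32 */
	#if defined(_MSC_VER)
	#include <stdlib.h>	/* declares _lrotl() */
	#define ROTATE(a,n)	_lrotl(a,n)
	#else
	#define ROTATE(a,n)	(((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
	#endif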
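For question 2, the extra shielding would look like the following;
whether gcc 1.x really chokes on this extended asm syntax is exactly
what I can't verify, so the >=2 cut-off is an assumption:

	/* sketch: same i386 rotate template as in the patch, engaged
	 * only for GNU C 2.x and later */
	#if defined(__GNUC__) && __GNUC__>=2 && defined(__i386)
	#define ROTATE(a,n)	({ register unsigned int ret;	\
					asm ("roll %1,%0"	\
					: "=r"(ret)		\
					: "I"(n), "0"(a)	\
					: "cc");		\
				   ret;				\
				})
	#endif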
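For question 3, all I expect from './Configure 386 ...' is that
opensslconf.h ends up containing something like the line below (the
exact macro name is open for discussion):

	/* hypothetical fragment of opensslconf.h after
	 * './Configure 386 ...': target may be a real 386, no bswap */
	#define I386_ONLY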
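And to spell the implementation comment out in compilable C (u32,
bswap32, old_way and new_way are made-up names, for illustration
only):

	#include <string.h>

	typedef unsigned int u32;	/* assuming 32-bit unsigned int */

	static u32 bswap32(u32 x)	/* the job of Endian_Reverse32 */
		{
		x=((x&0xFF00FF00U)>>8)|((x&0x00FF00FFU)<<8);
		return (x<<16)|(x>>16);
		}

	/* original: memcpy writes every word once, then the in-place
	 * reversal reads and writes it again */
	static void old_way(u32 *here, const u32 *there, int much)
		{
		int i;
		memcpy(here,there,much*sizeof(u32));
		for (i=0; i<much; i++)
			here[i]=bswap32(here[i]);
		}

	/* patched: load the word into a register, reverse it there,
	 * store it once -- half the writes to "here" */
	static void new_way(u32 *here, const u32 *there, int much)
		{
		int i;
		for (i=0; i<much; i++)
			{
			u32 tmp=there[i];	/* needs aligned "there"! */
			here[i]=bswap32(tmp);
			}
		}

Needless to say, new_way is engaged only when "there" is properly
aligned; otherwise the byte-by-byte tail code takes over.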
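Finally, re the 64-bit "unsigned int" question quoted at the top: the
code stays correct with a wider SHA_LONG because every intermediate
value gets masked back to 32 bits. A toy demonstration (not part of
the patch):

	/* ROTATE stays correct when "a" is a 64-bit unsigned type
	 * carrying a 32-bit value, because the right-shift operand
	 * is masked first; garbage in the upper bits is stripped by
	 * the final &0xffffffffUL, just as in sha_block() */
	#include <stdio.h>

	#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))

	int main(void)
		{
		unsigned long v=0x80000001UL;	/* may be 64 bits wide */
		unsigned long r=ROTATE(v,1)&0xffffffffUL;
		printf("%08lx\n",r);		/* prints 00000003 */
		return 0;
		}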
*** ./crypto/sha/sha.h.orig	Tue Apr 20 19:00:12 1999
--- ./crypto/sha/sha.h	Thu Apr 29 16:23:49 1999
***************
*** 63,81 ****
  extern "C" {
  #endif
  
! #define SHA_CBLOCK	64
! #define SHA_LBLOCK	16
! #define SHA_BLOCK	16
! #define SHA_LAST_BLOCK	56
! #define SHA_LENGTH_BLOCK 8
! #define SHA_DIGEST_LENGTH 20
! #ifdef WIN16
  #define SHA_LONG unsigned long
  #else
  #define SHA_LONG unsigned int
! #endif
  
  typedef struct SHAstate_st
  	{
  	SHA_LONG h0,h1,h2,h3,h4;
--- 63,91 ----
  extern "C" {
  #endif
  
! /*
!  * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!  * ! SHA_LONG has to be at least 32 bits wide. If it's wider, then !
!  * ! SHA_LONG_LOG2 has to be defined along.                        !
!  * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!  */
! #if defined(WIN16) || defined(__LP32__)
  #define SHA_LONG unsigned long
+ #elif defined(_CRAY) || defined(__ILP64__)
+ #define SHA_LONG unsigned long
+ #define SHA_LONG_LOG2 3
  #else
  #define SHA_LONG unsigned int
! #endif
  
+ #define SHA_LBLOCK	16
+ #define SHA_CBLOCK	(SHA_LBLOCK*4)	/* SHA treats input data as a
+ 					 * contiguous array of 32 bit
+ 					 * wide big-endian values. */
+ #define SHA_LAST_BLOCK	(SHA_CBLOCK-8)
+ #define SHA_DIGEST_LENGTH 20
+ 
  typedef struct SHAstate_st
  	{
  	SHA_LONG h0,h1,h2,h3,h4;
*** ./crypto/sha/sha_locl.h.orig	Tue Apr 20 19:00:13 1999
--- ./crypto/sha/sha_locl.h	Thu Apr 29 18:41:09 1999
***************
*** 158,187 ****
  	*((c)++)=(unsigned char)(((l)>>16)&0xff), \
  	*((c)++)=(unsigned char)(((l)>>24)&0xff))
  
  #undef ROTATE
  #if defined(WIN32)
  #define ROTATE(a,n)	_lrotl(a,n)
! #else
! #define ROTATE(a,n)	(((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
  #endif
  
  /* A nice byte order reversal from Wei Dai <[EMAIL PROTECTED]> */
! #if defined(WIN32)
  /* 5 instructions with rotate instruction, else 9 */
  #define Endian_Reverse32(a) \
  	{ \
! 	unsigned long l=(a); \
! 	(a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
  	}
  #else
  /* 6 instructions with rotate instruction, else 8 */
  #define Endian_Reverse32(a) \
  	{ \
! 	unsigned long l=(a); \
! 	l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
! 	(a)=ROTATE(l,16L); \
  	}
  #endif
  
  /* As pointed out by Wei Dai <[EMAIL PROTECTED]>, F() below can be
   * simplified to the code in F_00_19. Wei attributes these optimisations
--- 158,238 ----
  	*((c)++)=(unsigned char)(((l)>>16)&0xff), \
  	*((c)++)=(unsigned char)(((l)>>24)&0xff))
  
+ #ifndef SHA_LONG_LOG2
+ #define SHA_LONG_LOG2	2	/* default to 32 bits */
+ #endif
+ 
  #undef ROTATE
+ #undef Endian_Reverse32
  #if defined(WIN32)
  #define ROTATE(a,n)	_lrotl(a,n)
! #elif defined(__GNUC__)
! /* some inline assembler templates by <[EMAIL PROTECTED]> */
! #if defined(__i386)
! #define ROTATE(a,n)	({ register unsigned int ret;	\
! 				asm ("roll %1,%0"	\
! 				: "=r"(ret)		\
! 				: "I"(n), "0"(a)	\
! 				: "cc");		\
! 			   ret;				\
! 			})
! #ifndef I386_ONLY
! #define Endian_Reverse32(a) \
! 	{ register unsigned int l=(a);	\
! 	  asm ("bswapl %0"		\
! 		: "=r"(l) : "0"(l));	\
! 	  (a)=l;			\
! 	}
  #endif
+ #elif defined(__powerpc)
+ #define ROTATE(a,n)	({ register unsigned int ret;		\
+ 				asm ("rlwinm %0,%1,%2,0,31"	\
+ 				: "=r"(ret)			\
+ 				: "r"(a), "I"(n));		\
+ 			   ret;					\
+ 			})
+ /* I could write Endian_Reverse32 for PowerPC also, but PowerPC-based
+  * systems are big-endian so that the macro would never get engaged.
+  */
+ #endif
+ #endif
  
  /* A nice byte order reversal from Wei Dai <[EMAIL PROTECTED]> */
! #ifdef ROTATE
! #ifndef Endian_Reverse32
  /* 5 instructions with rotate instruction, else 9 */
  #define Endian_Reverse32(a) \
  	{ \
! 	unsigned long t=(a); \
! 	(a)=((ROTATE(t,8)&0x00FF00FF)|(ROTATE((t&0x00FF00FF),24))); \
  	}
+ #endif
  #else
+ #define ROTATE(a,n)	(((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
+ #ifndef Endian_Reverse32
  /* 6 instructions with rotate instruction, else 8 */
  #define Endian_Reverse32(a) \
  	{ \
! 	unsigned long t=(a); \
! 	t=(((t>>8)&0x00FF00FF)|((t&0x00FF00FF)<<8)); \
! 	(a)=ROTATE(t,16); \
  	}
  #endif
+ /*
+  * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
+  * It's rewritten as above for two reasons:
+  *	- RISCs aren't good at long constants and have to explicitly
+  *	  compose 'em with several (well, usually 2) instructions in a
+  *	  register before performing the actual operation and (as you
+  *	  already realized:-) having the same constant should inspire
+  *	  the compiler to permanently allocate the only register for it;
+  *	- most modern CPUs have two ALUs, but usually only one has
+  *	  circuitry for shifts:-( this minor tweak inspires the compiler
+  *	  to schedule shift instructions in a better way...
+  *
+  * <[EMAIL PROTECTED]>
+  */
+ #endif
  
  /* As pointed out by Wei Dai <[EMAIL PROTECTED]>, F() below can be
   * simplified to the code in F_00_19. Wei attributes these optimisations
***************
*** 195,207 ****
  #define	F_40_59(b,c,d)	(((b) & (c)) | (((b)|(c)) & (d)))
  #define	F_60_79(b,c,d)	F_20_39(b,c,d)
  
- #ifdef SHA_0
  #undef Xupdate
  #define Xupdate(a,i,ia,ib,ic,id) X[(i)&0x0f]=(a)=\
  	(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);
  #endif
  #ifdef SHA_1
- #undef Xupdate
  #define Xupdate(a,i,ia,ib,ic,id) (a)=\
  	(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);\
  	X[(i)&0x0f]=(a)=ROTATE((a),1);
--- 246,257 ----
  #define	F_40_59(b,c,d)	(((b) & (c)) | (((b)|(c)) & (d)))
  #define	F_60_79(b,c,d)	F_20_39(b,c,d)
  
  #undef Xupdate
+ #ifdef SHA_0
  #define Xupdate(a,i,ia,ib,ic,id) X[(i)&0x0f]=(a)=\
  	(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);
  #endif
  #ifdef SHA_1
  #define Xupdate(a,i,ia,ib,ic,id) (a)=\
  	(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);\
  	X[(i)&0x0f]=(a)=ROTATE((a),1);
*** ./crypto/sha/sha_dgst.c.orig	Tue Apr 20 19:00:12 1999
--- ./crypto/sha/sha_dgst.c	Thu Apr 29 17:52:26 1999
***************
*** 81,96 ****
  #define K_60_79	0xca62c1d6UL
  
  #ifndef NOPROTO
! void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
  #else
! void sha_block();
  #endif
  
! #define	M_c2nl		c2nl
! #define	M_p_c2nl	p_c2nl
! #define	M_c2nl_p	c2nl_p
! #define	M_p_c2nl_p	p_c2nl_p
! #define	M_nl2c		nl2c
  
  void SHA_Init(SHA_CTX *c)
  	{
--- 81,104 ----
  #define K_60_79	0xca62c1d6UL
  
  #ifndef NOPROTO
! static void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
  #else
! static void sha_block();
  #endif
  
! #if !defined(B_ENDIAN) && defined(SHA_ASM)
! #  define	M_c2nl		c2l
! #  define	M_p_c2nl	p_c2l
! #  define	M_c2nl_p	c2l_p
! #  define	M_p_c2nl_p	p_c2l_p
! #  define	M_nl2c		l2c
! #else
! #  define	M_c2nl		c2nl
! #  define	M_p_c2nl	p_c2nl
! #  define	M_c2nl_p	c2nl_p
! #  define	M_p_c2nl_p	p_c2nl_p
! #  define	M_nl2c		nl2c
! #endif
  
  void SHA_Init(SHA_CTX *c)
  	{
***************
*** 136,142 ****
  			}
  		len-=(SHA_CBLOCK-c->num);
! 		sha_block(c,p,64);
  		c->num=0;
  		/* drop through and do the rest */
  		}
--- 144,150 ----
  			}
  		len-=(SHA_CBLOCK-c->num);
! 		sha_block(c,p,1);
  		c->num=0;
  		/* drop through and do the rest */
  		}
***************
*** 173,179 ****
  	 * copies it to a local array. I should be able to do this for
  	 * the C version as well....
  	 */
! #if 1
  #if defined(B_ENDIAN) || defined(SHA_ASM)
  	if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
  		{
--- 181,187 ----
  	 * copies it to a local array. I should be able to do this for
  	 * the C version as well....
  	 */
! #if SHA_LONG_LOG2==2
  #if defined(B_ENDIAN) || defined(SHA_ASM)
  	if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
  		{
***************
*** 180,187 ****
  		sw=len/SHA_CBLOCK;
  		if (sw)
  			{
- 			sw*=SHA_CBLOCK;
  			sha_block(c,(SHA_LONG *)data,sw);
  			data+=sw;
  			len-=sw;
  			}
--- 188,195 ----
  		sw=len/SHA_CBLOCK;
  		if (sw)
  			{
  			sha_block(c,(SHA_LONG *)data,sw);
+ 			sw*=SHA_CBLOCK;
  			data+=sw;
  			len-=sw;
  			}
***************
*** 193,227 ****
  	p=c->data;
  	while (len >= SHA_CBLOCK)
  		{
! #if defined(B_ENDIAN) || defined(L_ENDIAN)
  		if (p != (SHA_LONG *)data)
  			memcpy(p,data,SHA_CBLOCK);
  		data+=SHA_CBLOCK;
! # ifdef L_ENDIAN
! #  ifndef SHA_ASM /* Will not happen */
! 		for (sw=(SHA_LBLOCK/4); sw; sw--)
  			{
! 			Endian_Reverse32(p[0]);
! 			Endian_Reverse32(p[1]);
! 			Endian_Reverse32(p[2]);
! 			Endian_Reverse32(p[3]);
! 			p+=4;
  			}
  		p=c->data;
! #  endif
! # endif
! #else
! 		for (sw=(SHA_BLOCK/4); sw; sw--)
  			{
! 			M_c2nl(data,l); *(p++)=l;
! 			M_c2nl(data,l); *(p++)=l;
! 			M_c2nl(data,l); *(p++)=l;
! 			M_c2nl(data,l); *(p++)=l;
  			}
  		p=c->data;
! #endif
! 		sha_block(c,p,64);
  		len-=SHA_CBLOCK;
  		}
  	ec=(int)len;
  	c->num=ec;
--- 201,261 ----
  	p=c->data;
  	while (len >= SHA_CBLOCK)
  		{
! #if SHA_LONG_LOG2==2
! #if defined(B_ENDIAN) || defined(SHA_ASM)
! #define SHA_NO_TAIL_CODE
! 		/*
! 		 * Basically we get here only when data happens
! 		 * to be unaligned.
! 		 */
  		if (p != (SHA_LONG *)data)
  			memcpy(p,data,SHA_CBLOCK);
  		data+=SHA_CBLOCK;
! 		sha_block(c,p=c->data,1);
! 		len-=SHA_CBLOCK;
! #else	/* little-endian */
! #define BE_COPY(dst,src,i)	{		\
! 		l = ((SHA_LONG *)src)[i];	\
! 		Endian_Reverse32(l);		\
! 		dst[i] = l;			\
! 		}
! 		if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
  			{
! 			for (sw=(SHA_LBLOCK/4); sw; sw--)
! 				{
! 				BE_COPY(p,data,0);
! 				BE_COPY(p,data,1);
! 				BE_COPY(p,data,2);
! 				BE_COPY(p,data,3);
! 				p+=4;
! 				data += 4*sizeof(SHA_LONG);
! 				}
! 			sha_block(c,p=c->data,1);
! 			len-=SHA_CBLOCK;
! 			continue;
  			}
+ #endif
+ #endif
+ #ifndef SHA_NO_TAIL_CODE
+ 		/*
+ 		 * In addition to the "sizeof(SHA_LONG)!=4" case the
+ 		 * following code covers unaligned access cases on
+ 		 * little-endian machines.
+ 		 *			<[EMAIL PROTECTED]>
+ 		 */
  		p=c->data;
! 		for (sw=(SHA_LBLOCK/4); sw; sw--)
  			{
! 			M_c2nl(data,l); p[0]=l;
! 			M_c2nl(data,l); p[1]=l;
! 			M_c2nl(data,l); p[2]=l;
! 			M_c2nl(data,l); p[3]=l;
! 			p+=4;
  			}
  		p=c->data;
! 		sha_block(c,p,1);
  		len-=SHA_CBLOCK;
+ #endif
  		}
  	ec=(int)len;
  	c->num=ec;
***************
*** 236,261 ****
  
  void SHA_Transform(SHA_CTX *c, unsigned char *b)
  	{
! 	SHA_LONG p[16];
! #if !defined(B_ENDIAN)
  	SHA_LONG *q;
  	int i;
- #endif
  
! #if defined(B_ENDIAN) || defined(L_ENDIAN)
! 	memcpy(p,b,64);
! #ifdef L_ENDIAN
! 	q=p;
! 	for (i=(SHA_LBLOCK/4); i; i--)
  		{
! 		Endian_Reverse32(q[0]);
! 		Endian_Reverse32(q[1]);
! 		Endian_Reverse32(q[2]);
! 		Endian_Reverse32(q[3]);
! 		q+=4;
  		}
  #endif
! #else
  	q=p;
  	for (i=(SHA_LBLOCK/4); i; i--)
  		{
--- 270,304 ----
  
  void SHA_Transform(SHA_CTX *c, unsigned char *b)
  	{
! 	SHA_LONG p[SHA_LBLOCK];
  	SHA_LONG *q;
  	int i;
  
! #if SHA_LONG_LOG2==2
! #if defined(B_ENDIAN) || defined(SHA_ASM)
! 	memcpy(p,b,SHA_CBLOCK);
! 	sha_block(c,p,1);
! 	return;
! #else
! 	if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
  		{
! 		q=p;
! 		for (i=(SHA_LBLOCK/4); i; i--)
! 			{
! 			unsigned long l;
! 			BE_COPY(q,b,0);	/* BE_COPY was defined above */
! 			BE_COPY(q,b,1);
! 			BE_COPY(q,b,2);
! 			BE_COPY(q,b,3);
! 			q+=4;
! 			b+=4*sizeof(SHA_LONG);
! 			}
! 		sha_block(c,p,1);
! 		return;
  		}
  #endif
! #endif
! #ifndef SHA_NO_TAIL_CODE	/* defined above, see comment */
  	q=p;
  	for (i=(SHA_LBLOCK/4); i; i--)
  		{
***************
*** 265,278 ****
  		c2nl(b,l); *(q++)=l;
  		c2nl(b,l); *(q++)=l;
  		}
  #endif
- 	sha_block(c,p,64);
  	}
  
! void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
  	{
  	register SHA_LONG A,B,C,D,E,T;
! 	SHA_LONG X[16];
  
  	A=c->h0;
  	B=c->h1;
--- 308,322 ----
  		c2nl(b,l); *(q++)=l;
  		c2nl(b,l); *(q++)=l;
  		}
+ 	sha_block(c,p,1);
  #endif
  	}
  
! #ifndef SHA_ASM
! static void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
  	{
  	register SHA_LONG A,B,C,D,E,T;
! 	SHA_LONG X[SHA_LBLOCK];
  
  	A=c->h0;
  	B=c->h1;
***************
*** 372,379 ****
  	c->h3=(c->h3+B)&0xffffffffL;
  	c->h4=(c->h4+C)&0xffffffffL;
  
! 	num-=64;
! 	if (num <= 0) break;
  
  	A=c->h0;
  	B=c->h1;
--- 416,422 ----
  	c->h3=(c->h3+B)&0xffffffffL;
  	c->h4=(c->h4+C)&0xffffffffL;
  
! 	if (--num <= 0) break;
  
  	A=c->h0;
  	B=c->h1;
***************
*** 381,389 ****
  	D=c->h3;
  	E=c->h4;
  
! 	W+=16;
  	}
  	}
  
  void SHA_Final(unsigned char *md, SHA_CTX *c)
  	{
--- 424,438 ----
  	D=c->h3;
  	E=c->h4;
  
! 	W+=SHA_LBLOCK;	/* Note! This can happen only when sizeof(SHA_LONG)
! 			 * is 4. Whenever that's not the case this
! 			 * function is never called with num larger than 1
! 			 * and we never advance down here.
! 			 *			<[EMAIL PROTECTED]>
! 			 */
  	}
  	}
+ #endif
  
  void SHA_Final(unsigned char *md, SHA_CTX *c)
  	{
***************
*** 409,415 ****
  		{
  		for (; i<SHA_LBLOCK; i++)
  			p[i]=0;
! 		sha_block(c,p,64);
  		i=0;
  		}
  	for (; i<(SHA_LBLOCK-2); i++)
--- 458,464 ----
  		{
  		for (; i<SHA_LBLOCK; i++)
  			p[i]=0;
! 		sha_block(c,p,1);
  		i=0;
  		}
  	for (; i<(SHA_LBLOCK-2); i++)
***************
*** 416,422 ****
  		p[i]=0;
  	p[SHA_LBLOCK-2]=c->Nh;
  	p[SHA_LBLOCK-1]=c->Nl;
! 	sha_block(c,p,64);
  	cp=md;
  	l=c->h0; nl2c(l,cp);
  	l=c->h1; nl2c(l,cp);
--- 465,477 ----
  		p[i]=0;
  	p[SHA_LBLOCK-2]=c->Nh;
  	p[SHA_LBLOCK-1]=c->Nl;
! #if SHA_LONG_LOG2==2
! #if !defined(B_ENDIAN) && defined(SHA_ASM)
! 	Endian_Reverse32(p[SHA_LBLOCK-2]);
! 	Endian_Reverse32(p[SHA_LBLOCK-1]);
! #endif
! #endif
! 	sha_block(c,p,1);
  	cp=md;
  	l=c->h0; nl2c(l,cp);
  	l=c->h1; nl2c(l,cp);
***************
*** 424,432 ****
  	l=c->h3; nl2c(l,cp);
  	l=c->h4; nl2c(l,cp);
  
- 	/* clear stuff, sha_block may be leaving some stuff on the stack
-  	 * but I'm not worried :-) */
  	c->num=0;
! /*	memset((char *)&c,0,sizeof(c));*/
  	}
  
--- 479,488 ----
  	l=c->h3; nl2c(l,cp);
  	l=c->h4; nl2c(l,cp);
  
  	c->num=0;
! 	/* sha_block may be leaving some stuff on the stack
! 	 * but I'm not worried :-)
! 	memset((void *)c,0,sizeof(SHA_CTX));
! 	 */
  	}
*** ./crypto/sha/sha1dgst.c.orig	Wed Apr 21 01:00:18 1999
--- ./crypto/sha/sha1dgst.c	Thu Apr 29 18:06:44 1999
***************
*** 83,103 ****
  #ifndef NOPROTO
  #  ifdef SHA1_ASM
     void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
! #  define sha1_block sha1_block_x86
  #  else
!    void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
  #  endif
  #else
  #  ifdef SHA1_ASM
     void sha1_block_x86();
! #  define sha1_block sha1_block_x86
  #  else
!    void sha1_block();
  #  endif
  #endif
  
! #if defined(L_ENDIAN) && defined(SHA1_ASM)
  #  define	M_c2nl		c2l
  #  define	M_p_c2nl	p_c2l
  #  define	M_c2nl_p	c2l_p
--- 83,103 ----
  #ifndef NOPROTO
  #  ifdef SHA1_ASM
     void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
! #  define sha1_block(c,p,n)	sha1_block_x86((c),(p),(n)*SHA_CBLOCK)
  #  else
!    static void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
  #  endif
  #else
  #  ifdef SHA1_ASM
     void sha1_block_x86();
! #  define sha1_block(c,p,n)	sha1_block_x86((c),(p),(n)*SHA_CBLOCK)
  #  else
!    static void sha1_block();
  #  endif
  #endif
  
! #if !defined(B_ENDIAN) && defined(SHA1_ASM)
  #  define	M_c2nl		c2l
  #  define	M_p_c2nl	p_c2l
  #  define	M_c2nl_p	c2l_p
***************
*** 156,162 ****
  			}
  		len-=(SHA_CBLOCK-c->num);
! 		sha1_block(c,p,64);
  		c->num=0;
  		/* drop through and do the rest */
  		}
--- 156,162 ----
  			}
  		len-=(SHA_CBLOCK-c->num);
! 		sha1_block(c,p,1);
  		c->num=0;
  		/* drop through and do the rest */
  		}
***************
*** 193,199 ****
  	 * copies it to a local array. I should be able to do this for
  	 * the C version as well....
  	 */
! #if 1
  #if defined(B_ENDIAN) || defined(SHA1_ASM)
  	if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
  		{
--- 193,199 ----
  	 * copies it to a local array. I should be able to do this for
  	 * the C version as well....
  	 */
! #if SHA_LONG_LOG2==2
  #if defined(B_ENDIAN) || defined(SHA1_ASM)
  	if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
  		{
***************
*** 200,207 ****
  		sw=len/SHA_CBLOCK;
  		if (sw)
  			{
- 			sw*=SHA_CBLOCK;
  			sha1_block(c,(SHA_LONG *)data,sw);
  			data+=sw;
  			len-=sw;
  			}
--- 200,207 ----
  		sw=len/SHA_CBLOCK;
  		if (sw)
  			{
  			sha1_block(c,(SHA_LONG *)data,sw);
+ 			sw*=SHA_CBLOCK;
  			data+=sw;
  			len-=sw;
  			}
***************
*** 213,247 ****
  	p=c->data;
  	while (len >= SHA_CBLOCK)
  		{
! #if defined(B_ENDIAN) || defined(L_ENDIAN)
  		if (p != (SHA_LONG *)data)
  			memcpy(p,data,SHA_CBLOCK);
  		data+=SHA_CBLOCK;
! # ifdef L_ENDIAN
! #  ifndef SHA1_ASM /* Will not happen */
! 		for (sw=(SHA_LBLOCK/4); sw; sw--)
  			{
! 			Endian_Reverse32(p[0]);
! 			Endian_Reverse32(p[1]);
! 			Endian_Reverse32(p[2]);
! 			Endian_Reverse32(p[3]);
! 			p+=4;
  			}
  		p=c->data;
! #  endif
! # endif
! #else
! 		for (sw=(SHA_BLOCK/4); sw; sw--)
  			{
! 			M_c2nl(data,l); *(p++)=l;
! 			M_c2nl(data,l); *(p++)=l;
! 			M_c2nl(data,l); *(p++)=l;
! 			M_c2nl(data,l); *(p++)=l;
  			}
  		p=c->data;
! #endif
! 		sha1_block(c,p,64);
  		len-=SHA_CBLOCK;
  		}
  	ec=(int)len;
  	c->num=ec;
--- 213,273 ----
  	p=c->data;
  	while (len >= SHA_CBLOCK)
  		{
! #if SHA_LONG_LOG2==2
! #if defined(B_ENDIAN) || defined(SHA1_ASM)
! #define SHA_NO_TAIL_CODE
! 		/*
! 		 * Basically we get here only when data happens
! 		 * to be unaligned.
! 		 */
  		if (p != (SHA_LONG *)data)
  			memcpy(p,data,SHA_CBLOCK);
  		data+=SHA_CBLOCK;
! 		sha1_block(c,p=c->data,1);
! 		len-=SHA_CBLOCK;
! #else	/* little-endian */
! #define BE_COPY(dst,src,i)	{		\
! 		l = ((SHA_LONG *)src)[i];	\
! 		Endian_Reverse32(l);		\
! 		dst[i] = l;			\
! 		}
! 		if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
  			{
! 			for (sw=(SHA_LBLOCK/4); sw; sw--)
! 				{
! 				BE_COPY(p,data,0);
! 				BE_COPY(p,data,1);
! 				BE_COPY(p,data,2);
! 				BE_COPY(p,data,3);
! 				p+=4;
! 				data += 4*sizeof(SHA_LONG);
! 				}
! 			sha1_block(c,p=c->data,1);
! 			len-=SHA_CBLOCK;
! 			continue;
  			}
+ #endif
+ #endif
+ #ifndef SHA_NO_TAIL_CODE
+ 		/*
+ 		 * In addition to the "sizeof(SHA_LONG)!=4" case the
+ 		 * following code covers unaligned access cases on
+ 		 * little-endian machines.
+ 		 *			<[EMAIL PROTECTED]>
+ 		 */
  		p=c->data;
! 		for (sw=(SHA_LBLOCK/4); sw; sw--)
  			{
! 			M_c2nl(data,l); p[0]=l;
! 			M_c2nl(data,l); p[1]=l;
! 			M_c2nl(data,l); p[2]=l;
! 			M_c2nl(data,l); p[3]=l;
! 			p+=4;
  			}
  		p=c->data;
! 		sha1_block(c,p,1);
  		len-=SHA_CBLOCK;
+ #endif
  		}
  	ec=(int)len;
  	c->num=ec;
***************
*** 256,281 ****
  
  void SHA1_Transform(SHA_CTX *c, unsigned char *b)
  	{
! 	SHA_LONG p[16];
! #ifndef B_ENDIAN
  	SHA_LONG *q;
  	int i;
- #endif
  
! #if defined(B_ENDIAN) || defined(L_ENDIAN)
! 	memcpy(p,b,64);
! #ifdef L_ENDIAN
! 	q=p;
! 	for (i=(SHA_LBLOCK/4); i; i--)
  		{
! 		Endian_Reverse32(q[0]);
! 		Endian_Reverse32(q[1]);
! 		Endian_Reverse32(q[2]);
! 		Endian_Reverse32(q[3]);
! 		q+=4;
  		}
  #endif
! #else
  	q=p;
  	for (i=(SHA_LBLOCK/4); i; i--)
  		{
--- 282,316 ----
  
  void SHA1_Transform(SHA_CTX *c, unsigned char *b)
  	{
! 	SHA_LONG p[SHA_LBLOCK];
  	SHA_LONG *q;
  	int i;
  
! #if SHA_LONG_LOG2==2
! #if defined(B_ENDIAN) || defined(SHA1_ASM)
! 	memcpy(p,b,SHA_CBLOCK);
! 	sha1_block(c,p,1);
! 	return;
! #else
! 	if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
  		{
! 		q=p;
! 		for (i=(SHA_LBLOCK/4); i; i--)
! 			{
! 			unsigned long l;
! 			BE_COPY(q,b,0);	/* BE_COPY was defined above */
! 			BE_COPY(q,b,1);
! 			BE_COPY(q,b,2);
! 			BE_COPY(q,b,3);
! 			q+=4;
! 			b+=4*sizeof(SHA_LONG);
! 			}
! 		sha1_block(c,p,1);
! 		return;
  		}
  #endif
! #endif
! #ifndef SHA_NO_TAIL_CODE	/* defined above, see comment */
  	q=p;
  	for (i=(SHA_LBLOCK/4); i; i--)
  		{
***************
*** 285,300 ****
  		c2nl(b,l); *(q++)=l;
  		c2nl(b,l); *(q++)=l;
  		}
  #endif
- 	sha1_block(c,p,64);
  	}
  
  #ifndef SHA1_ASM
! 
! void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
  	{
  	register SHA_LONG A,B,C,D,E,T;
! 	SHA_LONG X[16];
  
  	A=c->h0;
  	B=c->h1;
--- 320,334 ----
  		c2nl(b,l); *(q++)=l;
  		c2nl(b,l); *(q++)=l;
  		}
+ 	sha1_block(c,p,1);
  #endif
  	}
  
  #ifndef SHA1_ASM
! static void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
  	{
  	register SHA_LONG A,B,C,D,E,T;
! 	SHA_LONG X[SHA_LBLOCK];
  
  	A=c->h0;
  	B=c->h1;
***************
*** 394,401 ****
  	c->h3=(c->h3+B)&0xffffffffL;
  	c->h4=(c->h4+C)&0xffffffffL;
  
! 	num-=64;
! 	if (num <= 0) break;
  
  	A=c->h0;
  	B=c->h1;
--- 428,434 ----
  	c->h3=(c->h3+B)&0xffffffffL;
  	c->h4=(c->h4+C)&0xffffffffL;
  
! 	if (--num <= 0) break;
  
  	A=c->h0;
  	B=c->h1;
***************
*** 403,409 ****
  	D=c->h3;
  	E=c->h4;
  
! 	W+=16;
  	}
  	}
  #endif
--- 436,447 ----
  	D=c->h3;
  	E=c->h4;
  
! 	W+=SHA_LBLOCK;	/* Note! This can happen only when sizeof(SHA_LONG)
! 			 * is 4. Whenever that's not the case this
! 			 * function is never called with num larger than 1
! 			 * and we never advance down here.
! 			 *			<[EMAIL PROTECTED]>
! 			 */
  	}
  	}
  #endif
***************
*** 432,438 ****
  		{
  		for (; i<SHA_LBLOCK; i++)
  			p[i]=0;
! 		sha1_block(c,p,64);
  		i=0;
  		}
  	for (; i<(SHA_LBLOCK-2); i++)
--- 470,476 ----
  		{
  		for (; i<SHA_LBLOCK; i++)
  			p[i]=0;
! 		sha1_block(c,p,1);
  		i=0;
  		}
  	for (; i<(SHA_LBLOCK-2); i++)
***************
*** 439,449 ****
  		p[i]=0;
  	p[SHA_LBLOCK-2]=c->Nh;
  	p[SHA_LBLOCK-1]=c->Nl;
! #if defined(L_ENDIAN) && defined(SHA1_ASM)
  	Endian_Reverse32(p[SHA_LBLOCK-2]);
  	Endian_Reverse32(p[SHA_LBLOCK-1]);
  #endif
! 	sha1_block(c,p,64);
  	cp=md;
  	l=c->h0; nl2c(l,cp);
  	l=c->h1; nl2c(l,cp);
--- 477,489 ----
  		p[i]=0;
  	p[SHA_LBLOCK-2]=c->Nh;
  	p[SHA_LBLOCK-1]=c->Nl;
! #if SHA_LONG_LOG2==2
! #if !defined(B_ENDIAN) && defined(SHA1_ASM)
  	Endian_Reverse32(p[SHA_LBLOCK-2]);
  	Endian_Reverse32(p[SHA_LBLOCK-1]);
  #endif
! #endif
! 	sha1_block(c,p,1);
  	cp=md;
  	l=c->h0; nl2c(l,cp);
  	l=c->h1; nl2c(l,cp);
***************
*** 451,459 ****
  	l=c->h3; nl2c(l,cp);
  	l=c->h4; nl2c(l,cp);
  
- 	/* clear stuff, sha1_block may be leaving some stuff on the stack
- 	 * but I'm not worried :-) */
  	c->num=0;
! /*	memset((char *)&c,0,sizeof(c));*/
  	}
  
--- 491,500 ----
  	l=c->h3; nl2c(l,cp);
  	l=c->h4; nl2c(l,cp);
  
  	c->num=0;
! 	/* sha1_block may be leaving some stuff on the stack
! 	 * but I'm not worried :-)
! 	memset((void *)c,0,sizeof(SHA_CTX));
! 	 */
  	}