On Sun, Aug 05, 2012 at 03:52:08AM -0400, Brad Smith wrote:
> On Tue, Jul 24, 2012 at 01:32:49AM -0400, Brad Smith wrote:
> > Taking a look at what is left in the src tree for compiler optimizer
> > workarounds it looks like there are some that can most likely be
> > garbage collected for sparc / alpha and sh now that they have
> > much newer compilers.
> > 
> > sparc and alpha have both been updated through gcc3 and gcc4
> > in the timeframe these workarounds have been in the tree, and sh
> > has been updated from gcc3 to gcc4.
> 
> martynas@ pointed out this sha1 workaround for a gcc3 / sh
> optimizer bug. Someone with a landisk system please check
> this, but it most likely can be removed now that sh is using
> gcc4.

miod has plans to remove this (and has the same diff).

> 
> 
> Index: hash/sha1.c
> ===================================================================
> RCS file: /home/cvs/src/lib/libc/hash/sha1.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 sha1.c
> --- hash/sha1.c       29 Jul 2008 19:32:50 -0000      1.21
> +++ hash/sha1.c       5 Aug 2012 07:38:10 -0000
> @@ -42,64 +42,6 @@
>  #define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
>  #define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
>  
> -typedef union {
> -     u_int8_t c[64];
> -     u_int32_t l[16];
> -} CHAR64LONG16;
> -
> -#ifdef __sh__
> -static void do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
> -static void do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
> -static void do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
> -static void do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
> -
> -#define nR0(v,w,x,y,z,i) R0(*v,*w,*x,*y,*z,i)
> -#define nR1(v,w,x,y,z,i) R1(*v,*w,*x,*y,*z,i)
> -#define nR2(v,w,x,y,z,i) R2(*v,*w,*x,*y,*z,i)
> -#define nR3(v,w,x,y,z,i) R3(*v,*w,*x,*y,*z,i)
> -#define nR4(v,w,x,y,z,i) R4(*v,*w,*x,*y,*z,i)
> -
> -static void
> -do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
> -{
> -    nR0(a,b,c,d,e, 0); nR0(e,a,b,c,d, 1); nR0(d,e,a,b,c, 2); nR0(c,d,e,a,b, 3);
> -    nR0(b,c,d,e,a, 4); nR0(a,b,c,d,e, 5); nR0(e,a,b,c,d, 6); nR0(d,e,a,b,c, 7);
> -    nR0(c,d,e,a,b, 8); nR0(b,c,d,e,a, 9); nR0(a,b,c,d,e,10); nR0(e,a,b,c,d,11);
> -    nR0(d,e,a,b,c,12); nR0(c,d,e,a,b,13); nR0(b,c,d,e,a,14); nR0(a,b,c,d,e,15);
> -    nR1(e,a,b,c,d,16); nR1(d,e,a,b,c,17); nR1(c,d,e,a,b,18); nR1(b,c,d,e,a,19);
> -}
> -
> -static void
> -do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
> -{
> -    nR2(a,b,c,d,e,20); nR2(e,a,b,c,d,21); nR2(d,e,a,b,c,22); nR2(c,d,e,a,b,23);
> -    nR2(b,c,d,e,a,24); nR2(a,b,c,d,e,25); nR2(e,a,b,c,d,26); nR2(d,e,a,b,c,27);
> -    nR2(c,d,e,a,b,28); nR2(b,c,d,e,a,29); nR2(a,b,c,d,e,30); nR2(e,a,b,c,d,31);
> -    nR2(d,e,a,b,c,32); nR2(c,d,e,a,b,33); nR2(b,c,d,e,a,34); nR2(a,b,c,d,e,35);
> -    nR2(e,a,b,c,d,36); nR2(d,e,a,b,c,37); nR2(c,d,e,a,b,38); nR2(b,c,d,e,a,39);
> -}
> -
> -static void
> -do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
> -{
> -    nR3(a,b,c,d,e,40); nR3(e,a,b,c,d,41); nR3(d,e,a,b,c,42); nR3(c,d,e,a,b,43);
> -    nR3(b,c,d,e,a,44); nR3(a,b,c,d,e,45); nR3(e,a,b,c,d,46); nR3(d,e,a,b,c,47);
> -    nR3(c,d,e,a,b,48); nR3(b,c,d,e,a,49); nR3(a,b,c,d,e,50); nR3(e,a,b,c,d,51);
> -    nR3(d,e,a,b,c,52); nR3(c,d,e,a,b,53); nR3(b,c,d,e,a,54); nR3(a,b,c,d,e,55);
> -    nR3(e,a,b,c,d,56); nR3(d,e,a,b,c,57); nR3(c,d,e,a,b,58); nR3(b,c,d,e,a,59);
> -}
> -
> -static void
> -do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
> -{
> -    nR4(a,b,c,d,e,60); nR4(e,a,b,c,d,61); nR4(d,e,a,b,c,62); nR4(c,d,e,a,b,63);
> -    nR4(b,c,d,e,a,64); nR4(a,b,c,d,e,65); nR4(e,a,b,c,d,66); nR4(d,e,a,b,c,67);
> -    nR4(c,d,e,a,b,68); nR4(b,c,d,e,a,69); nR4(a,b,c,d,e,70); nR4(e,a,b,c,d,71);
> -    nR4(d,e,a,b,c,72); nR4(c,d,e,a,b,73); nR4(b,c,d,e,a,74); nR4(a,b,c,d,e,75);
> -    nR4(e,a,b,c,d,76); nR4(d,e,a,b,c,77); nR4(c,d,e,a,b,78); nR4(b,c,d,e,a,79);
> -}
> -#endif
> -
>  /*
>   * Hash a single 512-bit block. This is the core of the algorithm.
>   */
> @@ -108,6 +50,10 @@ SHA1Transform(u_int32_t state[5], const 
>  {
>       u_int32_t a, b, c, d, e;
>       u_int8_t workspace[SHA1_BLOCK_LENGTH];
> +     typedef union {
> +             u_int8_t c[64];
> +             u_int32_t l[16];
> +     } CHAR64LONG16;
>       CHAR64LONG16 *block = (CHAR64LONG16 *)workspace;
>  
>       (void)memcpy(block, buffer, SHA1_BLOCK_LENGTH);
> @@ -119,12 +65,6 @@ SHA1Transform(u_int32_t state[5], const 
>       d = state[3];
>       e = state[4];
>  
> -#ifdef __sh__
> -     do_R01(&a, &b, &c, &d, &e, block);
> -     do_R2(&a, &b, &c, &d, &e, block);
> -     do_R3(&a, &b, &c, &d, &e, block);
> -     do_R4(&a, &b, &c, &d, &e, block);
> -#else
>       /* 4 rounds of 20 operations each. Loop unrolled. */
>       R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
>       R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
> @@ -146,7 +86,6 @@ SHA1Transform(u_int32_t state[5], const 
>       R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
>       R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
>       R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
> -#endif
>  
>       /* Add the working vars back into context.state[] */
>       state[0] += a;
> 
> -- 
> This message has been scanned for viruses and
> dangerous content by MailScanner, and is
> believed to be clean.

Reply via email to