Acked-by: Hugh Greenberg <[EMAIL PROTECTED]>

On Mon, 2008-10-06 at 22:36 -0600, Abhishek Kulkarni wrote:
> # HG changeset patch
> # User Abhishek Kulkarni <[EMAIL PROTECTED]>
> # Date 1223199649 21600
> # Node ID bff0d23271408117225dfa48f67b108c3da9c678
> # Parent  66e2cc7728c349d370a94b0591a6eec51bd5c4bf
> Use the zlib adler32 function for rolling checksum.
> 
> This patch uses the zlib adler32 function to calculate the checksum of the 
> files.
> Instead of calculating the checksum during the file setup stage, we use the 
> rolling hash technique to compute the checksum
> while the server serves the files to the clients. This reduces the server 
> setup time from a few seconds to almost zero.
> The clients compute their checksum while downloading the file, which also 
> reduces the time it takes for the file to become available
> to the clients.
> 
> Signed-off-by: Abhishek Kulkarni <[EMAIL PROTECTED]>
> 
> diff --git a/xget/Makefile b/xget/Makefile
> --- a/xget/Makefile
> +++ b/xget/Makefile
> @@ -2,13 +2,12 @@ SYSNAME!=uname
>  SYSNAME!=uname
>  INCDIR=../include
>  CFLAGS=-Wall -g -I $(INCDIR) -DSYSNAME=$(SYSNAME)
> -LFLAGS=-L. -L../libstrutil -lstrutil -L../libspclient -lspclient 
> -L../libspfs -lspfs -lm
> +LFLAGS=-L. -L../libstrutil -lstrutil -L../libspclient -lspclient 
> -L../libspfs -lspfs -lm -lz
>  HFILES=$(INCDIR)/spfs.h $(INCDIR)/spclient.h $(INCDIR)/xcpu.h 
> $(INCDIR)/strutil.h xget.h
>  
>  CMD=xget
>  OFILES=\
>       xget.o\
> -     crc32.o\
>  
>  all: $(CMD)
>  
> @@ -16,7 +15,7 @@ xget: $(OFILES) $(HFILES) Makefile
>       $(CC) -o xget $(CFLAGS) $(OFILES) $(LFLAGS)
>  
>  xget.static: $(OFILES) $(HFILES) Makefile
> -     $(CC) -static -o xget.static $(CFLAGS) $(OFILES) $(LFLAGS) -lm
> +     $(CC) -static -o xget.static $(CFLAGS) $(OFILES) $(LFLAGS) -lm -lz
>  
>  install:
>       mkdir -p $(INSTALLPREFIX)/sbin
> diff --git a/xget/crc32.c b/xget/crc32.c
> deleted file mode 100644
> --- a/xget/crc32.c
> +++ /dev/null
> @@ -1,118 +0,0 @@
> -#include <stdlib.h>
> -#include <stdio.h>
> -#include <stdarg.h>
> -#include <string.h>
> -#include <unistd.h>
> -#include <errno.h>
> -#include <fcntl.h>
> -#include <assert.h>
> -#include <limits.h>
> -#include <sys/stat.h>
> -#include <sys/utsname.h>
> -#include <sys/wait.h>
> -#include <signal.h>
> -#include <dirent.h>
> -#include <regex.h>
> -#include <math.h>
> -#include <pthread.h>
> -#include <sys/types.h>
> -#include <sys/stat.h>
> -#include <unistd.h>
> -#include <sys/time.h>
> -#include <sys/mman.h>
> -
> -#include "spfs.h"
> -
> -#define CRC_MAX_READ 640000
> -
> -//crc32 and crc32 were taken from: 
> http://fxr.watson.org/fxr/source/libkern/crc32.c
> -
> -static u32      crc32_tab[] = {
> -     0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> -     0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> -     0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> -     0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> -     0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
> -     0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> -     0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
> -     0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> -     0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> -     0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> -     0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
> -     0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> -     0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> -     0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> -     0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> -     0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> -     0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> -     0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> -     0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> -     0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> -     0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> -     0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> -     0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> -     0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> -     0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> -     0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> -     0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> -     0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> -     0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> -     0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> -     0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> -     0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> -     0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> -     0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> -     0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> -     0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> -     0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> -     0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> -     0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> -     0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> -     0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> -     0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> -     0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> -};
> -
> -u32
> -crc32(const void *buf, size_t size)
> -{
> -     const u8 *p = buf;
> -     u32 crc;
> -     
> -     crc = ~0U;
> -     while (size--)
> -             crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> -     return crc ^ ~0U;
> -}
> -
> -u32 
> -fcrc32(int fd, u64 size)
> -{
> -        u8 *buf, *p=NULL;
> -     u32 crc=~0U;
> -     u64 totalbytes, bytes_read;
> -
> -     totalbytes = bytes_read = 0;
> -        if (!( buf = sp_malloc(CRC_MAX_READ)))
> -       goto crc_error;
> -
> -        while ((bytes_read = read(fd, buf, CRC_MAX_READ)) > 0){
> -             p = buf;
> -             totalbytes += bytes_read;
> -             while (bytes_read--)
> -                     crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> -     }//while
> -
> -     if (bytes_read < 0 || totalbytes != size) {
> -             sp_uerror(errno);
> -             goto crc_error;
> -     }
> -
> -     free(buf);
> -     return crc ^ ~0U;
> -
> -crc_error:
> -        if (buf) 
> -       free(buf);
> -        return 0xFF ; //??
> -}
> diff --git a/xget/xget.c b/xget/xget.c
> --- a/xget/xget.c
> +++ b/xget/xget.c
> @@ -22,6 +22,7 @@
>  #include <unistd.h>
>  #include <sys/time.h>
>  #include <sys/mman.h>
> +#include <zlib.h>
>  
>  #include "spfs.h"
>  #include "spclient.h"
> @@ -358,7 +359,7 @@ fileretry(File *f, u64 offset)
>               goto error;
>       } 
>  
> -     f->retries++;           
> +     f->retries++; 
>       if ((file_finalize(f, 1)) < 0)
>               goto error;
>  
> @@ -367,6 +368,8 @@ fileretry(File *f, u64 offset)
>               f->datafd = NULL;
>       }
>  
> +        f->checksum = adler32(0L, Z_NULL, 0);
> +        f->checksum_ptr = 0;
>       f->progress = time(NULL);
>       f->finished = 0;
>       if (offset == 0)
> @@ -474,15 +477,16 @@ tick(void)
>               fdone = 1;
>               for(f = files; f != NULL; f = f->next) {
>  //                   debug(Dbgfn, "tick\n");
> +                        
> +                     if (f->finished && (f->finished = matchsum(f))) 
> +                                file_finalize(f, 0);                        
> +                        
>                       if (f->finished<0) {
>                               debug(Dbgclntfn, "File: %s checksum did not 
> match, retrying\n",
>                                     f->nname);
>                               if (fileretry(f, 0) < 0)
>                                       return -1;
>                       }
> -
> -                     if (f->finished == 1)
> -                             file_finalize(f, 0);
>                       
>                       if(f->finished == 2)
>                               continue;
> @@ -887,6 +891,7 @@ filealloc(Spfile *parent, char *nname, c
>       f->availfid = NULL;
>       f->datafd = NULL;
>       f->checksum = checksum;
> +        f->checksum_ptr = 0;        
>       f->finished = 0;
>       f->progress = time(NULL);
>       f->retries = 0;
> @@ -917,7 +922,6 @@ localfileread(Spfile *parent, char *file
>       Spfile *dir, *ret;
>       DIR *dirstr;
>       struct dirent *de;
> -     int fd;
>       u32 checksum, npmode;
>       Spuser *usr;
>       Spgroup *grp;
> @@ -944,18 +948,13 @@ localfileread(Spfile *parent, char *file
>       }
>  
>       if (S_ISREG(st.st_mode)) {
> -             if ((fd = open(filename, O_RDONLY)) == -1) {
> -                     sp_uerror(errno);
> -                     goto error;
> -             }
> -             
> -             checksum = fcrc32(fd, st.st_size);
> +                
> +             checksum = adler32(0L, Z_NULL, 0);                
>               f = filealloc(parent, name, filename, st.st_size, st.st_size,  
>                             st.st_mtime, checksum, npmode, usr, grp);
>               if (!f)
> -                     goto error;
> +                     return NULL;
>  
> -             close(fd);
>               debug(Dbgsrvfn, "Added file: %s\n", f->dir->name);
>               ret = f->dir;
>       } else if (S_ISDIR(st.st_mode)) {
> @@ -992,11 +991,6 @@ localfileread(Spfile *parent, char *file
>       }
>       
>       return ret;
> -
> -error:
> -     if (fd > -1)
> -             close(fd);
> -     return NULL;
>  }
>  
>  static void
> @@ -1035,6 +1029,11 @@ respondreqs(File *f)
>                       
>                       if ((n = read(fd, buf, count)) < 0)
>                               goto error;
> +
> +                        if (req->offset == f->checksum_ptr) {
> +                                f->checksum = adler32(f->checksum, (const 
> Bytef *)buf, n);
> +                                f->checksum_ptr += n;
> +                        }
>                       
>                       close(fd);
>                       if (n < count)
> @@ -1072,7 +1071,6 @@ netreadcb(Spcfd *fd, void *a)
>  {
>       int n, lfd, readsize;
>       File *f;
> -     u32 checksum;
>       u8 *buf;
>       struct stat *st;
>       
> @@ -1118,6 +1116,11 @@ netreadcb(Spcfd *fd, void *a)
>               xget_uerror(errno);
>               goto error;
>       }
> +
> +        if (f->datalen == f->checksum_ptr) {
> +                f->checksum = adler32(f->checksum, (const Bytef *)buf, n);
> +                f->checksum_ptr += n;
> +        }        
>               
>       f->datalen += n;
>       if (f->datalen >= f->datasize) {
> @@ -1128,11 +1131,7 @@ netreadcb(Spcfd *fd, void *a)
>                       goto error;
>               }
>  
> -             checksum = fcrc32(lfd, f->datasize);
> -             if (checksum == f->checksum) 
> -                     f->finished = 1;
> -             else 
> -                     f->finished = -1;
> +                f->finished = 5;
>       }
>  
>       f->progress = time(NULL);
> @@ -1152,17 +1151,48 @@ error:
>  }
>  
>  static int
> +matchsum(File *f)
> +{
> +        Spcfid *checksumfid = NULL;
> +        u32 checksum;
> +        char *buf;
> +        int blen, n;
> +        
> +        blen = strlen(f->nname) + 16;
> +     if (blen < 128)
> +             blen = 128;
> +     buf = sp_malloc(blen);
> +     if (!buf)
> +             return -1;
> +
> +        sprintf(buf, "%s/checksum", f->nname);
> +     checksumfid = spc_open(f->fs, buf, Oread);
> +     if (!checksumfid)
> +             return -1;
> +
> +     n = spc_read(checksumfid, (u8 *) buf, blen, 0);
> +     if (n < 0)
> +             return -1;
> +
> +     buf[n] = '\0';
> +     checksum = strtoul(buf, NULL, 0);
> +     spc_close(checksumfid);
> +        free(buf);
> +        return (f->checksum == checksum)?1:-1;
> +}
> +
> +
> +static int
>  netfileread(Spfile *dir, char *lname, char *nname, u64 len, int mtime, 
>           u32 npmode, Spuser *usr, Spgroup *grp)
>  {
> -     int n, blen, checksum;
> +     int n, blen;
>       char *buf, *fname, *redirto;
> -     Spcfid *datafid, *checksumfid, *availfid, *redirfid;
> +     Spcfid *datafid, *availfid, *redirfid;
>       Spcfsys *redirfs;
>       File *file;
>       
>       datafid = NULL;
> -     checksumfid = NULL;
>       availfid = NULL;
>       redirfid = NULL;
>       redirfs = NULL;
> @@ -1174,18 +1204,6 @@ netfileread(Spfile *dir, char *lname, ch
>       if (!buf)
>               return -1;
>  
> -     sprintf(buf, "%s/checksum", nname);
> -     checksumfid = spc_open(masterfs, buf, Oread);
> -     if (!checksumfid)
> -             goto error;
> -
> -     n = spc_read(checksumfid, (u8 *) buf, blen, 0);
> -     if (n < 0)
> -             goto error;
> -
> -     buf[n] = '\0';
> -     checksum = strtoul(buf, NULL, 0);
> -     spc_close(checksumfid);
>       fname = strrchr(nname, '/');
>       if (!fname)
>               fname = nname;
> @@ -1238,13 +1256,13 @@ netfileread(Spfile *dir, char *lname, ch
>  
>       snprintf(buf, blen, "%d %s", port, redirto);
>       n = spc_write(availfid, (u8 *) buf, strlen(buf) + 1, 0);
> -     if (n < 0) {
> -             goto error;
> -     }
> +     if (n < 0)
> +             goto error;     
>  
>       file->fs = redirfs;
>       file->datafid = datafid;
> -     file->checksum = checksum;
> +     file->checksum = adler32(0L, Z_NULL, 0);
> +        file->checksum_ptr = 0;
>       file->datafd = spcfd_add(file->datafid, netreadcb, file, 0);
>       file->availfid = availfid;
>       free(buf);
> @@ -1254,9 +1272,6 @@ error:
>  error:
>       if (datafid)
>               spc_close(datafid);
> -
> -     if (checksumfid)
> -             spc_close(checksumfid);
>  
>       if (availfid)
>               spc_close(availfid);
> diff --git a/xget/xget.h b/xget/xget.h
> --- a/xget/xget.h
> +++ b/xget/xget.h
> @@ -48,6 +48,7 @@ struct File {
>       File*   next;
>       File*   prev;
>       u32     checksum;
> +        u64  checksum_ptr;        
>       int     finished;
>       time_t  progress;
>       int     retries;
> @@ -112,5 +113,4 @@ static File     *filealloc(Spfile *paren
>                          u64 datasize, u64 datalen, u32 mtime, u32 checksum, 
>                          u32 mode, Spuser *user, Spgroup *group);
>  static int      file_finalize(File *f, int write);
> -u32 crc32(const void *buf, size_t size);
> -u32 fcrc32(int fd, u64 size);
> +static int   matchsum(File *f);

Reply via email to