Acked-by: Hugh Greenberg <[EMAIL PROTECTED]>
On Mon, 2008-10-06 at 22:36 -0600, Abhishek Kulkarni wrote: > # HG changeset patch > # User Abhishek Kulkarni <[EMAIL PROTECTED]> > # Date 1223199649 21600 > # Node ID bff0d23271408117225dfa48f67b108c3da9c678 > # Parent 66e2cc7728c349d370a94b0591a6eec51bd5c4bf > Use the zlib adler32 function for rolling checksum. > > This patch uses the zlib adler32 function to calculate the checksum of the > files. > Instead of calculating the checksum during file setup stage, we use the > rolling hash technique to compute the checksum > while the server serves the files to the clients. This reduces the server > setup time from a few seconds to almost zero. > The clients compute their checksum while downloading the file which also > reduces the time until the file is available > to the clients > > Signed-off-by: Abhishek Kulkarni <[EMAIL PROTECTED]> > > diff --git a/xget/Makefile b/xget/Makefile > --- a/xget/Makefile > +++ b/xget/Makefile > @@ -2,13 +2,12 @@ SYSNAME!=uname > SYSNAME!=uname > INCDIR=../include > CFLAGS=-Wall -g -I $(INCDIR) -DSYSNAME=$(SYSNAME) > -LFLAGS=-L. -L../libstrutil -lstrutil -L../libspclient -lspclient > -L../libspfs -lspfs -lm > +LFLAGS=-L. 
-L../libstrutil -lstrutil -L../libspclient -lspclient > -L../libspfs -lspfs -lm -lz > HFILES=$(INCDIR)/spfs.h $(INCDIR)/spclient.h $(INCDIR)/xcpu.h > $(INCDIR)/strutil.h xget.h > > CMD=xget > OFILES=\ > xget.o\ > - crc32.o\ > > all: $(CMD) > > @@ -16,7 +15,7 @@ xget: $(OFILES) $(HFILES) Makefile > $(CC) -o xget $(CFLAGS) $(OFILES) $(LFLAGS) > > xget.static: $(OFILES) $(HFILES) Makefile > - $(CC) -static -o xget.static $(CFLAGS) $(OFILES) $(LFLAGS) -lm > + $(CC) -static -o xget.static $(CFLAGS) $(OFILES) $(LFLAGS) -lm -lz > > install: > mkdir -p $(INSTALLPREFIX)/sbin > diff --git a/xget/crc32.c b/xget/crc32.c > deleted file mode 100644 > --- a/xget/crc32.c > +++ /dev/null > @@ -1,118 +0,0 @@ > -#include <stdlib.h> > -#include <stdio.h> > -#include <stdarg.h> > -#include <string.h> > -#include <unistd.h> > -#include <errno.h> > -#include <fcntl.h> > -#include <assert.h> > -#include <limits.h> > -#include <sys/stat.h> > -#include <sys/utsname.h> > -#include <sys/wait.h> > -#include <signal.h> > -#include <dirent.h> > -#include <regex.h> > -#include <math.h> > -#include <pthread.h> > -#include <sys/types.h> > -#include <sys/stat.h> > -#include <unistd.h> > -#include <sys/time.h> > -#include <sys/mman.h> > - > -#include "spfs.h" > - > -#define CRC_MAX_READ 640000 > - > -//crc32 and crc32 were taken from: > http://fxr.watson.org/fxr/source/libkern/crc32.c > - > -static u32 crc32_tab[] = { > - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, > - 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, > - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, > - 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, > - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, > - 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, > - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, > - 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 
0xdcd60dcf, 0xabd13d59, > - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, > - 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, > - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, > - 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, > - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, > - 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, > - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, > - 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, > - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, > - 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, > - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, > - 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, > - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, > - 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, > - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, > - 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, > - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, > - 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, > - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, > - 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, > - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, > - 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, > - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, > - 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, > - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, > - 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 
> - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, > - 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, > - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, > - 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, > - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, > - 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, > - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, > - 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, > - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d > -}; > - > -u32 > -crc32(const void *buf, size_t size) > -{ > - const u8 *p = buf; > - u32 crc; > - > - crc = ~0U; > - while (size--) > - crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); > - return crc ^ ~0U; > -} > - > -u32 > -fcrc32(int fd, u64 size) > -{ > - u8 *buf, *p=NULL; > - u32 crc=~0U; > - u64 totalbytes, bytes_read; > - > - totalbytes = bytes_read = 0; > - if (!( buf = sp_malloc(CRC_MAX_READ))) > - goto crc_error; > - > - while ((bytes_read = read(fd, buf, CRC_MAX_READ)) > 0){ > - p = buf; > - totalbytes += bytes_read; > - while (bytes_read--) > - crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); > - }//while > - > - if (bytes_read < 0 || totalbytes != size) { > - sp_uerror(errno); > - goto crc_error; > - } > - > - free(buf); > - return crc ^ ~0U; > - > -crc_error: > - if (buf) > - free(buf); > - return 0xFF ; //?? 
> -} > diff --git a/xget/xget.c b/xget/xget.c > --- a/xget/xget.c > +++ b/xget/xget.c > @@ -22,6 +22,7 @@ > #include <unistd.h> > #include <sys/time.h> > #include <sys/mman.h> > +#include <zlib.h> > > #include "spfs.h" > #include "spclient.h" > @@ -358,7 +359,7 @@ fileretry(File *f, u64 offset) > goto error; > } > > - f->retries++; > + f->retries++; > if ((file_finalize(f, 1)) < 0) > goto error; > > @@ -367,6 +368,8 @@ fileretry(File *f, u64 offset) > f->datafd = NULL; > } > > + f->checksum = adler32(0L, Z_NULL, 0); > + f->checksum_ptr = 0; > f->progress = time(NULL); > f->finished = 0; > if (offset == 0) > @@ -474,15 +477,16 @@ tick(void) > fdone = 1; > for(f = files; f != NULL; f = f->next) { > // debug(Dbgfn, "tick\n"); > + > + if (f->finished && (f->finished = matchsum(f))) > + file_finalize(f, 0); > + > if (f->finished<0) { > debug(Dbgclntfn, "File: %s checksum did not > match, retrying\n", > f->nname); > if (fileretry(f, 0) < 0) > return -1; > } > - > - if (f->finished == 1) > - file_finalize(f, 0); > > if(f->finished == 2) > continue; > @@ -887,6 +891,7 @@ filealloc(Spfile *parent, char *nname, c > f->availfid = NULL; > f->datafd = NULL; > f->checksum = checksum; > + f->checksum_ptr = 0; > f->finished = 0; > f->progress = time(NULL); > f->retries = 0; > @@ -917,7 +922,6 @@ localfileread(Spfile *parent, char *file > Spfile *dir, *ret; > DIR *dirstr; > struct dirent *de; > - int fd; > u32 checksum, npmode; > Spuser *usr; > Spgroup *grp; > @@ -944,18 +948,13 @@ localfileread(Spfile *parent, char *file > } > > if (S_ISREG(st.st_mode)) { > - if ((fd = open(filename, O_RDONLY)) == -1) { > - sp_uerror(errno); > - goto error; > - } > - > - checksum = fcrc32(fd, st.st_size); > + > + checksum = adler32(0L, Z_NULL, 0); > f = filealloc(parent, name, filename, st.st_size, st.st_size, > st.st_mtime, checksum, npmode, usr, grp); > if (!f) > - goto error; > + return NULL; > > - close(fd); > debug(Dbgsrvfn, "Added file: %s\n", f->dir->name); > ret = f->dir; > } else if 
(S_ISDIR(st.st_mode)) { > @@ -992,11 +991,6 @@ localfileread(Spfile *parent, char *file > } > > return ret; > - > -error: > - if (fd > -1) > - close(fd); > - return NULL; > } > > static void > @@ -1035,6 +1029,11 @@ respondreqs(File *f) > > if ((n = read(fd, buf, count)) < 0) > goto error; > + > + if (req->offset == f->checksum_ptr) { > + f->checksum = adler32(f->checksum, (const > Bytef *)buf, n); > + f->checksum_ptr += n; > + } > > close(fd); > if (n < count) > @@ -1072,7 +1071,6 @@ netreadcb(Spcfd *fd, void *a) > { > int n, lfd, readsize; > File *f; > - u32 checksum; > u8 *buf; > struct stat *st; > > @@ -1118,6 +1116,11 @@ netreadcb(Spcfd *fd, void *a) > xget_uerror(errno); > goto error; > } > + > + if (f->datalen == f->checksum_ptr) { > + f->checksum = adler32(f->checksum, (const Bytef *)buf, n); > + f->checksum_ptr += n; > + } > > f->datalen += n; > if (f->datalen >= f->datasize) { > @@ -1128,11 +1131,7 @@ netreadcb(Spcfd *fd, void *a) > goto error; > } > > - checksum = fcrc32(lfd, f->datasize); > - if (checksum == f->checksum) > - f->finished = 1; > - else > - f->finished = -1; > + f->finished = 5; > } > > f->progress = time(NULL); > @@ -1152,17 +1151,48 @@ error: > } > > static int > +matchsum(File *f) > +{ > + Spcfid *checksumfid = NULL; > + u32 checksum; > + char *buf; > + int blen, n; > + > + blen = strlen(f->nname) + 16; > + if (blen < 128) > + blen = 128; > + buf = sp_malloc(blen); > + if (!buf) > + return -1; > + > + sprintf(buf, "%s/checksum", f->nname); > + checksumfid = spc_open(f->fs, buf, Oread); > + if (!checksumfid) > + return -1; > + > + n = spc_read(checksumfid, (u8 *) buf, blen, 0); > + if (n < 0) > + return -1; > + > + buf[n] = '\0'; > + checksum = strtoul(buf, NULL, 0); > + spc_close(checksumfid); > + free(buf); > + return (f->checksum == checksum)?1:-1; > +} > + > + > +static int > netfileread(Spfile *dir, char *lname, char *nname, u64 len, int mtime, > u32 npmode, Spuser *usr, Spgroup *grp) > { > - int n, blen, checksum; > + int n, blen; 
> char *buf, *fname, *redirto; > - Spcfid *datafid, *checksumfid, *availfid, *redirfid; > + Spcfid *datafid, *availfid, *redirfid; > Spcfsys *redirfs; > File *file; > > datafid = NULL; > - checksumfid = NULL; > availfid = NULL; > redirfid = NULL; > redirfs = NULL; > @@ -1174,18 +1204,6 @@ netfileread(Spfile *dir, char *lname, ch > if (!buf) > return -1; > > - sprintf(buf, "%s/checksum", nname); > - checksumfid = spc_open(masterfs, buf, Oread); > - if (!checksumfid) > - goto error; > - > - n = spc_read(checksumfid, (u8 *) buf, blen, 0); > - if (n < 0) > - goto error; > - > - buf[n] = '\0'; > - checksum = strtoul(buf, NULL, 0); > - spc_close(checksumfid); > fname = strrchr(nname, '/'); > if (!fname) > fname = nname; > @@ -1238,13 +1256,13 @@ netfileread(Spfile *dir, char *lname, ch > > snprintf(buf, blen, "%d %s", port, redirto); > n = spc_write(availfid, (u8 *) buf, strlen(buf) + 1, 0); > - if (n < 0) { > - goto error; > - } > + if (n < 0) > + goto error; > > file->fs = redirfs; > file->datafid = datafid; > - file->checksum = checksum; > + file->checksum = adler32(0L, Z_NULL, 0); > + file->checksum_ptr = 0; > file->datafd = spcfd_add(file->datafid, netreadcb, file, 0); > file->availfid = availfid; > free(buf); > @@ -1254,9 +1272,6 @@ error: > error: > if (datafid) > spc_close(datafid); > - > - if (checksumfid) > - spc_close(checksumfid); > > if (availfid) > spc_close(availfid); > diff --git a/xget/xget.h b/xget/xget.h > --- a/xget/xget.h > +++ b/xget/xget.h > @@ -48,6 +48,7 @@ struct File { > File* next; > File* prev; > u32 checksum; > + u64 checksum_ptr; > int finished; > time_t progress; > int retries; > @@ -112,5 +113,4 @@ static File *filealloc(Spfile *paren > u64 datasize, u64 datalen, u32 mtime, u32 checksum, > u32 mode, Spuser *user, Spgroup *group); > static int file_finalize(File *f, int write); > -u32 crc32(const void *buf, size_t size); > -u32 fcrc32(int fd, u64 size); > +static int matchsum(File *f);
