Please accept this update for compress.c. It contains two bug fixes. The first bug fix removes the "static" qualifier from the function get_word(), which will otherwise cause problems for multiple instances of compress running at the same time.
The second bug fix is for words longer than 255 characters: compress now exits with an error when it meets such a word rather than silently continuing with corrupt results (long.txt is included so you can test this). The diff file is against the 0.60 release and includes a few more refinements relative to the last update (for example, there is no point in passing BUFSIZE to get_word() if everything is hardcoded to BUFSIZE in size). To reproduce the long-word problem, run: compress -c <long.txt >long.cwl To remove the long-word fault, delete the "5" at the end of the very long string so that the word is only 255 characters long instead of 256. At that point long.ttt should be identical to long.txt if the following steps are followed: compress -c <long.txt >long.cwl compress -d <long.cwl >long.ttt ...which, as far as I can tell, works correctly. Note: if you want to include version reporting, please replace "version X" with an appropriate number; otherwise remove the if statement for the version option. Sincerely, Jose Da Silva
/*
 * Copyright (c) 2000-2001
 * Kevin Atkinson
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without
 * fee, provided that the above copyright notice appear in all copies
 * and that both that copyright notice and this permission notice
 * appear in supporting documentation. Kevin Atkinson makes no
 * representations about the suitability of this software for any
 * purpose. It is provided "as is" without express or implied
 * warranty.
 *
 */

/*
 * Stream format written by "c" and read back by "d":
 *
 *   Each word is stored as one byte holding (shared prefix length + 1),
 *   followed by the bytes of the word that come after that prefix.  The
 *   shared prefix is measured against the previous word.  When the prefix
 *   is longer than 31 bytes the length byte would be > 32 and could be
 *   mistaken for a word byte, so it is preceded by a single 0 byte.
 *
 * Bytes with value <= 32 are never part of a word.
 */

#include <stdio.h>

#if defined(__CYGWIN__) || defined (_WIN32)

#  include <io.h>
#  include <fcntl.h>

#  define SETBIN(fno)  _setmode( _fileno( fno ), _O_BINARY )

#else

#  define SETBIN(fno)

#endif

/* Size of a word buffer: up to BUFSIZE - 1 word bytes plus the '\0'. */
#define BUFSIZE 256

/* Largest prefix length that can be stored: the length byte holds
   prefix + 1 and must fit in one byte (<= 255). */
#define MAX_PREFIX 254

/* Prints the help text to stderr. */
void usage(void)
{
  fputs("Compresses or uncompresses sorted word lists.\n"     , stderr);
  fputs("For best result the locale should be set to C\n"     , stderr);
  fputs("before sorting by setting the environmental\n"       , stderr);
  fputs("variable LANG to \"C\" before sorting.\n"            , stderr);
  fputs("Copyright 2001,2004 by Kevin Atkinson.\n"            , stderr);
  fputs("Usage: word-list-compress c[ompress]|d[ecompress]\n" , stderr);
}

/*
 * Reads the next word from IN into W.
 *
 * Leading bytes with value <= 32 are skipped; the word ends at the next
 * such byte or at end of input.  W must have room for BUFSIZE bytes: at
 * most BUFSIZE - 1 word bytes are stored, always followed by '\0'.
 *
 * Returns:
 *   0  end of input reached before any word byte (W is "")
 *   1  a complete word was stored in W
 *   2  the word is longer than BUFSIZE - 1 bytes; W holds its first
 *      BUFSIZE - 1 bytes and the byte after them was pushed back on IN
 */
int get_word(FILE * in, char * w)
{
  int room = BUFSIZE - 1;
  int c;

  /* skip separators in front of the word */
  do {
    c = getc(in);
  } while (c != EOF && c <= 32);

  if (c == EOF) {
    *w = '\0';
    return 0;
  }

  do {
    *w++ = (char)c;
    c = getc(in);
  } while (c != EOF && c > 32 && --room);
  *w = '\0';

  /* A word that runs up to end of input is still a word; reporting it
     as "done" here would drop the last entry of a list that lacks a
     trailing newline.  The following call hits EOF again and returns 0. */
  if (c == EOF) return 1;

  ungetc(c, in);
  return room ? 1 : 2;
}

/*
 * Entry point.  Expects exactly one argument whose first character
 * (after an optional leading '-') selects the mode:
 *   c  compress the word list on stdin to stdout
 *   d  decompress stdin to stdout, one word per line
 *   v  print the version string to stderr
 *
 * Exit status: 0 success, 1 usage error, 2 corrupt input (including a
 * word longer than BUFSIZE - 1 bytes when compressing), 3 output error.
 */
int main (int argc, const char *argv[])
{
  char mode;

  if (argc != 2) {
    usage();
    return 1;
  }

  mode = argv[1][0];
  if (mode == '-') mode = argv[1][1];

  if (mode == 'v') {
    fputs("version X\n",stderr);
    return 0;
  }

  if (mode == 'c') {

    char s1[BUFSIZE];
    char s2[BUFSIZE];
    char * prev = s2;
    char * cur = s1;
    int status;

    *prev = '\0';

    SETBIN (stdout);

    while ((status = get_word(stdin, cur)) == 1) {
      int i = 0;
      /* Length of the prefix shared with the previous word, capped so
         that i + 1 still fits in the single length byte. */
      while (i < MAX_PREFIX && prev[i] != '\0' && prev[i] == cur[i])
        ++i;
      if (i > 31) {
        if (putc('\0', stdout) < 0) goto error_out;
      }
      if (putc(i+1, stdout) < 0) goto error_out;
      if (fputs(cur+i, stdout) < 0) goto error_out;
      /* the word just written becomes the reference for the next one */
      if (cur == s1) {
        prev = s1; cur = s2;
      } else {
        prev = s2; cur = s1;
      }
    }
    if (fflush(stdout) < 0) goto error_out;
    if (status) goto error_in;   /* get_word reported an over-long word */
    return 0;
  }

  if (mode == 'd') {

    char cur[BUFSIZE+1];  /* word + '\n' + '\0' */
    int len = 0;          /* length of the previous word held in cur */
    int i;
    int c;

    SETBIN (stdin);

    i = getc(stdin);
    while (i != EOF) {
      if (i == 0)
        i = getc(stdin);  /* 0 marks a prefix length byte that is > 32 */
      --i;
      /* The prefix must lie inside the previous word; anything else
         would copy bytes of cur that were never written. */
      if (i < 0 || i > len) goto error_in;
      while ((c = getc(stdin)) > 32 && i < BUFSIZE)
        cur[i++] = (char)c;
      if (i >= BUFSIZE) goto error_in;
      len = i;
      /* one "puts" faster than 2 puts */
      cur[i] = '\n';
      cur[i+1] = '\0';
      if (fputs(cur, stdout) < 0) goto error_out;
      i = c;              /* the byte that ended the word is the next length byte */
    }
    if (fflush(stdout) < 0) goto error_out;
    return 0;
  }

  usage();
  return 1;

error_in:
  fputs("ERROR: Corrupt Input.\n", stderr);
  return 2;

error_out:
  /* output space full or other output fault */
  fputs("ERROR: Output Data Error.\n", stderr);
  return 3;
}
NextWordOkay 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234 NextWordTooLong 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 NextWordOkay 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234 Done
--- aspell-0.60/prog/compress.c 2004-06-23 02:14:26.000000000 -0700 +++ compress.c 2004-09-02 14:22:58.024057096 -0700 @@ -4,8 +4,8 @@ * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without - * fee, provided that the above copyright notice appear in all copies - * and that both that copyright notice and this permission notice + * fee, provided that the above copyright notice appear in all copies + * and that both that copyright notice and this permission notice * appear in supporting documentation. Kevin Atkinson makes no * representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied @@ -30,99 +30,117 @@ #define BUFSIZE 256 -void usage () +void usage () { fputs("Compresses or uncompresses sorted word lists.\n" , stderr); - fputs("For best result the locale should be set to C\n" , stderr); + fputs("For best result the locale should be set to C\n" , stderr); fputs("before sorting by setting the environmental\n" , stderr); - fputs("variable LANG to \"C\" before sorting.\n" , stderr); - fputs("Copyright 2001,2004 by Kevin Atkinson.\n" , stderr); + fputs("variable LANG to \"C\" before sorting.\n" , stderr); + fputs("Copyright 2001,2004 by Kevin Atkinson.\n" , stderr); fputs("Usage: word-list-compress c[ompress]|d[ecompress]\n" , stderr); } // PRECOND: bufsize >= 2 -static int get_word(FILE * in, char * w, size_t bufsize) +int get_word(FILE * in, char * w) { - int c; + int bufsize = BUFSIZE - 1; + register int c; + while (c = getc(in), c != EOF && c <= 32); - if (c == EOF) return 0; - do { - *w++ = (char)(c); - --bufsize; - } while (c = getc(in), c != EOF && c > 32 && bufsize > 1); + if (c != EOF) { + do { + *w++ = (char)(c); + } while (c = getc(in), c != EOF && c > 32 && --bufsize); + } +//printf(" *** %d ***",bufsize); /******remove this line******/ +//printf(" *** %d ***\n", (BUFSIZE - bufsize)); /******remove this 
line******/ *w = '\0'; ungetc(c, in); - if (c == EOF) return 0; - else return 1; + if (c == EOF) return 0; /* done */ + if (bufsize) return 1; /* normal return */ + return 2; /* error, word larger than 255 chars */ } int main (int argc, const char *argv[]) { - if (argc != 2) { - - usage(); - return 1; - - } else if (argv[1][0] == 'c') { - - char s1[BUFSIZE]; - char s2[BUFSIZE]; - char * prev = s2; - char * cur = s1; - *prev = '\0'; - - SETBIN (stdout); - - while (get_word(stdin, cur, BUFSIZE)) { - int i = 0; - /* get the length of the prefix */ - while (prev[i] != '\0' && cur[i] != '\0' && prev[i] == cur[i]) - ++i; - if (i > 31) { - putc('\0', stdout); - } - putc(i+1, stdout); - fputs(cur+i, stdout); - if (cur == s1) { - prev = s1; cur = s2; - } else { - prev = s2; cur = s1; - } + if (argc == 2) { + char c = argv[1][0]; + if (c == '-') c = argv[1][1]; + + if (c == 'v') { + fputs("version X\n",stderr); + return 0; } - return 0; - } else if (argv[1][0] == 'd') { - - char cur[256]; - int i; - int c; - - SETBIN (stdin); - - i = getc(stdin); - while (i != -1 ) { - if (i == 0) - i = getc(stdin); - --i; - if (i < 0) goto error; - while ((c = getc(stdin)) > 32 && i < BUFSIZE) - cur[i++] = (char)c; - if (i >= BUFSIZE) goto error; - cur[i] = '\0'; - fputs(cur, stdout); - putc('\n', stdout); - i = c; + if (c == 'c') { + + char s1[BUFSIZE]; + char s2[BUFSIZE]; + char * prev = s2; + char * cur = s1; + *prev = '\0'; + int errFlag; + + SETBIN (stdout); + + while ((errFlag = get_word(stdin, cur)) == 1) { + int i = 0; + /* get the length of the prefix */ + while (prev[i] != '\0' && prev[i] == cur[i]) + ++i; + if (i > 31) { + if (putc('\0', stdout) < 0) goto error_out_c; + } + if (putc(i+1, stdout) < 0) goto error_out_c; + if (fputs(cur+i, stdout) < 0) goto error_out_c; + if (cur == s1) { + prev = s1; cur = s2; + } else { + prev = s2; cur = s1; + } + } + if (fflush(stdout) < 0) goto error_out_c; + if (errFlag) goto error_in_c; + return 0; } - return 0; - error: - fputs("ERROR: 
Corrupt Input.\n", stderr); - return 2; + if (c == 'd') { - } else { + char cur[BUFSIZE+1]; + int i; + int c; + + SETBIN (stdin); + + i = getc(stdin); + while (i != -1 ) { + if (i == 0) + i = getc(stdin); + --i; + if (i < 0) goto error_in_d; + while ((c = getc(stdin)) > 32 && i < BUFSIZE) + cur[i++] = (char)c; + if (i >= BUFSIZE) goto error_in_d; + /* one "puts" faster than 2 puts */ + cur[i] = '\n'; cur[++i] = '\0'; + if (fputs(cur, stdout) < 0) goto error_out_d; + i = c; + } + return 0; - usage(); - return 1; - + error_in_c: + error_in_d: + fputs("ERROR: Corrupt Input.\n", stderr); + return 2; + + error_out_c: + error_out_d: + /* output space full or other output fault */ + fputs("ERROR: Output Data Error.\n", stderr); + return 3; + } } + + usage(); + return 1; }
_______________________________________________ Aspell-devel mailing list [EMAIL PROTECTED] http://lists.gnu.org/mailman/listinfo/aspell-devel