Please accept this update for compress.c
It has 2 bug fixes for compress.c

The 1st bug fix removes the "static" from function get_word(), which would
otherwise cause problems for multiple instances of compress running at the
same time.

The 2nd bug fix is for words longer than 255 characters: compress now exits
when it meets such a word rather than silently continuing with corrupt
results (long.txt is included to test this).

The diff file is against the 0.60 release and includes a few more refinements 
relative to the last update (for example there is no point in passing 
BUFSIZE to get_word() if everything is hardcoded to BUFSIZE in size).

To test the long-word problem, test with:
compress -c <long.txt >long.cwl

To remove the long-word fault, delete the "5" at the end of the very long
string so that the word is only 255 characters long instead of 256.
long.ttt should then be identical to long.txt after running the following
steps:
compress -c <long.txt >long.cwl
compress -d <long.cwl >long.ttt
...which, as far as I can tell, shows it working correctly.

Note: if you want to include versions, please replace version X with an 
appropriate number, otherwise remove the if statement for version.

Sincerely,
Jose Da Silva
/*
 * Copyright (c) 2000-2001
 * Kevin Atkinson
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without
 * fee, provided that the above copyright notice appear in all copies
 * and that both that copyright notice and this permission notice
 * appear in supporting documentation.  Kevin Atkinson makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 */

#include <stdio.h>

#if defined(__CYGWIN__) || defined (_WIN32)

#  include <io.h>
#  include <fcntl.h>

#  define SETBIN(fno)  _setmode( _fileno( fno ), _O_BINARY )

#else

#  define SETBIN(fno)

#endif

#define BUFSIZE 256

/* Print a short description of the tool and its command-line synopsis
 * to stderr.  Takes no arguments and returns nothing. */
void usage ()
{
  static const char * const help_lines[] = {
    "Compresses or uncompresses sorted word lists.\n",
    "For best result the locale should be set to C\n",
    "before sorting by setting the environmental\n",
    "variable LANG to \"C\" before sorting.\n",
    "Copyright 2001,2004 by Kevin Atkinson.\n",
    "Usage: word-list-compress c[ompress]|d[ecompress]\n"
  };
  size_t n;

  /* Emit the lines in order, one fputs per line. */
  for (n = 0; n < sizeof help_lines / sizeof help_lines[0]; ++n)
    fputs(help_lines[n], stderr);
}

/*
 * Read the next word from `in` into `w`.
 *
 * A word is a maximal run of bytes greater than 32; every byte <= 32
 * (space, newline, other control characters) is a separator.  Leading
 * separators are skipped.  At most BUFSIZE - 1 characters are stored,
 * followed by a terminating NUL.
 *
 * PRECOND: `w` points to at least BUFSIZE bytes.
 *
 * Returns:
 *   0  end of input reached and no word was read (`w` is the empty string)
 *   1  a word was read into `w`
 *   2  the word is longer than BUFSIZE - 1 characters; `w` holds the first
 *      BUFSIZE - 1 of them and the next character is pushed back on `in`
 */
int get_word(FILE * in, char * w)
{
  int room = BUFSIZE - 1;  /* characters that may still follow the current one */
  char * start = w;
  int c;

  /* Skip leading separators. */
  do {
    c = getc(in);
  } while (c != EOF && c <= 32);

  if (c != EOF) {
    /* `room` is only decremented when another word character follows,
     * so it reaches 0 exactly when the word does not fit. */
    do {
      *w++ = (char)(c);
      c = getc(in);
    } while (c != EOF && c > 32 && --room);
  }
  *w = '\0';

  if (c == EOF) {
    /* A final word that is ended by EOF instead of a separator is still
     * a word; reporting 0 here would silently drop it.  The following
     * call finds nothing more to read and returns 0. */
    return (w != start) ? 1 : 0;
  }

  /* Give the separator (or the first character that did not fit) back. */
  ungetc(c, in);
  if (room) return 1;      /* normal return */
  return 2;                /* error, word larger than BUFSIZE - 1 chars */
}

/* Report unusable input on stderr; the returned value is the exit status. */
static int fail_input (void)
{
  fputs("ERROR: Corrupt Input.\n", stderr);
  return 2;
}

/* Report an output fault (output space full or other write error) on
 * stderr; the returned value is the exit status. */
static int fail_output (void)
{
  fputs("ERROR: Output Data Error.\n", stderr);
  return 3;
}

/*
 * Compress the word list on stdin to stdout.
 *
 * Each word is written as a count byte holding (shared prefix length + 1)
 * followed by the characters that differ from the previous word.  When the
 * shared prefix is longer than 31 the count byte is preceded by a 0 byte,
 * so that it cannot be mistaken for a word character (all of which are
 * greater than 32).
 *
 * Returns the process exit status: 0 on success, 2 if get_word() reports
 * an error, 3 on an output fault.
 */
static int compress_stream (void)
{
  /* The count byte stores prefix + 1, so the largest prefix that fits in
   * one byte is 254; a prefix of 255 would wrap around to 0. */
  enum { MAX_PREFIX = 254 };

  char s1[BUFSIZE];
  char s2[BUFSIZE];
  char * prev = s2;
  char * cur = s1;
  int status;

  *prev = '\0';

  SETBIN (stdout);

  while ((status = get_word(stdin, cur)) == 1) {
    int i = 0;
    /* get the length of the prefix */
    while (i < MAX_PREFIX && prev[i] != '\0' && prev[i] == cur[i])
      ++i;
    if (i > 31) {
      if (putc('\0', stdout) < 0) return fail_output();
    }
    if (putc(i+1, stdout) < 0) return fail_output();
    if (fputs(cur+i, stdout) < 0) return fail_output();
    /* The word just written becomes the reference for the next one. */
    if (cur == s1) {
      prev = s1; cur = s2;
    } else {
      prev = s2; cur = s1;
    }
  }
  if (fflush(stdout) < 0) return fail_output();
  if (status) return fail_input();
  return 0;
}

/*
 * Expand a compressed word list from stdin to stdout, one word per line.
 *
 * Returns the process exit status: 0 on success, 2 on corrupt input,
 * 3 on an output fault.
 */
static int decompress_stream (void)
{
  char word[BUFSIZE+1];  /* word, then '\n', then NUL */
  int prev_len = 0;      /* length of the previously emitted word */
  int head;

  SETBIN (stdin);

  head = getc(stdin);
  while (head != EOF) {
    int len;
    int c;

    /* A 0 byte announces that the real count byte follows. */
    if (head == 0)
      head = getc(stdin);
    len = head - 1;
    /* The kept prefix must come from the previous word; anything longer
     * would expose bytes that were never part of a word. */
    if (len < 0 || len > prev_len) return fail_input();
    /* The byte that ends the suffix (<= 32 or EOF) is the next header. */
    while ((c = getc(stdin)) > 32 && len < BUFSIZE)
      word[len++] = (char)c;
    if (len >= BUFSIZE) return fail_input();
    prev_len = len;
    /* one "puts" faster than 2 puts */
    word[len] = '\n'; word[len+1] = '\0';
    if (fputs(word, stdout) < 0) return fail_output();
    head = c;
  }
  /* Buffered output can still fail at this point (e.g. disk full). */
  if (fflush(stdout) < 0) return fail_output();
  return 0;
}

/*
 * Entry point.  Expects exactly one argument whose first character
 * (after an optional leading '-') selects the mode:
 *   v  print the version to stderr
 *   c  compress stdin to stdout
 *   d  decompress stdin to stdout
 * Anything else prints the usage text and exits with status 1.
 */
int main (int argc, const char *argv[]) {

  if (argc == 2) {
    char mode = argv[1][0];
    if (mode == '-') mode = argv[1][1];

    if (mode == 'v') {
      fputs("version X\n",stderr);
      return 0;
    }

    if (mode == 'c') return compress_stream();
    if (mode == 'd') return decompress_stream();
  }

  usage();
  return 1;
}
NextWordOkay
012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234
NextWordTooLong
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
NextWordOkay
012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234
Done
--- aspell-0.60/prog/compress.c 2004-06-23 02:14:26.000000000 -0700
+++ compress.c  2004-09-02 14:22:58.024057096 -0700
@@ -4,8 +4,8 @@
  *
  * Permission to use, copy, modify, distribute and sell this software
  * and its documentation for any purpose is hereby granted without
- * fee, provided that the above copyright notice appear in all copies  
- * and that both that copyright notice and this permission notice 
+ * fee, provided that the above copyright notice appear in all copies
+ * and that both that copyright notice and this permission notice
  * appear in supporting documentation.  Kevin Atkinson makes no
  * representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
@@ -30,99 +30,117 @@
 
 #define BUFSIZE 256
 
-void usage () 
+void usage ()
 {
   fputs("Compresses or uncompresses sorted word lists.\n"     , stderr);
-  fputs("For best result the locale should be set to C\n"    , stderr);
+  fputs("For best result the locale should be set to C\n"     , stderr);
   fputs("before sorting by setting the environmental\n"       , stderr);
-  fputs("variable LANG to \"C\" before sorting.\n"            , stderr);
-  fputs("Copyright 2001,2004 by Kevin Atkinson.\n"  , stderr);
+  fputs("variable LANG to \"C\" before sorting.\n"           , stderr);
+  fputs("Copyright 2001,2004 by Kevin Atkinson.\n"           , stderr);
   fputs("Usage: word-list-compress c[ompress]|d[ecompress]\n" , stderr);
 }
 
 // PRECOND: bufsize >= 2
-static int get_word(FILE * in, char * w, size_t bufsize) 
+int get_word(FILE * in, char * w)
 {
-  int c;
+  int bufsize = BUFSIZE - 1;
+  register int c;
+
   while (c = getc(in), c != EOF && c <= 32);
-  if (c == EOF) return 0;
-  do {
-    *w++ = (char)(c);
-    --bufsize;
-  } while (c = getc(in), c != EOF && c > 32 && bufsize > 1);
+  if (c != EOF) {
+    do {
+      *w++ = (char)(c);
+    } while (c = getc(in), c != EOF && c > 32 && --bufsize);
+  }
+//printf(" *** %d ***",bufsize); /******remove this line******/
+//printf(" *** %d ***\n", (BUFSIZE - bufsize)); /******remove this line******/
   *w = '\0';
   ungetc(c, in);
-  if (c == EOF) return 0;
-  else return 1;
+  if (c == EOF) return 0; /* done */
+  if (bufsize)  return 1; /* normal return */
+  return 2;              /* error, word larger than 255 chars */
 }
 
 int main (int argc, const char *argv[]) {
 
-  if (argc != 2) {
-
-    usage();
-    return 1;
-    
-  } else if (argv[1][0] == 'c') {
-
-    char s1[BUFSIZE];
-    char s2[BUFSIZE];
-    char * prev = s2;
-    char * cur = s1;
-    *prev = '\0';
-
-    SETBIN (stdout);
-
-    while (get_word(stdin, cur, BUFSIZE)) {
-      int i = 0;
-      /* get the length of the prefix */
-      while (prev[i] != '\0' && cur[i] != '\0' && prev[i] == cur[i])
-        ++i;
-      if (i > 31) {
-        putc('\0', stdout);
-      }
-      putc(i+1, stdout);
-      fputs(cur+i, stdout);
-      if (cur == s1) {
-        prev = s1; cur = s2;
-      } else {
-        prev = s2; cur = s1;
-      }
+  if (argc == 2) {
+    char c = argv[1][0];
+    if (c == '-') c = argv[1][1];
+
+    if (c == 'v') {
+      fputs("version X\n",stderr);
+      return 0;
     }
-    return 0;
 
-  } else if (argv[1][0] == 'd') {
-    
-    char cur[256];
-    int i;
-    int c;
-
-    SETBIN (stdin);
-
-    i = getc(stdin);
-    while (i != -1 ) {
-      if (i == 0)
-        i = getc(stdin);
-      --i;
-      if (i < 0) goto error;
-      while ((c = getc(stdin)) > 32 && i < BUFSIZE)
-        cur[i++] = (char)c;
-      if (i >= BUFSIZE) goto error;
-      cur[i] = '\0';
-      fputs(cur, stdout);
-      putc('\n', stdout);
-      i = c;
+    if (c == 'c') {
+
+      char s1[BUFSIZE];
+      char s2[BUFSIZE];
+      char * prev = s2;
+      char * cur = s1;
+      *prev = '\0';
+      int errFlag;
+
+      SETBIN (stdout);
+
+      while ((errFlag = get_word(stdin, cur)) == 1) {
+       int i = 0;
+       /* get the length of the prefix */
+       while (prev[i] != '\0' && prev[i] == cur[i])
+         ++i;
+       if (i > 31) {
+         if (putc('\0', stdout) < 0) goto error_out_c;
+       }
+       if (putc(i+1, stdout) < 0) goto error_out_c;
+       if (fputs(cur+i, stdout) < 0) goto error_out_c;
+       if (cur == s1) {
+         prev = s1; cur = s2;
+       } else {
+         prev = s2; cur = s1;
+       }
+      }
+      if (fflush(stdout) < 0) goto error_out_c;
+      if (errFlag) goto error_in_c;
+      return 0;
     }
-    return 0;
 
-   error:
-    fputs("ERROR: Corrupt Input.\n", stderr);
-    return 2;
+    if (c == 'd') {
 
-  } else {
+      char cur[BUFSIZE+1];
+      int i;
+      int c;
+
+      SETBIN (stdin);
+
+      i = getc(stdin);
+      while (i != -1 ) {
+       if (i == 0)
+         i = getc(stdin);
+       --i;
+       if (i < 0) goto error_in_d;
+       while ((c = getc(stdin)) > 32 && i < BUFSIZE)
+         cur[i++] = (char)c;
+       if (i >= BUFSIZE) goto error_in_d;
+       /* one "puts" faster than 2 puts */
+       cur[i] = '\n'; cur[++i] = '\0';
+       if (fputs(cur, stdout) < 0) goto error_out_d;
+       i = c;
+      }
+      return 0;
 
-    usage();
-    return 1;
-    
+     error_in_c:
+     error_in_d:
+      fputs("ERROR: Corrupt Input.\n", stderr);
+      return 2;
+
+     error_out_c:
+     error_out_d:
+      /* output space full or other output fault */
+      fputs("ERROR: Output Data Error.\n", stderr);
+      return 3;
+    }
   }
+
+  usage();
+  return 1;
 }
_______________________________________________
Aspell-devel mailing list
[EMAIL PROTECTED]
http://lists.gnu.org/mailman/listinfo/aspell-devel

Reply via email to