Hello!

I made a patch to texindex (from texinfo-4.0) to add locale support.
What it does:
- uses unsigned char instead of char everywhere when dealing with input text
- conditionally uses locale-aware functions when doing comparisons
  (if HAVE_SETLOCALE is not defined, the old method is used instead).

Please check and apply this patch.

Thank you in advance,
Dima.

--- /home/dima/texindex.c       Fri Aug  6 17:03:14 1999
+++ texindex.c  Sat Apr  1 12:26:30 2000
@@ -48,9 +48,9 @@
    and the position and length of its first keyfield.  */
 struct lineinfo
 {
-  char *text;           /* The actual text of the line. */
+  unsigned char *text;           /* The actual text of the line. */
   union {
-    char *text;         /* The start of the key (for textual comparison). */
+    unsigned char *text;         /* The start of the key (for textual comparison). */
     long number;        /* The numeric value (for numeric comparison). */
   } key;
   long keylen;          /* Length of KEY field. */
@@ -88,7 +88,7 @@
 int num_infiles;
 
 /* Pointer to the array of pointers to lines being sorted. */
-char **linearray;
+unsigned char **linearray;
 
 /* The allocated length of `linearray'. */
 long nlines;
@@ -108,7 +108,7 @@
 
 /* During in-core sort, this points to the base of the data block
    which contains all the lines of data.  */
-char *text_base;
+unsigned char *text_base;
 
 /* Additional command switches .*/
 
@@ -119,12 +119,12 @@
 void decode_command ();
 void sort_in_core ();
 void sort_offline ();
-char **parsefile ();
-char *find_field ();
-char *find_pos ();
+unsigned char **parsefile ();
+unsigned char *find_field ();
+unsigned char *find_pos ();
 long find_value ();
-char *find_braced_pos ();
-char *find_braced_end ();
+unsigned char *find_braced_pos ();
+unsigned char *find_braced_end ();
 void writelines ();
 int compare_field ();
 int compare_full ();
@@ -135,7 +135,7 @@
 void fatal ();
 void error ();
 void *xmalloc (), *xrealloc ();
-char *concat ();
+unsigned char *concat ();
 void flush_tempfiles ();
 
 #define MAX_IN_CORE_SORT 500000
@@ -153,6 +153,10 @@
 #ifdef HAVE_SETLOCALE
   /* Set locale via LC_ALL.  */
   setlocale (LC_ALL, "");
+  if(!setlocale (LC_COLLATE, ""))
+    printf("WARNING! unable to setlocale(LC_COLLATE)\n");
+  if(!setlocale (LC_CTYPE, ""))
+    printf("WARNING! unable to setlocale(LC_CTYPE)\n");
 #endif
 
   /* Set the text message domain.  */
@@ -231,12 +235,12 @@
 
 typedef struct
 {
-  char *long_name;
-  char *short_name;
+  unsigned char *long_name;
+  unsigned char *short_name;
   int *variable_ref;
   int variable_value;
   char *arg_name;
-  char *doc_string;
+  unsigned char *doc_string;
 } TEXINDEX_OPTION;
 
 TEXINDEX_OPTION texindex_options[] = {
@@ -407,7 +411,7 @@
 
 int
 compare_full (line1, line2)
-     char **line1, **line2;
+     unsigned char **line1, **line2;
 {
   int i;
 
@@ -418,8 +422,8 @@
   for (i = 0; i < num_keyfields; i++)
     {
       long length1, length2;
-      char *start1 = find_field (&keyfields[i], *line1, &length1);
-      char *start2 = find_field (&keyfields[i], *line2, &length2);
+      unsigned char *start1 = find_field (&keyfields[i], *line1, &length1);
+      unsigned char *start2 = find_field (&keyfields[i], *line2, &length2);
       int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
                                start2, length2, *line2 - text_base);
       if (tem)
@@ -444,7 +448,7 @@
 {
   int i;
   int tem;
-  char *text1, *text2;
+  unsigned char *text1, *text2;
 
   /* Compare using the first keyfield, which has been found for us already. */
   if (keyfields->positional)
@@ -476,8 +480,8 @@
   for (i = 1; i < num_keyfields; i++)
     {
       long length1, length2;
-      char *start1 = find_field (&keyfields[i], text1, &length1);
-      char *start2 = find_field (&keyfields[i], text2, &length2);
+      unsigned char *start1 = find_field (&keyfields[i], text1, &length1);
+      unsigned char *start2 = find_field (&keyfields[i], text2, &length2);
       int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
                                start2, length2, text2 - text_base);
       if (tem)
@@ -498,7 +502,7 @@
 
 int
 compare_general (str1, str2, pos1, pos2, use_keyfields)
-     char *str1, *str2;
+     unsigned char *str1, *str2;
      long pos1, pos2;
      int use_keyfields;
 {
@@ -511,8 +515,8 @@
   for (i = 0; i < use_keyfields; i++)
     {
       long length1, length2;
-      char *start1 = find_field (&keyfields[i], str1, &length1);
-      char *start2 = find_field (&keyfields[i], str2, &length2);
+      unsigned char *start1 = find_field (&keyfields[i], str1, &length1);
+      unsigned char *start2 = find_field (&keyfields[i], str2, &length2);
       int tem = compare_field (&keyfields[i], start1, length1, pos1,
                                start2, length2, pos2);
       if (tem)
@@ -530,15 +534,15 @@
    A pointer to the starting character is returned, and the length
    is stored into the int that LENGTHPTR points to.  */
 
-char *
+unsigned char *
 find_field (keyfield, str, lengthptr)
      struct keyfield *keyfield;
-     char *str;
+     unsigned char *str;
      long *lengthptr;
 {
-  char *start;
-  char *end;
-  char *(*fun) ();
+  unsigned char *start;
+  unsigned char *end;
+  unsigned char *(*fun) ();
 
   if (keyfield->braced)
     fun = find_braced_pos;
@@ -573,18 +577,18 @@
    If IGNORE_BLANKS is nonzero, we skip all blanks
    after finding the specified word.  */
 
-char *
+unsigned char *
 find_pos (str, words, chars, ignore_blanks)
-     char *str;
+     unsigned char *str;
      int words, chars;
      int ignore_blanks;
 {
   int i;
-  char *p = str;
+  unsigned char *p = str;
 
   for (i = 0; i < words; i++)
     {
-      char c;
+      unsigned char c;
       /* Find next bunch of nonblanks and skip them. */
       while ((c = *p) == ' ' || c == '\t')
         p++;
@@ -609,16 +613,16 @@
 /* Like find_pos but assumes that each field is surrounded by braces
    and that braces within fields are balanced. */
 
-char *
+unsigned char *
 find_braced_pos (str, words, chars, ignore_blanks)
-     char *str;
+     unsigned char *str;
      int words, chars;
      int ignore_blanks;
 {
   int i;
   int bracelevel;
-  char *p = str;
-  char c;
+  unsigned char *p = str;
+  unsigned char c;
 
   for (i = 0; i < words; i++)
     {
@@ -661,13 +665,13 @@
 /* Find the end of the balanced-brace field which starts at STR.
    The position returned is just before the closing brace. */
 
-char *
+unsigned char *
 find_braced_end (str)
-     char *str;
+     unsigned char *str;
 {
   int bracelevel;
-  char *p = str;
-  char c;
+  unsigned char *p = str;
+  unsigned char c;
 
   bracelevel = 1;
   while (bracelevel)
@@ -685,7 +689,7 @@
 
 long
 find_value (start, length)
-     char *start;
+     unsigned char *start;
      long length;
 {
   while (length != 0L)
@@ -698,6 +702,7 @@
   return 0l;
 }
 
+#ifndef HAVE_SETLOCALE
 /* Vector used to translate characters for comparison.
    This is how we make all alphanumerics follow all else,
    and ignore case in the first sorting.  */
@@ -719,6 +724,23 @@
       char_order[i + 'A' - 'a'] = 512 + i;
     }
 }
+#endif
+
+#ifdef HAVE_SETLOCALE
+int
+compare_char (int c1, int c2) {
+  unsigned char buf1[2];
+  unsigned char buf2[2];
+  
+  buf1[1] = (unsigned char) 0;  buf2[1] = (unsigned char) 0;
+  buf1[0] = (unsigned char) c1;  buf2[0] = (unsigned char) c2;
+  if(isalpha(c1) && !isalpha(c2))
+    return 1;
+  if(!isalpha(c1) && isalpha(c2))
+    return -1;
+  return strcoll(buf1, buf2);
+};
+#endif
 
 /* Compare two fields (each specified as a start pointer and a character count)
    according to KEYFIELD.
@@ -727,10 +749,10 @@
 int
 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
      struct keyfield *keyfield;
-     char *start1;
+     unsigned char *start1;
      long length1;
      long pos1;
-     char *start2;
+     unsigned char *start2;
      long length2;
      long pos2;
 {
@@ -752,10 +774,10 @@
     }
   else
     {
-      char *p1 = start1;
-      char *p2 = start2;
-      char *e1 = start1 + length1;
-      char *e2 = start2 + length2;
+      unsigned char *p1 = start1;
+      unsigned char *p2 = start2;
+      unsigned char *e1 = start1 + length1;
+      unsigned char *e2 = start2 + length2;
 
       while (1)
         {
@@ -770,8 +792,13 @@
           else
             c2 = *p2++;
 
-          if (char_order[c1] != char_order[c2])
-            return char_order[c1] - char_order[c2];
+#ifdef HAVE_SETLOCALE
+          if (toupper(c1) != toupper(c2))
+            return compare_char(toupper(c1), toupper(c2));
+#else
+         if (char_order[c1] != char_order[c2])
+           return char_order[c1] - char_order[c2];
+#endif
           if (!c1)
             break;
         }
@@ -792,9 +819,14 @@
           else
             c2 = *p2++;
 
-          if (c1 != c2)
+#ifdef HAVE_SETLOCALE
+          if (compare_char(c1, c2))
             /* Reverse sign here so upper case comes out last.  */
-            return c2 - c1;
+            return compare_char(c2, c1);
+#else
+         if (c1 != c2)
+           return c2 - c1;
+#endif
           if (!c1)
             break;
         }
@@ -810,7 +842,7 @@
 struct linebuffer
 {
   long size;
-  char *buffer;
+  unsigned char *buffer;
 };
 
 /* Initialize LINEBUFFER for use. */
@@ -820,7 +852,7 @@
      struct linebuffer *linebuffer;
 {
   linebuffer->size = 200;
-  linebuffer->buffer = (char *) xmalloc (200);
+  linebuffer->buffer = (unsigned char *) xmalloc (200);
 }
 
 /* Read a line of text from STREAM into LINEBUFFER.
@@ -831,16 +863,16 @@
      struct linebuffer *linebuffer;
      FILE *stream;
 {
-  char *buffer = linebuffer->buffer;
-  char *p = linebuffer->buffer;
-  char *end = p + linebuffer->size;
+  unsigned char *buffer = linebuffer->buffer;
+  unsigned char *p = linebuffer->buffer;
+  unsigned char *end = p + linebuffer->size;
 
   while (1)
     {
       int c = getc (stream);
       if (p == end)
         {
-          buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
+          buffer = (unsigned char *) xrealloc (buffer, linebuffer->size *= 2);
           p += buffer - linebuffer->buffer;
           end += buffer - linebuffer->buffer;
           linebuffer->buffer = buffer;
@@ -964,9 +996,9 @@
      off_t total;
      char *outfile;
 {
-  char **nextline;
-  char *data = (char *) xmalloc (total + 1);
-  char *file_data;
+  unsigned char **nextline;
+  unsigned char *data = (char *) xmalloc (total + 1);
+  unsigned char *file_data;
   long file_size;
   int i;
   FILE *ostream = stdout;
@@ -996,7 +1028,9 @@
       return;
     }
 
+#ifndef HAVE_SETLOCALE
   init_char_order ();
+#endif
 
   /* Sort routines want to know this address. */
 
@@ -1008,7 +1042,7 @@
   nlines = total / 50;
   if (!nlines)
     nlines = 2;
-  linearray = (char **) xmalloc (nlines * sizeof (char *));
+  linearray = (unsigned char **) xmalloc (nlines * sizeof (unsigned char *));
 
   /* `nextline' points to the next free slot in this array.
      `nlines' is the allocated size.  */
@@ -1035,7 +1069,7 @@
   if (lineinfo)
     {
       struct lineinfo *lp;
-      char **p;
+      unsigned char **p;
 
       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
         {
@@ -1054,7 +1088,7 @@
       free (lineinfo);
     }
   else
-    qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
+    qsort (linearray, nextline - linearray, sizeof (unsigned char *), compare_full);
 
   /* Open the output file. */
 
@@ -1079,15 +1113,15 @@
    The value returned is the first entry in LINEARRAY still unused.
    Value 0 means input file contents are invalid.  */
 
-char **
+unsigned char **
 parsefile (filename, nextline, data, size)
      char *filename;
-     char **nextline;
-     char *data;
+     unsigned char **nextline;
+     unsigned char *data;
      long size;
 {
-  char *p, *end;
-  char **line = nextline;
+  unsigned char *p, *end;
+  unsigned char **line = nextline;
 
   p = data;
   end = p + size;
@@ -1107,8 +1141,8 @@
       line++;
       if (line == linearray + nlines)
         {
-          char **old = linearray;
-          linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
+          unsigned char **old = linearray;
+          linearray = (unsigned char **) xrealloc (linearray, sizeof (unsigned char *) * (nlines *= 4));
           line += linearray - old;
         }
     }
@@ -1134,12 +1168,12 @@
 
 /* The last primary name we wrote a \primary entry for.
    If only one level of indexing is being done, this is the last name seen. */
-char *lastprimary;
+unsigned char *lastprimary;
 /* Length of storage allocated for lastprimary. */
 int lastprimarylength;
 
 /* Similar, for the secondary name. */
-char *lastsecondary;
+unsigned char *lastsecondary;
 int lastsecondarylength;
 
 /* Zero if we are not in the middle of writing an entry.
@@ -1152,14 +1186,14 @@
 /* The initial (for sorting purposes) of the last primary entry written.
    When this changes, a \initial {c} line is written */
 
-char *lastinitial;
+unsigned char *lastinitial;
 
 int lastinitiallength;
 
 /* When we need a string of length 1 for the value of lastinitial,
    store it here.  */
 
-char lastinitial1[2];
+unsigned char lastinitial1[2];
 
 /* Initialize static storage for writing an index. */
 
@@ -1172,10 +1206,10 @@
   lastinitial1[1] = 0;
   lastinitiallength = 0;
   lastprimarylength = 100;
-  lastprimary = (char *) xmalloc (lastprimarylength + 1);
+  lastprimary = (unsigned char *) xmalloc (lastprimarylength + 1);
   memset (lastprimary, '\0', lastprimarylength + 1);
   lastsecondarylength = 100;
-  lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
+  lastsecondary = (unsigned char *) xmalloc (lastsecondarylength + 1);
   memset (lastsecondary, '\0', lastsecondarylength + 1);
 }
 
@@ -1184,16 +1218,16 @@
 
 void
 indexify (line, ostream)
-     char *line;
+     unsigned char *line;
      FILE *ostream;
 {
-  char *primary, *secondary, *pagenumber;
+  unsigned char *primary, *secondary, *pagenumber;
   int primarylength, secondarylength = 0, pagelength;
   int nosecondary;
   int initiallength;
-  char *initial;
-  char initial1[2];
-  register char *p;
+  unsigned char *initial;
+  unsigned char initial1[2];
+  unsigned char *p;
 
   /* First, analyze the parts of the entry fed to us this time. */
 
@@ -1212,8 +1246,13 @@
       initial1[1] = 0;
       initiallength = 1;
 
-      if (initial1[0] >= 'a' && initial1[0] <= 'z')
-        initial1[0] -= 040;
+#ifdef HAVE_SETLOCALE
+      if (isalpha(initial1[0]))
+        initial1[0] = (unsigned char) toupper(initial1[0]);
+#else
+      if (initial[0] >= 'a' && initial[0] <= 'z')
+        initial[0] -= 040;
+#endif
     }
 
   pagenumber = find_braced_pos (line, 1, 0, 0);
@@ -1277,7 +1316,7 @@
       if (lastprimarylength < primarylength)
         {
           lastprimarylength = primarylength + 100;
-          lastprimary = (char *) xrealloc (lastprimary,
+          lastprimary = (unsigned char *) xrealloc (lastprimary,
                                            1 + lastprimarylength);
         }
       strncpy (lastprimary, primary, primarylength);
@@ -1311,7 +1350,7 @@
       if (lastsecondarylength < secondarylength)
         {
           lastsecondarylength = secondarylength + 100;
-          lastsecondary = (char *) xrealloc (lastsecondary,
+          lastsecondary = (unsigned char *) xrealloc (lastsecondary,
                                              1 + lastsecondarylength);
         }
       strncpy (lastsecondary, secondary, secondarylength);
@@ -1341,12 +1380,12 @@
 
 void
 writelines (linearray, nlines, ostream)
-     char **linearray;
+     unsigned char **linearray;
      int nlines;
      FILE *ostream;
 {
-  char **stop_line = linearray + nlines;
-  char **next_line;
+  unsigned char **stop_line = linearray + nlines;
+  unsigned char **next_line;
 
   init_index ();
 
@@ -1360,8 +1399,8 @@
          explicitly specd keyfields. */
           || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
         {
-          char *p = *next_line;
-          char c;
+          unsigned char *p = *next_line;
+          unsigned char c;
 
           while ((c = *p++) && c != '\n')
             /* Do nothing. */ ;
@@ -1635,12 +1674,12 @@
 /* Return a newly-allocated string whose contents concatenate those of
    S1, S2, S3.  */
 
-char *
+unsigned char *
 concat (s1, s2, s3)
      char *s1, *s2, *s3;
 {
   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
-  char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
+  unsigned char *result = (unsigned char *) xmalloc (len1 + len2 + len3 + 1);
 
   strcpy (result, s1);
   strcpy (result + len1, s2);
@@ -1651,9 +1690,9 @@
 }
 
 #if !defined (HAVE_STRCHR)
-char *
+unsigned char *
 strrchr (string, character)
-     char *string;
+     unsigned char *string;
      int character;
 {
   register int i;

Reply via email to