Hello!
I made a patch to texindex (from texinfo-4.0) so that it sorts index entries according to the current locale.
What it does:
- use unsigned char instead of char everywhere when dealing with input text
- use locale-aware functions (strcoll, isalpha, toupper) when comparing characters, but only if HAVE_SETLOCALE is defined
(if HAVE_SETLOCALE is not defined, the old char_order table is used instead).
Please check and apply this patch.
Thank you in advance,
Dima.
--- /home/dima/texindex.c Fri Aug 6 17:03:14 1999
+++ texindex.c Sat Apr 1 12:26:30 2000
@@ -48,9 +48,9 @@
and the position and length of its first keyfield. */
struct lineinfo
{
- char *text; /* The actual text of the line. */
+ unsigned char *text; /* The actual text of the line. */
union {
- char *text; /* The start of the key (for textual comparison). */
+ unsigned char *text; /* The start of the key (for textual comparison). */
long number; /* The numeric value (for numeric comparison). */
} key;
long keylen; /* Length of KEY field. */
@@ -88,7 +88,7 @@
int num_infiles;
/* Pointer to the array of pointers to lines being sorted. */
-char **linearray;
+unsigned char **linearray;
/* The allocated length of `linearray'. */
long nlines;
@@ -108,7 +108,7 @@
/* During in-core sort, this points to the base of the data block
which contains all the lines of data. */
-char *text_base;
+unsigned char *text_base;
/* Additional command switches .*/
@@ -119,12 +119,12 @@
void decode_command ();
void sort_in_core ();
void sort_offline ();
-char **parsefile ();
-char *find_field ();
-char *find_pos ();
+unsigned char **parsefile ();
+unsigned char *find_field ();
+unsigned char *find_pos ();
long find_value ();
-char *find_braced_pos ();
-char *find_braced_end ();
+unsigned char *find_braced_pos ();
+unsigned char *find_braced_end ();
void writelines ();
int compare_field ();
int compare_full ();
@@ -135,7 +135,7 @@
void fatal ();
void error ();
void *xmalloc (), *xrealloc ();
-char *concat ();
+unsigned char *concat ();
void flush_tempfiles ();
#define MAX_IN_CORE_SORT 500000
@@ -153,6 +153,10 @@
#ifdef HAVE_SETLOCALE
/* Set locale via LC_ALL. */
setlocale (LC_ALL, "");
+ if(!setlocale (LC_COLLATE, ""))
+ printf("WARNING! unable to setlocale(LC_COLLATE)\n");
+ if(!setlocale (LC_CTYPE, ""))
+ printf("WARNING! unable to setlocale(LC_CTYPE)\n");
#endif
/* Set the text message domain. */
@@ -231,12 +235,12 @@
typedef struct
{
- char *long_name;
- char *short_name;
+ unsigned char *long_name;
+ unsigned char *short_name;
int *variable_ref;
int variable_value;
char *arg_name;
- char *doc_string;
+ unsigned char *doc_string;
} TEXINDEX_OPTION;
TEXINDEX_OPTION texindex_options[] = {
@@ -407,7 +411,7 @@
int
compare_full (line1, line2)
- char **line1, **line2;
+ unsigned char **line1, **line2;
{
int i;
@@ -418,8 +422,8 @@
for (i = 0; i < num_keyfields; i++)
{
long length1, length2;
- char *start1 = find_field (&keyfields[i], *line1, &length1);
- char *start2 = find_field (&keyfields[i], *line2, &length2);
+ unsigned char *start1 = find_field (&keyfields[i], *line1, &length1);
+ unsigned char *start2 = find_field (&keyfields[i], *line2, &length2);
int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
start2, length2, *line2 - text_base);
if (tem)
@@ -444,7 +448,7 @@
{
int i;
int tem;
- char *text1, *text2;
+ unsigned char *text1, *text2;
/* Compare using the first keyfield, which has been found for us already. */
if (keyfields->positional)
@@ -476,8 +480,8 @@
for (i = 1; i < num_keyfields; i++)
{
long length1, length2;
- char *start1 = find_field (&keyfields[i], text1, &length1);
- char *start2 = find_field (&keyfields[i], text2, &length2);
+ unsigned char *start1 = find_field (&keyfields[i], text1, &length1);
+ unsigned char *start2 = find_field (&keyfields[i], text2, &length2);
int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
start2, length2, text2 - text_base);
if (tem)
@@ -498,7 +502,7 @@
int
compare_general (str1, str2, pos1, pos2, use_keyfields)
- char *str1, *str2;
+ unsigned char *str1, *str2;
long pos1, pos2;
int use_keyfields;
{
@@ -511,8 +515,8 @@
for (i = 0; i < use_keyfields; i++)
{
long length1, length2;
- char *start1 = find_field (&keyfields[i], str1, &length1);
- char *start2 = find_field (&keyfields[i], str2, &length2);
+ unsigned char *start1 = find_field (&keyfields[i], str1, &length1);
+ unsigned char *start2 = find_field (&keyfields[i], str2, &length2);
int tem = compare_field (&keyfields[i], start1, length1, pos1,
start2, length2, pos2);
if (tem)
@@ -530,15 +534,15 @@
A pointer to the starting character is returned, and the length
is stored into the int that LENGTHPTR points to. */
-char *
+unsigned char *
find_field (keyfield, str, lengthptr)
struct keyfield *keyfield;
- char *str;
+ unsigned char *str;
long *lengthptr;
{
- char *start;
- char *end;
- char *(*fun) ();
+ unsigned char *start;
+ unsigned char *end;
+ unsigned char *(*fun) ();
if (keyfield->braced)
fun = find_braced_pos;
@@ -573,18 +577,18 @@
If IGNORE_BLANKS is nonzero, we skip all blanks
after finding the specified word. */
-char *
+unsigned char *
find_pos (str, words, chars, ignore_blanks)
- char *str;
+ unsigned char *str;
int words, chars;
int ignore_blanks;
{
int i;
- char *p = str;
+ unsigned char *p = str;
for (i = 0; i < words; i++)
{
- char c;
+ unsigned char c;
/* Find next bunch of nonblanks and skip them. */
while ((c = *p) == ' ' || c == '\t')
p++;
@@ -609,16 +613,16 @@
/* Like find_pos but assumes that each field is surrounded by braces
and that braces within fields are balanced. */
-char *
+unsigned char *
find_braced_pos (str, words, chars, ignore_blanks)
- char *str;
+ unsigned char *str;
int words, chars;
int ignore_blanks;
{
int i;
int bracelevel;
- char *p = str;
- char c;
+ unsigned char *p = str;
+ unsigned char c;
for (i = 0; i < words; i++)
{
@@ -661,13 +665,13 @@
/* Find the end of the balanced-brace field which starts at STR.
The position returned is just before the closing brace. */
-char *
+unsigned char *
find_braced_end (str)
- char *str;
+ unsigned char *str;
{
int bracelevel;
- char *p = str;
- char c;
+ unsigned char *p = str;
+ unsigned char c;
bracelevel = 1;
while (bracelevel)
@@ -685,7 +689,7 @@
long
find_value (start, length)
- char *start;
+ unsigned char *start;
long length;
{
while (length != 0L)
@@ -698,6 +702,7 @@
return 0l;
}
+#ifndef HAVE_SETLOCALE
/* Vector used to translate characters for comparison.
This is how we make all alphanumerics follow all else,
and ignore case in the first sorting. */
@@ -719,6 +724,23 @@
char_order[i + 'A' - 'a'] = 512 + i;
}
}
+#endif
+
+#ifdef HAVE_SETLOCALE
+/* Locale-aware order of two single characters; letters sort after all else. */
+int
+compare_char (int c1, int c2)
+{
+  unsigned char buf1[2], buf2[2];
+
+  buf1[0] = (unsigned char) c1; buf1[1] = 0;
+  buf2[0] = (unsigned char) c2; buf2[1] = 0;
+  if (!isalpha (c1) != !isalpha (c2))
+    return isalpha (c1) ? 1 : -1;
+  /* strcoll takes plain char *, so cast the one-character strings.  */
+  return strcoll ((char *) buf1, (char *) buf2);
+}
+#endif
/* Compare two fields (each specified as a start pointer and a character count)
according to KEYFIELD.
@@ -727,10 +749,10 @@
int
compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
struct keyfield *keyfield;
- char *start1;
+ unsigned char *start1;
long length1;
long pos1;
- char *start2;
+ unsigned char *start2;
long length2;
long pos2;
{
@@ -752,10 +774,10 @@
}
else
{
- char *p1 = start1;
- char *p2 = start2;
- char *e1 = start1 + length1;
- char *e2 = start2 + length2;
+ unsigned char *p1 = start1;
+ unsigned char *p2 = start2;
+ unsigned char *e1 = start1 + length1;
+ unsigned char *e2 = start2 + length2;
while (1)
{
@@ -770,8 +792,13 @@
else
c2 = *p2++;
- if (char_order[c1] != char_order[c2])
- return char_order[c1] - char_order[c2];
+#ifdef HAVE_SETLOCALE
+ if (toupper(c1) != toupper(c2))
+ return compare_char(toupper(c1), toupper(c2));
+#else
+ if (char_order[c1] != char_order[c2])
+ return char_order[c1] - char_order[c2];
+#endif
if (!c1)
break;
}
@@ -792,9 +819,14 @@
else
c2 = *p2++;
- if (c1 != c2)
+#ifdef HAVE_SETLOCALE
+ if (compare_char(c1, c2))
/* Reverse sign here so upper case comes out last. */
- return c2 - c1;
+ return compare_char(c2, c1);
+#else
+ if (c1 != c2)
+ return c2 - c1;
+#endif
if (!c1)
break;
}
@@ -810,7 +842,7 @@
struct linebuffer
{
long size;
- char *buffer;
+ unsigned char *buffer;
};
/* Initialize LINEBUFFER for use. */
@@ -820,7 +852,7 @@
struct linebuffer *linebuffer;
{
linebuffer->size = 200;
- linebuffer->buffer = (char *) xmalloc (200);
+ linebuffer->buffer = (unsigned char *) xmalloc (200);
}
/* Read a line of text from STREAM into LINEBUFFER.
@@ -831,16 +863,16 @@
struct linebuffer *linebuffer;
FILE *stream;
{
- char *buffer = linebuffer->buffer;
- char *p = linebuffer->buffer;
- char *end = p + linebuffer->size;
+ unsigned char *buffer = linebuffer->buffer;
+ unsigned char *p = linebuffer->buffer;
+ unsigned char *end = p + linebuffer->size;
while (1)
{
int c = getc (stream);
if (p == end)
{
- buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
+ buffer = (unsigned char *) xrealloc (buffer, linebuffer->size *= 2);
p += buffer - linebuffer->buffer;
end += buffer - linebuffer->buffer;
linebuffer->buffer = buffer;
@@ -964,9 +996,9 @@
off_t total;
char *outfile;
{
- char **nextline;
- char *data = (char *) xmalloc (total + 1);
- char *file_data;
+ unsigned char **nextline;
+ unsigned char *data = (char *) xmalloc (total + 1);
+ unsigned char *file_data;
long file_size;
int i;
FILE *ostream = stdout;
@@ -996,7 +1028,9 @@
return;
}
+#ifndef HAVE_SETLOCALE
init_char_order ();
+#endif
/* Sort routines want to know this address. */
@@ -1008,7 +1042,7 @@
nlines = total / 50;
if (!nlines)
nlines = 2;
- linearray = (char **) xmalloc (nlines * sizeof (char *));
+ linearray = (unsigned char **) xmalloc (nlines * sizeof (unsigned char *));
/* `nextline' points to the next free slot in this array.
`nlines' is the allocated size. */
@@ -1035,7 +1069,7 @@
if (lineinfo)
{
struct lineinfo *lp;
- char **p;
+ unsigned char **p;
for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
{
@@ -1054,7 +1088,7 @@
free (lineinfo);
}
else
- qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
+ qsort (linearray, nextline - linearray, sizeof (unsigned char *), compare_full);
/* Open the output file. */
@@ -1079,15 +1113,15 @@
The value returned is the first entry in LINEARRAY still unused.
Value 0 means input file contents are invalid. */
-char **
+unsigned char **
parsefile (filename, nextline, data, size)
char *filename;
- char **nextline;
- char *data;
+ unsigned char **nextline;
+ unsigned char *data;
long size;
{
- char *p, *end;
- char **line = nextline;
+ unsigned char *p, *end;
+ unsigned char **line = nextline;
p = data;
end = p + size;
@@ -1107,8 +1141,8 @@
line++;
if (line == linearray + nlines)
{
- char **old = linearray;
- linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
+ unsigned char **old = linearray;
+ linearray = (unsigned char **) xrealloc (linearray, sizeof (unsigned char
+*) * (nlines *= 4));
line += linearray - old;
}
}
@@ -1134,12 +1168,12 @@
/* The last primary name we wrote a \primary entry for.
If only one level of indexing is being done, this is the last name seen. */
-char *lastprimary;
+unsigned char *lastprimary;
/* Length of storage allocated for lastprimary. */
int lastprimarylength;
/* Similar, for the secondary name. */
-char *lastsecondary;
+unsigned char *lastsecondary;
int lastsecondarylength;
/* Zero if we are not in the middle of writing an entry.
@@ -1152,14 +1186,14 @@
/* The initial (for sorting purposes) of the last primary entry written.
When this changes, a \initial {c} line is written */
-char *lastinitial;
+unsigned char *lastinitial;
int lastinitiallength;
/* When we need a string of length 1 for the value of lastinitial,
store it here. */
-char lastinitial1[2];
+unsigned char lastinitial1[2];
/* Initialize static storage for writing an index. */
@@ -1172,10 +1206,10 @@
lastinitial1[1] = 0;
lastinitiallength = 0;
lastprimarylength = 100;
- lastprimary = (char *) xmalloc (lastprimarylength + 1);
+ lastprimary = (unsigned char *) xmalloc (lastprimarylength + 1);
memset (lastprimary, '\0', lastprimarylength + 1);
lastsecondarylength = 100;
- lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
+ lastsecondary = (unsigned char *) xmalloc (lastsecondarylength + 1);
memset (lastsecondary, '\0', lastsecondarylength + 1);
}
@@ -1184,16 +1218,16 @@
void
indexify (line, ostream)
- char *line;
+ unsigned char *line;
FILE *ostream;
{
- char *primary, *secondary, *pagenumber;
+ unsigned char *primary, *secondary, *pagenumber;
int primarylength, secondarylength = 0, pagelength;
int nosecondary;
int initiallength;
- char *initial;
- char initial1[2];
- register char *p;
+ unsigned char *initial;
+ unsigned char initial1[2];
+ unsigned char *p;
/* First, analyze the parts of the entry fed to us this time. */
@@ -1212,8 +1246,13 @@
initial1[1] = 0;
initiallength = 1;
- if (initial1[0] >= 'a' && initial1[0] <= 'z')
- initial1[0] -= 040;
+#ifdef HAVE_SETLOCALE
+ if (isalpha(initial1[0]))
+ initial1[0] = (unsigned char) toupper(initial1[0]);
+#else
+ if (initial[0] >= 'a' && initial[0] <= 'z')
+ initial[0] -= 040;
+#endif
}
pagenumber = find_braced_pos (line, 1, 0, 0);
@@ -1277,7 +1316,7 @@
if (lastprimarylength < primarylength)
{
lastprimarylength = primarylength + 100;
- lastprimary = (char *) xrealloc (lastprimary,
+ lastprimary = (unsigned char *) xrealloc (lastprimary,
1 + lastprimarylength);
}
strncpy (lastprimary, primary, primarylength);
@@ -1311,7 +1350,7 @@
if (lastsecondarylength < secondarylength)
{
lastsecondarylength = secondarylength + 100;
- lastsecondary = (char *) xrealloc (lastsecondary,
+ lastsecondary = (unsigned char *) xrealloc (lastsecondary,
1 + lastsecondarylength);
}
strncpy (lastsecondary, secondary, secondarylength);
@@ -1341,12 +1380,12 @@
void
writelines (linearray, nlines, ostream)
- char **linearray;
+ unsigned char **linearray;
int nlines;
FILE *ostream;
{
- char **stop_line = linearray + nlines;
- char **next_line;
+ unsigned char **stop_line = linearray + nlines;
+ unsigned char **next_line;
init_index ();
@@ -1360,8 +1399,8 @@
explicitly specd keyfields. */
|| compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields -
1))
{
- char *p = *next_line;
- char c;
+ unsigned char *p = *next_line;
+ unsigned char c;
while ((c = *p++) && c != '\n')
/* Do nothing. */ ;
@@ -1635,12 +1674,12 @@
/* Return a newly-allocated string whose contents concatenate those of
S1, S2, S3. */
-char *
+unsigned char *
concat (s1, s2, s3)
char *s1, *s2, *s3;
{
int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
- char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
+ unsigned char *result = (unsigned char *) xmalloc (len1 + len2 + len3 + 1);
strcpy (result, s1);
strcpy (result + len1, s2);
@@ -1651,9 +1690,9 @@
}
#if !defined (HAVE_STRCHR)
-char *
+unsigned char *
strrchr (string, character)
- char *string;
+ unsigned char *string;
int character;
{
register int i;