martin 99/11/24 14:31:10
Modified: src/regex regcomp.c
Log:
This patch fixes some of the bogosity in regular expressions
on EBCDIC-based machines: a character range [a-z] would match much more
than only the islower() characters because in the EBCDIC charset there
are "holes in the contiguity" between a-i, j-r and s-z.
This patch fixes [<lowercase>-<lowercase>] and [<uppercase>-<uppercase>]
ranges by only regarding alphabetic characters between the lower and
upper bound. (Any other range definition remains unchanged.)
Revision Changes Path
1.10 +37 -0 apache-1.3/src/regex/regcomp.c
Index: regcomp.c
===================================================================
RCS file: /export/home/cvs/apache-1.3/src/regex/regcomp.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- regcomp.c 1998/09/15 19:47:45 1.9
+++ regcomp.c 1999/11/24 22:31:08 1.10
@@ -100,7 +100,30 @@
#else
# define GOODFLAGS(f) ((f)&~REG_DUMP)
#endif
+#ifdef CHARSET_EBCDIC /* Added for Apache by <[EMAIL PROTECTED]> */
+ static int initialized = 0;
+ if (!initialized) {
+ unsigned ch, idx = 0;
+ static unsigned char ctlchars_ebcdic[256+1];
+
+ for (ch = 1; ch <= 0xFF; ++ch) {
+ if (ap_iscntrl(ch)) {
+ ctlchars_ebcdic[idx++] = ch;
+ }
+ }
+ ctlchars_ebcdic[idx++] = '\0'; /* redundant */
+
+ for (idx=0; idx < sizeof(cclasses) / sizeof(cclasses[0]); ++idx) {
+ if (strcmp(cclasses[idx].name, "cntrl") == 0) {
+ cclasses[idx].chars = ctlchars_ebcdic;
+ break;
+ }
+ }
+ initialized = 1;
+ }
+#endif /*CHARSET_EBCDIC*/
+
cflags = GOODFLAGS(cflags);
if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
return(REG_INVARG);
@@ -708,8 +731,22 @@
finish = start;
/* xxx what about signed chars here... */
REQUIRE(start <= finish, REG_ERANGE);
+#ifndef CHARSET_EBCDIC
for (i = start; i <= finish; i++)
CHadd(cs, i);
+#else /* Added for Apache by <[EMAIL PROTECTED]> */
+ /* Special provision for character ranges [a-zA-Z], */
+ /* which are non-contiguous in EBCDIC: */
+ if ((ap_isupper(start) && ap_isupper(finish)) ||
+ (ap_islower(start) && ap_islower(finish))) {
+ for (i = start; i <= finish; i++)
+ if (ap_isalpha(i))
+ CHadd(cs, i);
+ } else {
+ for (i = start; i <= finish; i++)
+ CHadd(cs, i);
+ }
+#endif /*CHARSET_EBCDIC*/
break;
}
}