martin 99/11/24 14:31:10
Modified: src/regex regcomp.c Log: This patch fixes some of the bogosity in regular expressions on EBCDIC based machines: a character range [a-z] would match much more than only the islower() characters because in the EBCDIC charset there are "holes in the contiguity" between a-i, j-r and s-z. This patch fixes [<lowercase>-<lowercase>] and [<uppercase>-<uppercase>] ranges by only regarding alphabetic characters between the lower and upper bound. (Any other range definition remains unchanged.) Revision Changes Path 1.10 +37 -0 apache-1.3/src/regex/regcomp.c Index: regcomp.c =================================================================== RCS file: /export/home/cvs/apache-1.3/src/regex/regcomp.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- regcomp.c 1998/09/15 19:47:45 1.9 +++ regcomp.c 1999/11/24 22:31:08 1.10 @@ -100,7 +100,30 @@ #else # define GOODFLAGS(f) ((f)&~REG_DUMP) #endif +#ifdef CHARSET_EBCDIC /* Added for Apache by <[EMAIL PROTECTED]> */ + static int initialized = 0; + if (!initialized) { + unsigned ch, idx = 0; + static unsigned char ctlchars_ebcdic[256+1]; + + for (ch = 1; ch <= 0xFF; ++ch) { + if (ap_iscntrl(ch)) { + ctlchars_ebcdic[idx++] = ch; + } + } + ctlchars_ebcdic[idx++] = '\0'; /* redundant */ + + for (idx=0; idx < sizeof(cclasses) / sizeof(cclasses[0]); ++idx) { + if (strcmp(cclasses[idx].name, "cntrl") == 0) { + cclasses[idx].chars = ctlchars_ebcdic; + break; + } + } + initialized = 1; + } +#endif /*CHARSET_EBCDIC*/ + cflags = GOODFLAGS(cflags); if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) return(REG_INVARG); @@ -708,8 +731,22 @@ finish = start; /* xxx what about signed chars here... 
*/ REQUIRE(start <= finish, REG_ERANGE); +#ifndef CHARSET_EBCDIC for (i = start; i <= finish; i++) CHadd(cs, i); +#else /* Added for Apache by <[EMAIL PROTECTED]> */ + /* Special provision for character ranges [a-zA-Z], */ + /* which are non-contiguous in EBCDIC: */ + if ((ap_isupper(start) && ap_isupper(finish)) || + (ap_islower(start) && ap_islower(finish))) { + for (i = start; i <= finish; i++) + if (ap_isalpha(i)) + CHadd(cs, i); + } else { + for (i = start; i <= finish; i++) + CHadd(cs, i); + } +#endif /*CHARSET_EBCDIC*/ break; } }