On Wed, Nov 20, 2002 at 02:27:53PM +1100, Tim Robbins wrote:
> On Wed, Nov 20, 2002 at 04:38:38AM +0300, Andrey A. Chernov wrote:
>
> > On Tue, Nov 19, 2002 at 14:52:02 +0200, Ruslan Ermilov wrote:
> > > It seems that this patch has never been committed. This is a critical
> > > bug that should be fixed before 5.0-RELEASE is out.
> >
> > I agree. There is no locale yet and I never see that patch.
>
> This patch seems to work, I used the logic from regcomp.c in libc.
> Long lines make it ugly, but it was like that when I got here ;)
> Index: src/usr.bin/awk/Makefile
> ===================================================================
> RCS file: /x/freebsd/src/usr.bin/awk/Makefile,v
> retrieving revision 1.9
> diff -u -r1.9 Makefile
> --- src/usr.bin/awk/Makefile 10 May 2002 20:36:21 -0000 1.9
> +++ src/usr.bin/awk/Makefile 20 Nov 2002 03:13:50 -0000
> @@ -6,7 +6,7 @@
> PROG= nawk
> SRCS= awkgram.y b.c lex.c lib.c main.c parse.c proctab.c run.c tran.c ytab.h
>
> -CFLAGS+= -I. -I${AWKSRC}
> +CFLAGS+= -I. -I${AWKSRC} -I${.CURDIR}/../../lib/libc/locale
>
Ouch.
> DPADD= ${LIBM}
> LDADD= -lm
> Index: src/contrib/one-true-awk/b.c
> ===================================================================
> RCS file: /x/freebsd/src/contrib/one-true-awk/b.c,v
> retrieving revision 1.1.1.2
> diff -u -r1.1.1.2 b.c
> --- src/contrib/one-true-awk/b.c 19 Feb 2002 09:35:24 -0000 1.1.1.2
> +++ src/contrib/one-true-awk/b.c 20 Nov 2002 03:16:10 -0000
> @@ -32,6 +32,7 @@
> #include <stdlib.h>
> #include "awk.h"
> #include "ytab.h"
> +#include "collate.h"
>
> #define HAT (NCHARS-2) /* matches ^ in regular expr */
> /* NCHARS is 2**n */
> @@ -284,7 +285,7 @@
>
> char *cclenter(char *argp) /* add a character class */
> {
> - int i, c, c2;
> + int i, j, c, c2;
> uschar *p = (uschar *) argp;
> uschar *op, *bp;
> static uschar *buf = 0;
> @@ -308,12 +309,24 @@
> i--;
> continue;
> }
> - while (c < c2) {
> - if (!adjbuf((char **) &buf, &bufsz, bp-buf+2,
>100, (char **) &bp, 0))
> - FATAL("out of space for character
>class [%.10s...] 2", p);
> - *bp++ = ++c;
> - i++;
> - }
> + if (__collate_load_error) {
> + while (c < c2) {
> + if (!adjbuf((char **) &buf, &bufsz,
>bp-buf+2, 100, (char **) &bp, 0))
> + FATAL("out of space for
>character class [%.10s...] 2", p);
> + *bp++ = ++c;
> + i++;
> + }
> + } else {
> + for (j = CHAR_MIN; j <= CHAR_MAX; j++) {
> + if (!adjbuf((char **) &buf, &bufsz,
>bp-buf+2, 100, (char **) &bp, 0))
> + FATAL("out of space for
>character class [%.10s...] 2", p);
> + if (__collate_range_cmp(c, j) <= 0
> + && __collate_range_cmp(j, c2) <=
>0) {
> + *bp++ = j;
> + i++;
> + }
> + }
> + }
> continue;
> }
> }
There are a number of problems here:
1. The "empty range" check preceding this block should be made
locale-aware too.
2. CHAR_MAX evaluates to 127 here.
Here's my version of the above fix, plus the [[:class:]] fixes Andrey mentioned.
I gave it only light testing.
The collate_range_cmp() was stolen from the old awk(1).
Cheers,
--
Ruslan Ermilov Sysadmin and DBA,
[EMAIL PROTECTED] Sunbay Software AG,
[EMAIL PROTECTED] FreeBSD committer,
+380.652.512.251 Simferopol, Ukraine
http://www.FreeBSD.org The Power To Serve
http://www.oracle.com Enabling The Information Age
Index: b.c
===================================================================
RCS file: /home/ncvs/src/contrib/one-true-awk/b.c,v
retrieving revision 1.1.1.2
diff -u -p -r1.1.1.2 b.c
--- b.c 19 Feb 2002 09:35:24 -0000 1.1.1.2
+++ b.c 20 Nov 2002 12:51:10 -0000
@@ -282,9 +282,25 @@ int quoted(char **pp) /* pick up next th
return c;
}
+static int collate_range_cmp (a, b)
+ int a, b;
+{
+ int r;
+ static char s[2][2];
+
+ if ((unsigned char)a == (unsigned char)b)
+ return 0;
+ s[0][0] = a;
+ s[1][0] = b;
+ if ((r = strcoll(s[0], s[1])) == 0)
+ r = (unsigned char)a - (unsigned char)b;
+ return r;
+}
+
char *cclenter(char *argp) /* add a character class */
{
int i, c, c2;
+ int j;
uschar *p = (uschar *) argp;
uschar *op, *bp;
static uschar *buf = 0;
@@ -303,15 +319,18 @@ char *cclenter(char *argp) /* add a char
c2 = *p++;
if (c2 == '\\')
c2 = quoted((char **) &p);
- if (c > c2) { /* empty; ignore */
+ if (collate_range_cmp(c, c2) > 0) { /* empty;
+ignore */
bp--;
i--;
continue;
}
- while (c < c2) {
+ for (j = 0; j < NCHARS; j++) {
+ if ((collate_range_cmp(c, j) > 0) ||
+ collate_range_cmp(j, c2) > 0)
+ continue;
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2,
100, (char **) &bp, 0))
FATAL("out of space for character
class [%.10s...] 2", p);
- *bp++ = ++c;
+ *bp++ = j;
i++;
}
continue;
@@ -696,20 +715,20 @@ Node *unary(Node *np)
struct charclass {
const char *cc_name;
int cc_namelen;
- const char *cc_expand;
+ int (*cc_func)(int);
} charclasses[] = {
- { "alnum", 5, "0-9A-Za-z" },
- { "alpha", 5, "A-Za-z" },
- { "blank", 5, " \t" },
- { "cntrl", 5, "\000-\037\177" },
- { "digit", 5, "0-9" },
- { "graph", 5, "\041-\176" },
- { "lower", 5, "a-z" },
- { "print", 5, " \041-\176" },
- { "punct", 5, "\041-\057\072-\100\133-\140\173-\176" },
- { "space", 5, " \f\n\r\t\v" },
- { "upper", 5, "A-Z" },
- { "xdigit", 6, "0-9A-Fa-f" },
+ { "alnum", 5, isalnum },
+ { "alpha", 5, isalpha },
+ { "blank", 5, isblank },
+ { "cntrl", 5, iscntrl },
+ { "digit", 5, isdigit },
+ { "graph", 5, isgraph },
+ { "lower", 5, islower },
+ { "print", 5, isprint },
+ { "punct", 5, ispunct },
+ { "space", 5, isspace },
+ { "upper", 5, isupper },
+ { "xdigit", 6, isxdigit },
{ NULL, 0, NULL },
};
@@ -722,7 +741,7 @@ int relex(void) /* lexical analyzer for
static int bufsz = 100;
uschar *bp;
struct charclass *cc;
- const uschar *p;
+ int i;
switch (c = *prestr++) {
case '|': return OR;
@@ -771,8 +790,14 @@ int relex(void) /* lexical analyzer for
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen]
== ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
- for (p = (const uschar *) cc->cc_expand; *p;
p++)
- *bp++ = *p;
+ for (i = 0; i < NCHARS; i++) {
+ if (!adjbuf((char **) &buf, &bufsz,
+bp-buf+1, 100, (char **) &bp, 0))
+ FATAL("out of space for reg expr
+%.10s...", lastre);
+ if (cc->cc_func(i)) {
+ *bp++ = i;
+ n++;
+ }
+ }
} else
*bp++ = c;
} else if (c == '\0') {
Index: main.c
===================================================================
RCS file: /home/ncvs/src/contrib/one-true-awk/main.c,v
retrieving revision 1.1.1.3
diff -u -p -r1.1.1.3 main.c
--- main.c 16 Mar 2002 16:50:56 -0000 1.1.1.3
+++ main.c 20 Nov 2002 12:51:10 -0000
@@ -27,6 +27,7 @@ char *version = "version 20020210";
#define DEBUG
#include <stdio.h>
#include <ctype.h>
+#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
@@ -54,6 +55,7 @@ int main(int argc, char *argv[])
{
char *fs = NULL;
+ setlocale(LC_ALL, "");
cmdname = argv[0];
if (argc == 1) {
fprintf(stderr, "Usage: %s [-f programfile | 'program'] [-Ffieldsep]
[-v var=value] [files]\n", cmdname);
Index: run.c
===================================================================
RCS file: /home/ncvs/src/contrib/one-true-awk/run.c,v
retrieving revision 1.1.1.2
diff -u -p -r1.1.1.2 run.c
--- run.c 19 Feb 2002 09:35:25 -0000 1.1.1.2
+++ run.c 20 Nov 2002 12:51:10 -0000
@@ -1504,11 +1504,11 @@ Cell *bltin(Node **a, int n) /* builtin
if (t == FTOUPPER) {
for (p = buf; *p; p++)
if (islower((uschar) *p))
- *p = toupper(*p);
+ *p = toupper((uschar)*p);
} else {
for (p = buf; *p; p++)
if (isupper((uschar) *p))
- *p = tolower(*p);
+ *p = tolower((uschar)*p);
}
tempfree(x);
x = gettemp();
msg46995/pgp00000.pgp
Description: PGP signature
