> it mostly works but part of the configure script fails where it runs
> awk over a header file and uses a rather silly RE to find all 
> defines/undefines.
> 
> /^[\t ]*#[\t ]*(define|undef)[\t 
> ]+[_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ][_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]*([\t
>  (]|$)// { print; }
> 
For some reason they don't like character class ranges, and if I
replace the lists with ranges the RE works as I expect:
> 
> /^[\t ]*#[\t ]*(define|undef)[\t ]+[_a-zA-Z][_a-zA-Z0-9]*([\t (]|$)/ { print; 
> }
> 
> I looked at the awk source and even upped MAXRE - though its
> existing value of 512 looks fine to me.
> 
> anyone any ideas why awk might not match with an RE like this?

this is something i fixed when it came up on the list some time
ago.  i believe the patch was rejected, and done another way,
so the diff is noisy.  sorry.

one remaining problem with the code on sources is that NSPANS
isn't appropriately sized for an a-through-z class for a reasonable
alphabet.  you also might try the awk in my contrib—it has a large
number of fixes from bwk and is much more careful about converting
between floating point and decimal so as not to cause spurious
exceptions.

- erik




; cd /sys/include
; 9diff regexp.h
/n/sources/plan9//sys/include/regexp.h:1,6 - regexp.h:1,11
  #pragma       src     "/sys/src/libregexp"
  #pragma       lib     "libregexp.a"
  
+ enum {
+       NSPANS  = 128,          /* max rune ranges per character class */
+       NCLASS  = 16,           /* max character classes per program */
+ };
+ 
  typedef struct Resub          Resub;
  typedef struct Reclass                Reclass;
  typedef struct Reinst         Reinst;
/n/sources/plan9//sys/include/regexp.h:27,33 - regexp.h:32,38
   */
  struct Reclass{
        Rune    *end;
-       Rune    spans[64];
+       Rune    spans[NSPANS*2];
  };
  
  /*
/n/sources/plan9//sys/include/regexp.h:52,58 - regexp.h:57,63
   */
  struct Reprog{
        Reinst  *startinst;     /* start pc */
-       Reclass class[16];      /* .data */
+       Reclass class[NCLASS];  /* .data */
        Reinst  firstinst[5];   /* .text */
  };
  
; cd /sys/src/libregexp
; 9diff *
/n/sources/plan9//sys/src/libregexp/regcomp.c:16,27 - regcomp.c:16,21
        Reinst* last;
  }Node;
  
- /* max character classes per program is nelem(reprog->class) */
- static Reprog *reprog;
- 
- /* max rune ranges per character class is nelem(classp->spans)/2 */
- #define NCCRUNE       nelem(classp->spans)
- 
  #define       NSTACK  20
  static        Node    andstack[NSTACK];
  static        Node    *andp;
/n/sources/plan9//sys/src/libregexp/regcomp.c:328,335 - regcomp.c:322,329
  static        Reclass*
  newclass(void)
  {
-       if(nclass >= nelem(reprog->class))
-               rcerror("too many character classes; increase Reprog.class 
size");
+       if(nclass >= NCLASS)
+               regerr2("too many character classes; limit", NCLASS+'0');
        return &(classp[nclass++]);
  }
  
/n/sources/plan9//sys/src/libregexp/regcomp.c:389,399 - regcomp.c:383,408
        return RUNE;
  }
  
+ static void
+ debugspan(void)
+ {
+ #ifdef DEBUG
+       int i, nspan;
+       Rune r;
+ 
+       nspan = yyclassp->end - yyclassp->spans >>1;
+       fprint(2, "nspan = %d\n", nspan);
+       p = yyclassp->spans;
+       for(i = 0; i < nspan; i++)
+               print("%C %C    %.4ux %.4ux\n", p[2*i], p[2*i+1], p[2*i], 
p[2*i+1]);
+ #endif
+ }
+ 
  static int
  bldcclass(void)
  {
        int type;
-       Rune r[NCCRUNE];
+       Rune r[NSPANS*2];
        Rune *p, *ep, *np;
        Rune rune;
        int quoted;
/n/sources/plan9//sys/src/libregexp/regcomp.c:414,420 - regcomp.c:423,433
        }
  
        /* parse class into a set of spans */
-       while(ep < &r[NCCRUNE-1]){
+       for(;;){
+               if(ep == r + nelem(r)){
+                       rcerror("class too large");
+                       return 0;
+               }
                if(rune == 0){
                        rcerror("malformed '[]'");
                        return 0;
/n/sources/plan9//sys/src/libregexp/regcomp.c:438,447 - regcomp.c:451,456
                }
                quoted = nextc(&rune);
        }
-       if(ep >= &r[NCCRUNE-1]) {
-               rcerror("char class too large; increase Reclass.spans size");
-               return 0;
-       }
  
        /* sort on span start */
        for(p = r; p < ep; p += 2){
/n/sources/plan9//sys/src/libregexp/regcomp.c:465,474 - regcomp.c:474,482
                np[0] = *p++;
                np[1] = *p++;
                for(; p < ep; p += 2)
-                       /* overlapping or adjacent ranges? */
-                       if(p[0] <= np[1] + 1){
+                       if(p[0] <= np[1]+1){
                                if(p[1] >= np[1])
-                                       np[1] = p[1];   /* coalesce */
+                                       np[1] = p[1];
                        } else {
                                np += 2;
                                np[0] = p[0];
/n/sources/plan9//sys/src/libregexp/regcomp.c:475,480 - regcomp.c:483,489
                                np[1] = p[1];
                        }
                yyclassp->end = np+2;
+               debugspan();
        }
  
        return type;


/n/sources/plan9//sys/src/ape/lib/regexp/regcomp.h:1,23 - regcomp.h:1,17
  /*
   *  substitution list
   */
+ enum {
+       NSUBEXP = 32,
+       LISTINCREMENT   = 8,
+ };
+ 
  typedef struct Resublist      Resublist;
  struct        Resublist
  {
-       Resub   m[32];
+       Resub   m[NSUBEXP];
  };
  
- /* max subexpressions per program */
- Resublist ReSuBlIsT;
- #define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub))
- 
- /* max character classes per program */
- Reprog        RePrOg;
- #define       NCLASS  (sizeof(RePrOg.class)/sizeof(Reclass))
- 
- /* max rune ranges per character class */
- #define NCCRUNE       (sizeof(Reclass)/sizeof(wchar_t))
- 
  /*
   * Actions and Tokens (Reinst types)
   *
/n/sources/plan9//sys/src/ape/lib/regexp/regcomp.h:46,52 - regcomp.h:40,45
  /*
   *  regexec execution lists
   */
- #define LISTINCREMENT 8
  typedef struct Relist Relist;
  struct Relist
  {
; lc
mkfile          regcomp.c       regerror.c      regsub.c                
rregsub.c
regaux.c                regcomp.h       regexec.c               rregexec.c
; find /sys/include|grep reg
/sys/include/ape/regexp.h
/sys/include/regexp.h
; 9diff /sys/include/ape/regexp.h
/n/sources/plan9/sys/include/ape/regexp.h:35,43 - 
/sys/include/ape/regexp.h:35,50
  /*
   *    character class, each pair of rune's defines a range
   */
+ enum{
+       NCCRUNE = 256,
+       NCLASS  = 16,
+       NINST           = 5,
+ 
+ };
+ 
  struct Reclass{
        wchar_t *end;
-       wchar_t spans[64];
+       wchar_t spans[NCCRUNE];
  };
  
  /*
/n/sources/plan9/sys/include/ape/regexp.h:62,69 - 
/sys/include/ape/regexp.h:69,76
   */
  struct Reprog{
        Reinst  *startinst;     /* start pc */
-       Reclass class[16];      /* .data */
-       Reinst  firstinst[5];   /* .text */
+       Reclass class[NCLASS];  /* .data */
+       Reinst  firstinst[NINST];       /* .text */
  };
  
  extern Reprog *regcomp(char*);

Reply via email to