On Fri Oct 23 22:47:46 EDT 2009, [email protected] wrote:
> try using [_a-zA-Z][_a-zA-Z0-9] rather than the expanded form. i
> didn't track it down to be sure, but plan 9 awk seems to have a limit
> of 34 characters inside square brackets. a rather odd number
> 
> cpu% echo hello | awk '/^[abcdefghijklmnopqrstuvwxyzABCDEFGH].+/ {
> line = $0 ; print line }'
> cpu% echo hello | awk '/^[abcdefghijklmnopqrstuvwxyzABCDEFG].+/ { line
> = $0 ; print line }'
> hello

there are two bits to fixing this.  steve simon
submitted an excellent patch, which i applied.
but i think there is still a bug.  34 is, as noted, a
really weird number.

a bit of background:  the way cclasses work is
that each member gets 2 entries: a start and
an end.  for a-b the start is a and the end is b.
for just a, the start and end are both a.  one
would expect since the original code has 64
spaces, we would get a maximum of 32 chars
per class.  however, /sys/src/ape/lib/regexp/regcomp.h
has
        /* max rune ranges per character class */
        #define NCCRUNE (sizeof(Reclass)/sizeof(wchar_t))
which is wrong since there is more stuff in Reclass
than an array of wchar_t's: the end pointer gets
counted too.  this is a big problem since it will
scribble 4 bytes past the end of the Reclass.

these are the diffs i came up with.  they attack
the problem a bit differently, but i think it's a
bit cleaner and worth a few extra lines of diff:

; 9diff regcomp.h
/n/sources/plan9//sys/src/ape/lib/regexp/regcomp.h:1,23 - regcomp.h:1,17
  /*
   *  substitution list
   */
+ enum {
+       NSUBEXP = 32,
+       LISTINCREMENT   = 8,
+ };
+ 
  typedef struct Resublist      Resublist;
  struct        Resublist
  {
-       Resub   m[32];
+       Resub   m[NSUBEXP];
  };
  
- /* max subexpressions per program */
- Resublist ReSuBlIsT;
- #define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub))
- 
- /* max character classes per program */
- Reprog        RePrOg;
- #define       NCLASS  (sizeof(RePrOg.class)/sizeof(Reclass))
- 
- /* max rune ranges per character class */
- #define NCCRUNE       (sizeof(Reclass)/sizeof(wchar_t))
- 
  /*
   * Actions and Tokens (Reinst types)
   *
/n/sources/plan9//sys/src/ape/lib/regexp/regcomp.h:46,52 - regcomp.h:40,45
  /*
   *  regexec execution lists
   */
- #define LISTINCREMENT 8
  typedef struct Relist Relist;
  struct Relist
  {
; 9diff regexp.h
/n/sources/plan9//sys/include/ape/regexp.h:35,43 - regexp.h:35,50
  /*
   *    character class, each pair of rune's defines a range
   */
+ enum{
+       NCCRUNE = 256,
+       NCLASS  = 16,
+       NINST           = 5,
+ 
+ };
+ 
  struct Reclass{
        wchar_t *end;
-       wchar_t spans[64];
+       wchar_t spans[NCCRUNE];
  };
  
  /*
/n/sources/plan9//sys/include/ape/regexp.h:62,69 - regexp.h:69,76
   */
  struct Reprog{
        Reinst  *startinst;     /* start pc */
-       Reclass class[16];      /* .data */
-       Reinst  firstinst[5];   /* .text */
+       Reclass class[NCLASS];  /* .data */
+       Reinst  firstinst[NINST];       /* .text */
  };
  
  extern Reprog *regcomp(char*);

- erik

Reply via email to