Author: lupus
Date: 2008-02-12 12:35:01 -0500 (Tue, 12 Feb 2008)
New Revision: 95534
Added:
trunk/mcs/class/System/System.Text.RegularExpressions/RxCompiler.cs
trunk/mcs/class/System/System.Text.RegularExpressions/RxInterpreter.cs
trunk/mcs/class/System/System.Text.RegularExpressions/RxOp.cs
Modified:
trunk/mcs/class/System/System.Text.RegularExpressions/ChangeLog
Log:
Tue Feb 12 19:16:49 CET 2008 Paolo Molaro <[EMAIL PROTECTED]>
* RxCompiler.cs, RxInterpreter.cs, RxOp.cs: experimental new
interpreter.
Modified: trunk/mcs/class/System/System.Text.RegularExpressions/ChangeLog
===================================================================
--- trunk/mcs/class/System/System.Text.RegularExpressions/ChangeLog
2008-02-12 17:25:33 UTC (rev 95533)
+++ trunk/mcs/class/System/System.Text.RegularExpressions/ChangeLog
2008-02-12 17:35:01 UTC (rev 95534)
@@ -1,3 +1,9 @@
+
+Tue Feb 12 19:16:49 CET 2008 Paolo Molaro <[EMAIL PROTECTED]>
+
+ * RxCompiler.cs, RxInterpreter.cs, RxOp.cs: experimental new
+ interpreter.
+
2008-02-10 Zoltan Varga <[EMAIL PROTECTED]>
* replace.cs (NeedsGroupsOrCaptures): New property. Determines whenever
the
Added: trunk/mcs/class/System/System.Text.RegularExpressions/RxCompiler.cs
===================================================================
--- trunk/mcs/class/System/System.Text.RegularExpressions/RxCompiler.cs
2008-02-12 17:25:33 UTC (rev 95533)
+++ trunk/mcs/class/System/System.Text.RegularExpressions/RxCompiler.cs
2008-02-12 17:35:01 UTC (rev 95534)
@@ -0,0 +1,461 @@
+using System;
+using System.Collections;
+
+namespace System.Text.RegularExpressions {
+
+	/*
+	 * Collects every branch instruction that refers to one, not yet
+	 * resolved, jump target.  It behaves like a growing list of
+	 * (base, offsetpos) tuples stored flat in `offsets`: even slots hold
+	 * the position of the branch instruction in the program stream, odd
+	 * slots hold the position of the two-byte jump offset to patch.
+	 */
+	class RxLinkRef: LinkRef {
+		// flat (base, offsetpos) pairs; only the first `current` slots are used
+		public int[] offsets;
+		// number of used slots; odd while a tuple is half written
+		public int current = 0;
+
+		public RxLinkRef ()
+		{
+			offsets = new int [8];
+		}
+
+		// Records the start of the branch instruction in the program
+		// stream.  Must be followed by a call to PushOffsetPosition ().
+		// Throws InvalidOperationException if the previous tuple is
+		// still incomplete.
+		public void PushInstructionBase (int offset)
+		{
+			if ((current & 1) != 0)
+				throw new InvalidOperationException ("PushOffsetPosition () expected");
+			if (current == offsets.Length) {
+				int[] newarray = new int [offsets.Length * 2];
+				// Array.Copy counts elements.  Buffer.BlockCopy counts
+				// bytes, so passing offsets.Length to it would keep
+				// only a quarter of the recorded tuples.
+				Array.Copy (offsets, newarray, offsets.Length);
+				offsets = newarray;
+			}
+			offsets [current++] = offset;
+		}
+
+		// Records the position in the program stream where the jump
+		// offset is stored.  No capacity check is needed: the array
+		// length is always even and `current` is odd here, so a free
+		// slot is guaranteed after PushInstructionBase ().
+		// Throws InvalidOperationException if no instruction base was
+		// pushed first.
+		public void PushOffsetPosition (int offset)
+		{
+			if ((current & 1) == 0)
+				throw new InvalidOperationException ("PushInstructionBase () expected");
+			offsets [current++] = offset;
+		}
+
+	}
+
+	/*
+	 * ICompiler implementation that serializes a regular expression into
+	 * the RxOp byte code.  Multi-byte operands are written little endian.
+	 * Constructs that have no byte code representation yet throw
+	 * NotSupportedException.
+	 */
+	class RxCompiler : ICompiler {
+		// output buffer; only the first `curpos` bytes are valid
+		byte[] program = new byte [32];
+		int curpos = 0;
+
+		public RxCompiler () {
+		}
+
+		// Doubles the program buffer until `bytes` more bytes fit at curpos.
+		void MakeRoom (int bytes)
+		{
+			while (curpos + bytes > program.Length) {
+				byte[] newp = new byte [program.Length * 2];
+				Buffer.BlockCopy (program, 0, newp, 0, program.Length);
+				program = newp;
+			}
+		}
+
+		// Appends a single byte.
+		void Emit (byte val)
+		{
+			MakeRoom (1);
+			program [curpos] = val;
+			++curpos;
+		}
+
+		// Appends an opcode (one byte).
+		void Emit (RxOp opcode)
+		{
+			Emit ((byte)opcode);
+		}
+
+		// Appends a two-byte value, low byte first.
+		void Emit (ushort val)
+		{
+			MakeRoom (2);
+			program [curpos] = (byte)val;
+			program [curpos + 1] = (byte)(val >> 8);
+			curpos += 2;
+		}
+
+		// Appends a four-byte value, low byte first.
+		void Emit (int val)
+		{
+			MakeRoom (4);
+			program [curpos] = (byte)val;
+			program [curpos + 1] = (byte)(val >> 8);
+			program [curpos + 2] = (byte)(val >> 16);
+			program [curpos + 3] = (byte)(val >> 24);
+			curpos += 4;
+		}
+
+		// Opcode displacement for the Char/Range/Bitmap families, whose
+		// variants are laid out in RxOp as negate (+1), ignore case (+2)
+		// and reverse (+4).
+		static int CharFlags (bool negate, bool ignore, bool reverse)
+		{
+			int offset = 0;
+			if (negate)
+				offset += 1;
+			if (ignore)
+				offset += 2;
+			if (reverse)
+				offset += 4;
+			return offset;
+		}
+
+		// Opcode displacement for the String/Reference families, whose
+		// variants are laid out in RxOp as ignore case (+1) and reverse (+2).
+		static int StringFlags (bool ignore, bool reverse)
+		{
+			int offset = 0;
+			if (ignore)
+				offset += 1;
+			if (reverse)
+				offset += 2;
+			return offset;
+		}
+
+		// Remembers the current position as the base of a branch
+		// instruction that refers to `lref`.
+		void BeginLink (LinkRef lref) {
+			RxLinkRef link = (RxLinkRef)lref;
+			link.PushInstructionBase (curpos);
+		}
+
+		// Reserves the two-byte offset slot of the current branch
+		// instruction; ResolveLink () fills it in later.
+		void EmitLink (LinkRef lref)
+		{
+			RxLinkRef link = (RxLinkRef)lref;
+			link.PushOffsetPosition (curpos);
+			Emit ((ushort)0);
+		}
+
+		// ICompiler implementation
+
+		// Discards the program emitted so far (the buffer is reused).
+		public void Reset ()
+		{
+			curpos = 0;
+		}
+
+		// Returns a factory holding a right-sized copy of the program.
+		public IMachineFactory GetMachineFactory ()
+		{
+			byte[] code = new byte [curpos];
+			Buffer.BlockCopy (program, 0, code, 0, curpos);
+
+			return new RxInterpreterFactory (code);
+		}
+
+		public void EmitFalse ()
+		{
+			Emit (RxOp.False);
+		}
+
+		public void EmitTrue ()
+		{
+			Emit (RxOp.True);
+		}
+
+		// Emits a single character test.  The one-byte form is used for
+		// characters below 256, the two-byte form otherwise.  For the
+		// IgnoreCase variants the character is stored lowercased.
+		public void EmitCharacter (char c, bool negate, bool ignore, bool reverse)
+		{
+			if (ignore)
+				c = Char.ToLower (c);
+			int offset = CharFlags (negate, ignore, reverse);
+			if (c < 256) {
+				Emit ((RxOp)((int)RxOp.Char + offset));
+				Emit ((byte)c);
+			} else {
+				Emit ((RxOp)((int)RxOp.UnicodeChar + offset));
+				Emit ((ushort)c);
+			}
+		}
+
+		// Only the plain, forward Category.Any is supported so far.
+		public void EmitCategory (Category cat, bool negate, bool reverse)
+		{
+			if (negate | reverse)
+				throw new NotSupportedException ();
+			switch (cat) {
+			case Category.Any:
+				Emit (RxOp.CategoryAny);
+				break;
+			default:
+				// report through the exception instead of the console
+				throw new NotSupportedException ("Missing cat: " + cat);
+			}
+		}
+
+		public void EmitNotCategory (Category cat, bool negate, bool reverse)
+		{
+			throw new NotSupportedException ();
+		}
+
+		// Emits an inclusive character range test; one-byte bounds are
+		// used when both bounds are below 256.
+		public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse)
+		{
+			int offset = CharFlags (negate, ignore, reverse);
+			if (lo < 256 && hi < 256) {
+				Emit ((RxOp)((int)RxOp.Range + offset));
+				Emit ((byte)lo);
+				Emit ((byte)hi);
+			} else {
+				Emit ((RxOp)((int)RxOp.UnicodeRange + offset));
+				Emit ((ushort)lo);
+				Emit ((ushort)hi);
+			}
+		}
+
+		// Emits a character set as a bitmap: the lowest character, the
+		// bitmap length in bytes and then the bitmap itself, where bit i
+		// of byte n stands for the character lo + n * 8 + i.
+		public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse)
+		{
+			int offset = CharFlags (negate, ignore, reverse);
+			int len = (set.Length + 0x7) >> 3;
+			if (lo < 256 && len < 256) {
+				Emit ((RxOp)((int)RxOp.Bitmap + offset));
+				Emit ((byte)lo);
+				Emit ((byte)len);
+			} else {
+				Emit ((RxOp)((int)RxOp.UnicodeBitmap + offset));
+				Emit ((ushort)lo);
+				Emit ((ushort)len);
+			}
+			// emit the bitmap bytes; the last byte is zero padded
+			int b = 0;
+			while (len-- != 0) {
+				int word = 0;
+				for (int i = 0; i < 8; ++ i) {
+					if (b >= set.Length)
+						break;
+					if (set [b ++])
+						word |= 1 << i;
+				}
+				Emit ((byte)word);
+			}
+		}
+
+		// Emits a literal string.  Strings shorter than 256 characters
+		// made only of characters below 256 use the compact form (one
+		// byte length, one byte per char); everything else uses the
+		// unicode form (two-byte length, two bytes per char).
+		// Throws NotSupportedException if the string is longer than
+		// ushort.MaxValue characters.
+		public void EmitString (string str, bool ignore, bool reverse)
+		{
+			// The IgnoreCase opcodes compare the lowercased input char
+			// with the stored data, so the data must be lowercased
+			// here (EmitCharacter () does the same).
+			if (ignore)
+				str = str.ToLower ();
+			// validate before emitting anything, so that a failure
+			// does not leave a partial instruction in the program
+			if (str.Length > ushort.MaxValue)
+				throw new NotSupportedException ();
+			int offset = StringFlags (ignore, reverse);
+			bool islatin1 = str.Length < 256;
+			for (int i = 0; islatin1 && i < str.Length; ++i)
+				islatin1 = str [i] < 256;
+			if (islatin1) {
+				Emit ((RxOp)((int)RxOp.String + offset));
+				Emit ((byte)str.Length);
+				for (int i = 0; i < str.Length; ++i)
+					Emit ((byte)str [i]);
+			} else {
+				Emit ((RxOp)((int)RxOp.UnicodeString + offset));
+				Emit ((ushort)str.Length);
+				for (int i = 0; i < str.Length; ++i)
+					Emit ((ushort)str [i]);
+			}
+		}
+
+		// Emits the opcode of a position assertion.  Position.Start and
+		// Position.StartOfString share the StartOfString opcode.
+		public void EmitPosition (Position pos)
+		{
+			switch (pos) {
+			case Position.Any:
+				Emit (RxOp.AnyPosition);
+				break;
+			case Position.Start:
+				Emit (RxOp.StartOfString);
+				break;
+			case Position.StartOfString:
+				Emit (RxOp.StartOfString);
+				break;
+			case Position.StartOfLine:
+				Emit (RxOp.StartOfLine);
+				break;
+			case Position.StartOfScan:
+				Emit (RxOp.StartOfScan);
+				break;
+			case Position.End:
+				Emit (RxOp.End);
+				break;
+			case Position.EndOfString:
+				Emit (RxOp.EndOfString);
+				break;
+			case Position.EndOfLine:
+				Emit (RxOp.EndOfLine);
+				break;
+			case Position.Boundary:
+				Emit (RxOp.WordBoundary);
+				break;
+			case Position.NonBoundary:
+				Emit (RxOp.NoWordBoundary);
+				break;
+			default:
+				throw new NotSupportedException ();
+			}
+		}
+
+		public void EmitOpen (int gid)
+		{
+			if (gid > ushort.MaxValue)
+				throw new NotSupportedException ();
+			Emit (RxOp.OpenGroup);
+			Emit ((ushort)gid);
+		}
+
+		public void EmitClose (int gid)
+		{
+			if (gid > ushort.MaxValue)
+				throw new NotSupportedException ();
+			Emit (RxOp.CloseGroup);
+			Emit ((ushort)gid);
+		}
+
+		public void EmitBalanceStart(int gid, int balance, bool capture, LinkRef tail)
+		{
+			throw new NotSupportedException ();
+		}
+
+		public void EmitBalance ()
+		{
+			throw new NotSupportedException ();
+		}
+
+		// Emits a backreference to group `gid` (stored in two bytes).
+		public void EmitReference (int gid, bool ignore, bool reverse)
+		{
+			// same limit as EmitOpen ()/EmitClose (): do not silently
+			// truncate the group id
+			if (gid > ushort.MaxValue)
+				throw new NotSupportedException ();
+			Emit ((RxOp)((int)RxOp.Reference + StringFlags (ignore, reverse)));
+			Emit ((ushort)gid);
+		}
+
+		public void EmitIfDefined (int gid, LinkRef tail)
+		{
+			throw new NotSupportedException ();
+		}
+
+		public void EmitSub (LinkRef tail)
+		{
+			throw new NotSupportedException ();
+		}
+
+		public void EmitTest (LinkRef yes, LinkRef tail)
+		{
+			throw new NotSupportedException ();
+		}
+
+		public void EmitBranch (LinkRef next)
+		{
+			BeginLink (next);
+			Emit (RxOp.Branch);
+			EmitLink (next);
+		}
+
+		public void EmitJump (LinkRef target)
+		{
+			BeginLink (target);
+			Emit (RxOp.Jump);
+			EmitLink (target);
+		}
+
+		public void EmitRepeat (int min, int max, bool lazy, LinkRef until)
+		{
+			throw new NotSupportedException ();
+		}
+
+		public void EmitUntil (LinkRef repeat)
+		{
+			throw new NotSupportedException ();
+		}
+
+		public void EmitIn (LinkRef tail)
+		{
+			throw new NotSupportedException ();
+		}
+
+		// Emits the program header: opcode, two-byte group count and the
+		// four-byte min and max values (11 bytes in total).
+		public void EmitInfo (int count, int min, int max)
+		{
+			// validate first so a failure leaves the program untouched
+			if (count > ushort.MaxValue)
+				throw new NotSupportedException ();
+			Emit (RxOp.Info);
+			Emit ((ushort)count);
+			Emit (min);
+			Emit (max);
+		}
+
+		// Emits a repeat instruction: opcode, two-byte offset of the
+		// tail and the four-byte min and max counts.  The repeated
+		// expression follows the instruction.
+		public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail)
+		{
+			BeginLink (tail);
+			if (lazy)
+				Emit (RxOp.RepeatLazy);
+			else
+				Emit (RxOp.Repeat);
+			EmitLink (tail);
+			Emit (min);
+			Emit (max);
+		}
+
+		// Emits an anchor instruction: opcode, two-byte offset of the
+		// tail and the two-byte `offset` value.
+		public void EmitAnchor (bool reverse, int offset, LinkRef tail)
+		{
+			// validate before touching the link or the program, so a
+			// failure leaves neither half written
+			if (offset > ushort.MaxValue)
+				throw new NotSupportedException ();
+			BeginLink (tail);
+			if (reverse)
+				Emit (RxOp.AnchorReverse);
+			else
+				Emit (RxOp.Anchor);
+			EmitLink (tail);
+			Emit ((ushort)offset);
+		}
+
+		// event for the CILCompiler
+		public void EmitBranchEnd ()
+		{
+		}
+
+		public void EmitAlternationEnd ()
+		{
+		}
+
+		public LinkRef NewLink ()
+		{
+			return new RxLinkRef ();
+		}
+
+		// Makes every branch recorded in `link` point at the current
+		// position.  The stored offset is relative to the start of the
+		// branch instruction.  Throws NotSupportedException if a
+		// distance does not fit in two bytes.
+		public void ResolveLink (LinkRef link)
+		{
+			RxLinkRef l = (RxLinkRef)link;
+			for (int i = 0; i < l.current; i += 2) {
+				int offset = curpos - l.offsets [i];
+				if (offset > ushort.MaxValue)
+					throw new NotSupportedException ();
+				int offsetpos = l.offsets [i + 1];
+				program [offsetpos] = (byte)offset;
+				program [offsetpos + 1] = (byte)(offset >> 8);
+			}
+		}
+
+	}
+
+	/*
+	 * IMachineFactory for the RxOp byte code: keeps the compiled program
+	 * together with the group name mappings and creates one
+	 * RxInterpreter per NewInstance () call.
+	 */
+	class RxInterpreterFactory : IMachineFactory {
+		private byte[] program;
+		private IDictionary mapping;
+		private string[] namesMapping;
+
+		public RxInterpreterFactory (byte[] program) {
+			this.program = program;
+		}
+
+		// Every instance shares the same program array.
+		public IMachine NewInstance () {
+			return new RxInterpreter (program);
+		}
+
+		// Little endian two-byte value stored in bytes 1 and 2 of the
+		// program.
+		public int GroupCount {
+			get {
+				int low = program [1];
+				int high = program [2] << 8;
+				return low | high;
+			}
+		}
+
+		public IDictionary Mapping {
+			get { return mapping; }
+			set { mapping = value; }
+		}
+
+		public string [] NamesMapping {
+			get { return namesMapping; }
+			set { namesMapping = value; }
+		}
+	}
+
+}
+
Added: trunk/mcs/class/System/System.Text.RegularExpressions/RxInterpreter.cs
===================================================================
--- trunk/mcs/class/System/System.Text.RegularExpressions/RxInterpreter.cs
2008-02-12 17:25:33 UTC (rev 95533)
+++ trunk/mcs/class/System/System.Text.RegularExpressions/RxInterpreter.cs
2008-02-12 17:35:01 UTC (rev 95534)
@@ -0,0 +1,308 @@
+
+using System;
+using System.Collections;
+
+namespace System.Text.RegularExpressions {
+
+	/*
+	 * Executes the RxOp byte code.  Alternatives, repetitions and the
+	 * anchor scan are implemented with recursive calls to EvalByteCode ().
+	 */
+	class RxInterpreter: BaseMachine {
+		byte[] program;
+		// text being scanned and the [string_start, string_end) window
+		string str;
+		int string_start;
+		int string_end;
+		int group_count;
+		// position where the current match begins
+		int match_start;
+		int[] groups;
+
+		// Reads a little endian four-byte integer at code [pc].
+		static int ReadInt (byte[] code, int pc)
+		{
+			int val = code [pc];
+			val |= code [pc + 1] << 8;
+			val |= code [pc + 2] << 16;
+			val |= code [pc + 3] << 24;
+			return val;
+		}
+
+		// The group count is the two-byte value stored in bytes 1 and 2
+		// of the program, plus one.
+		public RxInterpreter (byte[] program)
+		{
+			this.program = program;
+			group_count = 1 + (program [1] | (program [2] << 8));
+			groups = new int [group_count];
+		}
+
+		// Runs the program on `text`, starting at `start` and not
+		// reading at or beyond `end`.  Returns Match.Empty when the
+		// program does not match.
+		public override Match Scan (Regex regex, string text, int start, int end) {
+			str = text;
+			string_start = start;
+			string_end = end;
+			// Without an Anchor instruction the match can only begin
+			// at `start`; do not reuse the value of a previous scan.
+			match_start = start;
+			int res = 0;
+			// evaluation starts at offset 11 of the program
+			if (EvalByteCode (11, start, ref res)) {
+				Match m = new Match (regex, this, text, end, 0, match_start, res - match_start);
+				return m;
+			}
+			return Match.Empty;
+		}
+
+		// Evaluates the program from `pc` with the input position
+		// `strpos`.  Returns true, with the position after the match in
+		// `strpos_result`, when a True instruction is reached; returns
+		// false as soon as an instruction fails.
+		// Throws NotSupportedException for opcodes that are not
+		// implemented yet.
+		bool EvalByteCode (int pc, int strpos, ref int strpos_result)
+		{
+			int length, start, end;
+			while (true) {
+				switch ((RxOp)program [pc]) {
+				case RxOp.True:
+					strpos_result = strpos;
+					return true;
+				case RxOp.False:
+					return false;
+				case RxOp.AnyPosition:
+					pc++;
+					continue;
+				case RxOp.StartOfString:
+					if (strpos != 0)
+						return false;
+					pc++;
+					continue;
+				case RxOp.StartOfLine:
+					if (strpos == 0 || str [strpos - 1] == '\n') {
+						pc++;
+						continue;
+					}
+					return false;
+				case RxOp.StartOfScan:
+					if (strpos != string_start)
+						return false;
+					pc++;
+					continue;
+				case RxOp.End:
+					// end of the string or just before a final newline
+					if (strpos == string_end || (strpos == string_end - 1 && str [strpos] == '\n')) {
+						pc++;
+						continue;
+					}
+					return false;
+				case RxOp.EndOfString:
+					if (strpos != string_end)
+						return false;
+					pc++;
+					continue;
+				case RxOp.EndOfLine:
+					if (strpos == string_end || str [strpos] == '\n') {
+						pc++;
+						continue;
+					}
+					return false;
+				case RxOp.Anchor:
+					// FIXME: test anchor
+					// (the two-byte offset operand at pc + 3 is not used yet)
+					pc += program [pc + 1] | (program [pc + 2] << 8);
+					// The end-of-string position must be tried as
+					// well, otherwise empty matches at the end of
+					// the input (for example "" =~ /$/) are missed.
+					// Every instruction checks strpos against
+					// string_end before reading the input.
+					while (strpos <= string_end) {
+						int res = strpos;
+						if (EvalByteCode (pc, strpos, ref res)) {
+							match_start = strpos;
+							strpos_result = res;
+							return true;
+						}
+						strpos++;
+					}
+					return false;
+				case RxOp.Jump:
+					pc += program [pc + 1] | (program [pc + 2] << 8);
+					continue;
+				case RxOp.String:
+					start = pc + 2;
+					length = program [pc + 1];
+					if (strpos + length > string_end)
+						return false;
+					end = start + length;
+					for (; start < end; ++start) {
+						if (str [strpos] != program [start])
+							return false;
+						strpos++;
+					}
+					pc = end;
+					continue;
+				case RxOp.StringIgnoreCase:
+					start = pc + 2;
+					length = program [pc + 1];
+					if (strpos + length > string_end)
+						return false;
+					end = start + length;
+					for (; start < end; ++start) {
+						if (str [strpos] != program [start] && Char.ToLower (str [strpos]) != program [start])
+							return false;
+						strpos++;
+					}
+					pc = end;
+					continue;
+				case RxOp.UnicodeString:
+					start = pc + 3;
+					length = program [pc + 1] | (program [pc + 2] << 8);
+					if (strpos + length > string_end)
+						return false;
+					end = start + length * 2;
+					for (; start < end; start += 2) {
+						int c = program [start] | (program [start + 1] << 8);
+						if (str [strpos] != c)
+							return false;
+						strpos++;
+					}
+					pc = end;
+					continue;
+				case RxOp.UnicodeStringIgnoreCase:
+					start = pc + 3;
+					length = program [pc + 1] | (program [pc + 2] << 8);
+					if (strpos + length > string_end)
+						return false;
+					end = start + length * 2;
+					for (; start < end; start += 2) {
+						int c = program [start] | (program [start + 1] << 8);
+						if (str [strpos] != c && Char.ToLower (str [strpos]) != c)
+							return false;
+						strpos++;
+					}
+					pc = end;
+					continue;
+				case RxOp.Char:
+					if (strpos < string_end && (str [strpos] == program [pc + 1])) {
+						strpos++;
+						pc += 2;
+						continue;
+					}
+					return false;
+				case RxOp.NoChar:
+					if (strpos < string_end && (str [strpos] != program [pc + 1])) {
+						strpos++;
+						pc += 2;
+						continue;
+					}
+					return false;
+				case RxOp.CharIgnoreCase:
+					if (strpos < string_end && (Char.ToLower (str [strpos]) == program [pc + 1])) {
+						strpos++;
+						pc += 2;
+						continue;
+					}
+					return false;
+				case RxOp.NoCharIgnoreCase:
+					if (strpos < string_end && (Char.ToLower (str [strpos]) != program [pc + 1])) {
+						strpos++;
+						pc += 2;
+						continue;
+					}
+					return false;
+				case RxOp.Range:
+					if (strpos < string_end) {
+						int c = str [strpos];
+						if (c >= program [pc + 1] && c <= program [pc + 2]) {
+							strpos++;
+							pc += 3;
+							continue;
+						}
+					}
+					return false;
+				case RxOp.NoRange:
+					if (strpos < string_end) {
+						int c = str [strpos];
+						if (c >= program [pc + 1] && c <= program [pc + 2])
+							return false;
+						strpos++;
+						pc += 3;
+						continue;
+					}
+					return false;
+				case RxOp.RangeIgnoreCase:
+					if (strpos < string_end) {
+						int c = Char.ToLower (str [strpos]);
+						if (c >= program [pc + 1] && c <= program [pc + 2]) {
+							strpos++;
+							pc += 3;
+							continue;
+						}
+					}
+					return false;
+				case RxOp.NoRangeIgnoreCase:
+					if (strpos < string_end) {
+						int c = Char.ToLower (str [strpos]);
+						if (c >= program [pc + 1] && c <= program [pc + 2])
+							return false;
+						strpos++;
+						pc += 3;
+						continue;
+					}
+					return false;
+				case RxOp.Bitmap:
+					if (strpos < string_end) {
+						// index of the char relative to the lowest char of the set
+						int c = str [strpos];
+						c -= program [pc + 1];
+						length = program [pc + 2];
+						if (c < 0 || c >= (length << 3))
+							return false;
+						pc += 3;
+						if ((program [pc + (c >> 3)] & (1 << (c & 0x7))) != 0) {
+							strpos++;
+							pc += length;
+							continue;
+						}
+					}
+					return false;
+				case RxOp.BitmapIgnoreCase:
+					if (strpos < string_end) {
+						int c = Char.ToLower (str [strpos]);
+						c -= program [pc + 1];
+						length = program [pc + 2];
+						if (c < 0 || c >= (length << 3))
+							return false;
+						pc += 3;
+						if ((program [pc + (c >> 3)] & (1 << (c & 0x7))) != 0) {
+							strpos++;
+							pc += length;
+							continue;
+						}
+					}
+					return false;
+				case RxOp.CategoryAny:
+					if (strpos < string_end && str [strpos] != '\n') {
+						strpos++;
+						pc++;
+						continue;
+					}
+					return false;
+				case RxOp.Branch: {
+					// try the alternative that follows the instruction;
+					// if it fails, go on at the branch target
+					int res = 0;
+					if (EvalByteCode (pc + 3, strpos, ref res)) {
+						strpos_result = res;
+						return true;
+					}
+					pc += program [pc + 1] | (program [pc + 2] << 8);
+					continue;
+				}
+				case RxOp.Repeat:
+				case RxOp.RepeatLazy: {
+					// NOTE: the lazy variant is currently evaluated
+					// exactly like the greedy one
+					int res = 0;
+					start = ReadInt (program, pc + 3);	// min
+					end = ReadInt (program, pc + 7);	// max
+					length = 0;
+					while (length < end) {
+						if (!EvalByteCode (pc + 11, strpos, ref res)) {
+							if (length >= start) {
+								goto repeat_success;
+							}
+							return false;
+						}
+						// A zero-width iteration would succeed in the
+						// same way for all the remaining iterations:
+						// stop here instead of looping up to max times.
+						if (res == strpos)
+							goto repeat_success;
+						strpos = res;
+						length++;
+					}
+					if (length != end)
+						return false;
+				repeat_success:
+					pc += program [pc + 1] | (program [pc + 2] << 8);
+					continue;
+				}
+				default:
+					// report through the exception instead of the console
+					throw new NotSupportedException (String.Format ("evaluating: {0} at pc: {1}, strpos: {2}", (RxOp)program [pc], pc, strpos));
+				}
+			}
+		}
+	}
+}
+
Added: trunk/mcs/class/System/System.Text.RegularExpressions/RxOp.cs
===================================================================
--- trunk/mcs/class/System/System.Text.RegularExpressions/RxOp.cs
2008-02-12 17:25:33 UTC (rev 95533)
+++ trunk/mcs/class/System/System.Text.RegularExpressions/RxOp.cs
2008-02-12 17:35:01 UTC (rev 95534)
@@ -0,0 +1,154 @@
+
+namespace System.Text.RegularExpressions {
+
+	// Opcodes of the regular expression byte code.
+	// For the IgnoreCase opcodes, the char data is stored lowercased.
+	// Multi-byte integers are stored in little endian format.
+	// The compiler selects the variants of an opcode family by adding a
+	// displacement to the first member of the family, so the order of
+	// the members inside each family must not change.
+	enum RxOp : byte {
+		// followed by a two-byte count and by four-byte min and max
+		// integers
+		Info,
+
+		False,
+		True,
+
+		// position anchors
+		AnyPosition,
+		StartOfString,
+		StartOfLine,
+		StartOfScan,
+		EndOfString,
+		EndOfLine,
+		End,
+		WordBoundary,
+		NoWordBoundary,
+
+		// latin1 strings
+		// followed by single byte length and latin1 bytes
+		// keep the order, see EmitString ()
+		String,
+		StringIgnoreCase,
+		StringReverse,
+		StringIgnoreCaseReverse,
+
+		// followed by two byte length and unicode chars (two bytes
+		// per char)
+		// a better setup may be to have all the unicode chars in a
+		// separate char array and reference them from here with
+		// (offset, length) pairs
+		// keep the order, see EmitString ()
+		UnicodeString,
+		UnicodeStringIgnoreCase,
+		UnicodeStringReverse,
+		UnicodeStringIgnoreCaseReverse,
+
+		// latin1 single char
+		// followed by a latin1 byte
+		// keep the order, see EmitCharacter ()
+		Char,
+		NoChar,
+		CharIgnoreCase,
+		NoCharIgnoreCase,
+		CharReverse,
+		NoCharReverse,
+		CharIgnoreCaseReverse,
+		NoCharIgnoreCaseReverse,
+
+		// followed by latin1 min and max bytes
+		// keep the order, see EmitRange ()
+		Range,
+		NoRange,
+		RangeIgnoreCase,
+		NoRangeIgnoreCase,
+		RangeReverse,
+		NoRangeReverse,
+		RangeIgnoreCaseReverse,
+		NoRangeIgnoreCaseReverse,
+
+		// followed by lowbyte and length of the bitmap (one byte
+		// each) and by the bitmap
+		// keep the order, see EmitSet ()
+		Bitmap,
+		NoBitmap,
+		BitmapIgnoreCase,
+		NoBitmapIgnoreCase,
+		BitmapReverse,
+		NoBitmapReverse,
+		BitmapIgnoreCaseReverse,
+		NoBitmapIgnoreCaseReverse,
+
+		// unicode chars
+		// followed by a unicode char
+		// keep the order, see EmitCharacter ()
+		UnicodeChar,
+		NoUnicodeChar,
+		UnicodeCharIgnoreCase,
+		NoUnicodeCharIgnoreCase,
+		UnicodeCharReverse,
+		NoUnicodeCharReverse,
+		UnicodeCharIgnoreCaseReverse,
+		NoUnicodeCharIgnoreCaseReverse,
+
+		// followed by unicode char min and max chars
+		// keep the order, see EmitRange ()
+		UnicodeRange,
+		UnicodeNoRange,
+		UnicodeRangeIgnoreCase,
+		UnicodeNoRangeIgnoreCase,
+		UnicodeRangeReverse,
+		UnicodeNoRangeReverse,
+		UnicodeRangeIgnoreCaseReverse,
+		UnicodeNoRangeIgnoreCaseReverse,
+
+		// followed by lowchar and length of the bitmap (two bytes
+		// each) and by the bitmap
+		// keep the order, see EmitSet ()
+		UnicodeBitmap,
+		UnicodeNoBitmap,
+		UnicodeBitmapIgnoreCase,
+		UnicodeNoBitmapIgnoreCase,
+		UnicodeBitmapReverse,
+		UnicodeNoBitmapReverse,
+		UnicodeBitmapIgnoreCaseReverse,
+		UnicodeNoBitmapIgnoreCaseReverse,
+
+		// add reverse and negate versions of the categories
+		CategoryAny,
+		CategoryDigit,
+		CategoryWord,
+		CategoryWhiteSpace,
+		CategoryEcmaWord,
+		CategoryEcmaWhiteSpace,
+
+		// followed by a unicode category value (byte)
+		CategoryUnicode,
+		// add more categories
+
+		// backreferences
+		// followed by two-byte reference number
+		// keep the order, see EmitReference ()
+		// NOTE(review): "Reversence" looks like a typo for
+		// "Reference"; the names are kept as they are because code
+		// outside this file may use them.
+		Reference,
+		ReversenceIgnoreCase,
+		ReferenceReverse,
+		ReversenceIgnoreCaseReverse,
+
+		// group/capture support
+		// followed by two-byte group id
+		OpenGroup,
+		CloseGroup,
+
+		// skip ahead num bytes
+		// followed by two-byte offset, relative to the start of the
+		// instruction
+		Jump,
+
+		// followed by two-byte offset, relative to the start of the
+		// instruction
+		Branch,
+
+		// anchoring expression
+		// followed by the two-byte offset of the tail and by a
+		// two-byte offset
+		Anchor,
+		AnchorReverse,
+
+		// repetition support
+		// followed by the two-byte offset of the tail and by
+		// four-byte min and max ints
+		Repeat,
+		RepeatLazy,
+		// followed by min byte
+		RepeatInfinite,
+		RepeatInfiniteLazy,
+	}
+}
+
_______________________________________________
Mono-patches maillist - [email protected]
http://lists.ximian.com/mailman/listinfo/mono-patches