hello all,
this is my first attempt at fixing bugs #45966 and #45976, so i'm
posting it to the list first for comments; i'll attach it to the bug reports
once everyone is happy. i'm sure the code is not the best ever written
:-) but according to my tests it fixes the problems reported - which are
related to ignoring case on intervals. a couple of questions though:
- can you declare statics on a method? if yes, how? i would like to
remove the two member variables i've added since they are only used
within AddRange.
- does anyone know of a test suite of regexp tests done for another
language? any language would do as long as the tests are fairly
complete. i would like to update RegexTest.cs (hopefully that's our unit
test for this subsystem) but unfortunately my knowledge of regexps is
limited.
thanks for your time,
marco
--
serenese y apunte bien! va usted a matar un hombre! -- ultimas palavras
de che guevara
--- interval.cs.~1.1.~ 2002-01-31 08:00:16.000000000 +0000
+++ interval.cs 2004-01-10 19:58:53.000000000 +0000
@@ -95,6 +95,14 @@
return low <= i && i <= high;
}
+ // Partial overlap only: true when exactly one end point of i lies inside this interval.
+ public bool Intersects (Interval i) {
+     if (IsEmpty || i.IsEmpty)
+         return false;
+
+     return Contains (i.low) != Contains (i.high);
+ }
+
public void Merge (Interval i) {
if (i.IsEmpty)
return;
--- syntax.cs.~1.1.~ 2002-01-31 08:00:16.000000000 +0000
+++ syntax.cs 2004-01-11 12:51:32.000000000 +0000
@@ -779,11 +779,39 @@
}
public void AddCharacter (char c) {
- intervals.Add (new Interval (c, c));
+     // TODO: routing a single character through AddRange is certainly not the
+     // TODO: most efficient way of doing things, but it produces correct results.
+     AddRange (c, c);
}
public void AddRange (char lo, char hi) {
- intervals.Add (new Interval (lo, hi));
+     // With ignore-case on, the stored intervals must not use upper case, so
+     // any part of [lo, hi] that falls within 'A'..'Z' is shifted down to
+     // 'a'..'z' and the parts outside that range are kept unchanged. This also
+     // covers a range that strictly spans 'A'..'Z' (e.g. [0-b]), which has
+     // neither end point inside the upper-case range.
+     if (ignore) {
+         // Clamp [lo, hi] to the upper-case range to find the overlap.
+         int first_upper = lo > upper_case_characters.low ? lo : upper_case_characters.low;
+         int last_upper = hi < upper_case_characters.high ? hi : upper_case_characters.high;
+
+         if (first_upper <= last_upper) {
+             // Lower-cased image of the overlapping upper-case letters.
+             intervals.Add (new Interval (first_upper + distance_between_upper_and_lower_case,
+                                          last_upper + distance_between_upper_and_lower_case));
+
+             // Characters below the upper-case range need no case folding.
+             if (lo < upper_case_characters.low)
+                 intervals.Add (new Interval (lo, upper_case_characters.low - 1));
+
+             // Likewise for characters above the upper-case range.
+             if (hi > upper_case_characters.high)
+                 intervals.Add (new Interval (upper_case_characters.high + 1, hi));
+             return;
+         }
+     }
+
+     intervals.Add (new Interval (lo, hi));
}
public override void Compile (ICompiler cmp, bool reverse) {
@@ -871,6 +899,8 @@
return 3; //
Range
}
+ private static Interval upper_case_characters = new Interval ('A', 'Z'); // code points 65..90
+ private const int distance_between_upper_and_lower_case = 'a' - 'A'; // evaluates to 32
private bool negate, ignore;
private bool[] pos_cats, neg_cats;
private IntervalCollection intervals;