Revision: 3689
Author: [email protected]
Date: Mon Jan 25 04:56:49 2010
Log: Fix bug in character-set merging. Add test case.
See Chromium bug 32637.

Review URL: http://codereview.chromium.org/553067
http://code.google.com/p/v8/source/detail?r=3689

Modified:
 /branches/bleeding_edge/src/jsregexp.cc
 /branches/bleeding_edge/test/cctest/test-regexp.cc

=======================================
--- /branches/bleeding_edge/src/jsregexp.cc     Thu Jan  7 11:01:23 2010
+++ /branches/bleeding_edge/src/jsregexp.cc     Mon Jan 25 04:56:49 2010
@@ -4462,10 +4462,13 @@
   while (i1 < n1 || i2 < n2) {
     CharacterRange next_range;
     int range_source;
-    if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) {
+    if (i2 == n2 ||
+ (i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) {
+      // Next smallest element is in first set.
       next_range = first_set->at(i1++);
       range_source = kInsideFirst;
     } else {
+      // Next smallest element is in second set.
       next_range = second_set->at(i2++);
       range_source = kInsideSecond;
     }
=======================================
--- /branches/bleeding_edge/test/cctest/test-regexp.cc Thu Jan 7 22:40:09 2010 +++ /branches/bleeding_edge/test/cctest/test-regexp.cc Mon Jan 25 04:56:49 2010
@@ -1650,6 +1650,163 @@
   ASSERT_EQ(30, list->at(0).to());
 }

+// Checks whether a character is in the set represented by a list of ranges.
+static bool CharacterInSet(ZoneList<CharacterRange>* set, uc16 value) {
+  for (int i = 0; i < set->length(); i++) {
+    CharacterRange range = set->at(i);
+    if (range.from() <= value && value <= range.to()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+TEST(CharacterRangeMerge) {
+  ZoneScope zone_scope(DELETE_ON_EXIT);
+  ZoneList<CharacterRange> l1(4);
+  ZoneList<CharacterRange> l2(4);
+ // Create all combinations of intersections of ranges, both singletons and
+  // longer.
+
+  int offset = 0;
+
+  // The five kinds of singleton intersections:
+  //     X
+  //   Y      - outside before
+  //    Y     - outside touching start
+  //     Y    - overlap
+  //      Y   - outside touching end
+  //       Y  - outside after
+
+  for (int i = 0; i < 5; i++) {
+    l1.Add(CharacterRange::Singleton(offset + 2));
+    l2.Add(CharacterRange::Singleton(offset + i));
+    offset += 6;
+  }
+
+  // The seven kinds of singleton/non-singleton intersections:
+  //    XXX
+  //  Y        - outside before
+  //   Y       - outside touching start
+  //    Y      - inside touching start
+  //     Y     - entirely inside
+  //      Y    - inside touching end
+  //       Y   - outside touching end
+  //        Y  - disjoint after
+
+  for (int i = 0; i < 7; i++) {
+    l1.Add(CharacterRange::Range(offset + 2, offset + 4));
+    l2.Add(CharacterRange::Singleton(offset + i));
+    offset += 8;
+  }
+
+  // The eleven kinds of non-singleton intersections:
+  //
+  //       XXXXXXXX
+  // YYYY                  - outside before.
+  //   YYYY                - outside touching start.
+  //     YYYY              - overlapping start
+  //       YYYY            - inside touching start
+  //         YYYY          - entirely inside
+  //           YYYY        - inside touching end
+  //             YYYY      - overlapping end
+  //               YYYY    - outside touching end
+  //                 YYYY  - outside after
+  //       YYYYYYYY        - identical
+  //     YYYYYYYYYYYY      - containing entirely.
+
+  for (int i = 0; i < 9; i++) {
+    l1.Add(CharacterRange::Range(offset + 6, offset + 15));  // Length 8.
+    l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3));
+    offset += 22;
+  }
+  l1.Add(CharacterRange::Range(offset + 6, offset + 15));
+  l2.Add(CharacterRange::Range(offset + 6, offset + 15));
+  offset += 22;
+  l1.Add(CharacterRange::Range(offset + 6, offset + 15));
+  l2.Add(CharacterRange::Range(offset + 4, offset + 17));
+  offset += 22;
+
+  // Different kinds of multi-range overlap:
+  // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
+  //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
+
+  l1.Add(CharacterRange::Range(offset, offset + 21));
+  l1.Add(CharacterRange::Range(offset + 31, offset + 52));
+  for (int i = 0; i < 6; i++) {
+    l2.Add(CharacterRange::Range(offset + 2, offset + 5));
+    l2.Add(CharacterRange::Singleton(offset + 8));
+    offset += 9;
+  }
+
+  ASSERT(CharacterRange::IsCanonical(&l1));
+  ASSERT(CharacterRange::IsCanonical(&l2));
+
+  ZoneList<CharacterRange> first_only(4);
+  ZoneList<CharacterRange> second_only(4);
+  ZoneList<CharacterRange> both(4);
+
+  // Merge one direction.
+  CharacterRange::Merge(&l1, &l2, &first_only, &second_only, &both);
+
+  CHECK(CharacterRange::IsCanonical(&first_only));
+  CHECK(CharacterRange::IsCanonical(&second_only));
+  CHECK(CharacterRange::IsCanonical(&both));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
+    CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
+    CHECK((in_first && in_second) == CharacterInSet(&both, i));
+  }
+
+  first_only.Clear();
+  second_only.Clear();
+  both.Clear();
+
+  // Merge other direction.
+  CharacterRange::Merge(&l2, &l1, &second_only, &first_only, &both);
+
+  CHECK(CharacterRange::IsCanonical(&first_only));
+  CHECK(CharacterRange::IsCanonical(&second_only));
+  CHECK(CharacterRange::IsCanonical(&both));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
+    CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
+    CHECK((in_first && in_second) == CharacterInSet(&both, i));
+  }
+
+  first_only.Clear();
+  second_only.Clear();
+  both.Clear();
+
+  // Merge but don't record all combinations.
+  CharacterRange::Merge(&l1, &l2, NULL, NULL, &both);
+
+  CHECK(CharacterRange::IsCanonical(&both));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first && in_second) == CharacterInSet(&both, i));
+  }
+
+  // Merge into same set.
+  ZoneList<CharacterRange> all(4);
+  CharacterRange::Merge(&l1, &l2, &all, &all, &all);
+
+  CHECK(CharacterRange::IsCanonical(&all));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first || in_second) == CharacterInSet(&all, i));
+  }
+}


 TEST(Graph) {

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to