Author: Armin Rigo <[email protected]>
Branch: unicode-utf8-re
Changeset: r93233:3c9e5313506f
Date: 2017-12-01 18:19 +0100
http://bitbucket.org/pypy/pypy/changeset/3c9e5313506f/

Log:    Start of step 1

        Making sure that we don't use integer arithmetic uncheckedly on
        string positions, because they are going to be byte positions with
        utf8. In-progress.

diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py
--- a/rpython/rlib/debug.py
+++ b/rpython/rlib/debug.py
@@ -316,14 +316,21 @@
 class ExpectedRegularInt(Exception):
     pass
 
+class NegativeArgumentNotAllowed(Exception):
+    pass
+
 def check_nonneg(x):
     """Give a translation-time error if 'x' is not known to be non-negative.
     To help debugging, this also gives a translation-time error if 'x' is
     actually typed as an r_uint (in which case the call to check_nonneg()
     is a bit strange and probably unexpected).
     """
-    assert type(x)(-1) < 0     # otherwise, 'x' is a r_uint or similar
-    assert x >= 0
+    try:
+        assert type(x)(-1) < 0     # otherwise, 'x' is a r_uint or similar
+    except NegativeArgumentNotAllowed:
+        pass
+    else:
+        assert x >= 0
     return x
 
 class Entry(ExtRegistryEntry):
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -83,6 +83,9 @@
     def __init__(self, msg):
         self.msg = msg
 
+class EndOfString(Exception):
+    pass
+
 class AbstractMatchContext(object):
     """Abstract base class"""
     _immutable_fields_ = ['pattern[*]', 'flags', 'end']
@@ -135,8 +138,12 @@
         """Similar to str()."""
         raise NotImplementedError
 
+    def debug_check_pos(self, pos):
+        pass
+
     def get_mark(self, gid):
-        return find_mark(self.match_marks, gid)
+        mark = find_mark(self.match_marks, gid)
+        return self.slowly_convert_byte_pos_to_index(mark)
 
     def flatten_marks(self):
         # for testing
@@ -317,7 +324,7 @@
             ctx.jitdriver_RepeatOne.jit_merge_point(
                 self=self, ptr=ptr, ctx=ctx, nextppos=nextppos)
             result = sre_match(ctx, nextppos, ptr, self.start_marks)
-            ptr -= 1
+            ptr = ctx.prev_or_minus1(ptr)
             if result is not None:
                 self.subresult = result
                 self.start_ptr = ptr
@@ -331,28 +338,31 @@
                       reds=['ptr', 'self', 'ctx'],
                       debugprint=(2, 0))   # indices in 'greens'
 
-    def __init__(self, nextppos, ppos3, maxptr, ptr, marks):
+    def __init__(self, nextppos, ppos3, max_count, ptr, marks):
         self.nextppos = nextppos
         self.ppos3 = ppos3
-        self.maxptr = maxptr
+        self.max_count = max_count
         self.start_ptr = ptr
         self.start_marks = marks
 
     def find_first_result(self, ctx):
         ptr = self.start_ptr
         nextppos = self.nextppos
+        max_count = self.max_count
         ppos3 = self.ppos3
-        while ptr <= self.maxptr:
+        while max_count >= 0:
             ctx.jitdriver_MinRepeatOne.jit_merge_point(
                 self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3)
             result = sre_match(ctx, nextppos, ptr, self.start_marks)
             if result is not None:
                 self.subresult = result
                 self.start_ptr = ptr
+                self.max_count = max_count
                 return self
             if not self.next_char_ok(ctx, ptr, ppos3):
                 break
-            ptr += 1
+            ptr = ctx.next(ptr)
+            max_count -= 1
 
     def find_next_result(self, ctx):
         ptr = self.start_ptr
@@ -520,6 +530,7 @@
     need all results; in that case we use the method move_to_next_result()
     of the MatchResult."""
     while True:
+        ctx.debug_check_pos(ptr)
         op = ctx.pat(ppos)
         ppos += 1
 
@@ -551,22 +562,25 @@
             # <ANY>
             if ptr >= ctx.end or rsre_char.is_linebreak(ctx.str(ptr)):
                 return
-            ptr += 1
+            ptr = ctx.next(ptr)
 
         elif op == OPCODE_ANY_ALL:
             # match anything
             # <ANY_ALL>
             if ptr >= ctx.end:
                 return
-            ptr += 1
+            ptr = ctx.next(ptr)
 
         elif op == OPCODE_ASSERT:
             # assert subpattern
             # <ASSERT> <0=skip> <1=back> <pattern>
-            ptr1 = ptr - ctx.pat(ppos+1)
+            try:
+                ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO)
+            except EndOfString:
+                return
             saved = ctx.fullmatch_only
             ctx.fullmatch_only = False
-            stop = ptr1 < 0 or sre_match(ctx, ppos + 2, ptr1, marks) is None
+            stop = sre_match(ctx, ppos + 2, ptr1, marks) is None
             ctx.fullmatch_only = saved
             if stop:
                 return
@@ -576,14 +590,17 @@
         elif op == OPCODE_ASSERT_NOT:
             # assert not subpattern
             # <ASSERT_NOT> <0=skip> <1=back> <pattern>
-            ptr1 = ptr - ctx.pat(ppos+1)
-            saved = ctx.fullmatch_only
-            ctx.fullmatch_only = False
-            stop = (ptr1 >= 0 and sre_match(ctx, ppos + 2, ptr1, marks)
-                                      is not None)
-            ctx.fullmatch_only = saved
-            if stop:
-                return
+            try:
+                ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO)
+            except EndOfString:
+                pass
+            else:
+                saved = ctx.fullmatch_only
+                ctx.fullmatch_only = False
+                stop = sre_match(ctx, ppos + 2, ptr1, marks) is not None
+                ctx.fullmatch_only = saved
+                if stop:
+                    return
             ppos += ctx.pat(ppos)
 
         elif op == OPCODE_AT:
@@ -661,7 +678,7 @@
         elif op == OPCODE_INFO:
             # optimization info block
             # <INFO> <0=skip> <1=flags> <2=min> ...
-            if (ctx.end - ptr) < ctx.pat(ppos+2):
+            if ctx.maximum_distance(ptr, ctx.end) < ctx.pat(ppos+2):
                 return
             ppos += ctx.pat(ppos)
 
@@ -674,7 +691,7 @@
             if ptr >= ctx.end or ctx.str(ptr) != ctx.pat(ppos):
                 return
             ppos += 1
-            ptr += 1
+            ptr = ctx.next(ptr)
 
         elif op == OPCODE_LITERAL_IGNORE:
             # match literal string, ignoring case
@@ -743,8 +760,9 @@
             # use the MAX_REPEAT operator.
             # <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
             start = ptr
-            minptr = start + ctx.pat(ppos+1)
-            if minptr > ctx.end:
+            try:
+                minptr = ctx.next_n(start, ctx.pat(ppos+1), ctx.end)
+            except EndOfString:
                 return    # cannot match
             ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2),
                                       marks)
@@ -765,7 +783,7 @@
             start = ptr
             min = ctx.pat(ppos+1)
             if min > 0:
-                minptr = ptr + min
+                min_count = ptr + min
                 if minptr > ctx.end:
                     return   # cannot match
                 # count using pattern min as the maximum
@@ -773,14 +791,12 @@
                 if ptr < minptr:
                     return   # did not match minimum number of times
 
-            maxptr = ctx.end
+            max_count = sys.maxint
             max = ctx.pat(ppos+2)
             if max != rsre_char.MAXREPEAT:
-                maxptr1 = start + max
-                if maxptr1 <= maxptr:
-                    maxptr = maxptr1
+                max_count = max
             nextppos = ppos + ctx.pat(ppos)
-            result = MinRepeatOneMatchResult(nextppos, ppos+3, maxptr,
+            result = MinRepeatOneMatchResult(nextppos, ppos+3, max_count,
                                              ptr, marks)
             return result.find_first_result(ctx)
 
@@ -818,7 +834,7 @@
 @specializectx
 def find_repetition_end(ctx, ppos, ptr, maxcount, marks):
     end = ctx.end
-    ptrp1 = ptr + 1
+    ptrp1 = ctx.next(ptr)
     # First get rid of the cases where we don't have room for any match.
     if maxcount <= 0 or ptrp1 > end:
         return ptr
@@ -904,7 +920,7 @@
                 ctx.jitdriver_MatchIn.jit_merge_point(ctx=ctx, ptr=ptr,
                                                       end=end, ppos=ppos)
                 if ptr < end and checkerfn(ctx, ptr, ppos):
-                    ptr += 1
+                    ptr = ctx.next(ptr)
                 else:
                     return ptr
     elif checkerfn == match_IN_IGNORE:
@@ -927,7 +943,7 @@
         @specializectx
         def fre(ctx, ptr, end, ppos):
             while ptr < end and checkerfn(ctx, ptr, ppos):
-                ptr += 1
+                ptr = ctx.next(ptr)
             return ptr
     fre = func_with_new_name(fre, 'fre_' + checkerfn.__name__)
     return fre
diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsre/test/support.py
@@ -0,0 +1,102 @@
+import sys, random
+from rpython.rlib import debug
+from rpython.rlib.rsre.rsre_core import _adjust, match_context
+from rpython.rlib.rsre.rsre_core import StrMatchContext, EndOfString
+
+
+class Position(object):
+    def __init__(self, p):
+        assert isinstance(p, int)
+        if p < 0:
+            raise debug.NegativeArgumentNotAllowed(
+                "making a Position with byte index %r" % p)
+        self._p = p
+    def __repr__(self):
+        return '<Position %d>' % (self._p)
+    def __cmp__(self, other):
+        if not isinstance(other, (Position, MinusOnePosition)):
+            raise TypeError("cannot compare %r with %r" % (self, other))
+        return cmp(self._p, other._p)
+
+class MinusOnePosition(object):
+    _p = -1
+    def __repr__(self):
+        return '<MinusOnePosition>'
+    def __cmp__(self, other):
+        if not isinstance(other, (Position, MinusOnePosition)):
+            raise TypeError("cannot compare %r with %r" % (self, other))
+        return cmp(self._p, other._p)
+
+
+class MatchContextForTests(StrMatchContext):
+    """Concrete subclass for matching in a plain string, tweaked for tests"""
+
+    ZERO = Position(0)
+    MINUS1 = MinusOnePosition()
+    EXACT_DISTANCE = False
+
+    def next(self, position):
+        assert isinstance(position, Position)
+        return Position(position._p + 1)
+
+    def prev_or_minus1(self, position):
+        assert isinstance(position, Position)
+        if position._p == 0:
+            return self.MINUS1
+        return Position(position._p - 1)
+
+    def next_n(self, position, n, end_position):
+        assert isinstance(position, Position)
+        assert isinstance(end_position, Position)
+        assert position._p <= end_position._p
+        r = position._p + n
+        if r > end_position._p:
+            raise EndOfString
+        return Position(r)
+
+    def prev_n(self, position, n, start_position):
+        assert isinstance(position, Position)
+        assert isinstance(start_position, Position)
+        assert position._p >= start_position._p
+        r = position._p - n
+        if r < start_position._p:
+            raise EndOfString
+        return Position(r)
+
+    def slowly_convert_byte_pos_to_index(self, position):
+        assert isinstance(position, Position)
+        return position._p
+
+    def str(self, position):
+        assert isinstance(position, Position)
+        return ord(self._string[position._p])
+
+    def debug_check_pos(self, position):
+        assert isinstance(position, Position)
+
+    #def minimum_distance(self, position_low, position_high):
+    #    """Return an estimate.  The real value may be higher."""
+    #    assert isinstance(position_low, Position)
+    #    assert isinstance(position_high, Position)
+    #    dist = position_high._p - position_low._p
+    #    if dist == 0:
+    #        return 0
+    #    return random.randrange(1, dist + 1)
+
+    def maximum_distance(self, position_low, position_high):
+        """Return an estimate.  The real value may be lower."""
+        assert isinstance(position_low, Position)
+        assert isinstance(position_high, Position)
+        return position_high._p - position_low._p + random.randrange(0, 10)
+
+
+def match(pattern, string, start=0, end=sys.maxint, flags=0, fullmatch=False):
+    start, end = _adjust(start, end, len(string))
+    start = Position(start)
+    end = Position(end)
+    ctx = MatchContextForTests(pattern, string, start, end, flags)
+    ctx.fullmatch_only = fullmatch
+    if match_context(ctx):
+        return ctx
+    else:
+        return None
diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py
--- a/rpython/rlib/rsre/test/test_match.py
+++ b/rpython/rlib/rsre/test/test_match.py
@@ -1,6 +1,7 @@
 import re, random, py
-from rpython.rlib.rsre import rsre_core, rsre_char
+from rpython.rlib.rsre import rsre_char
 from rpython.rlib.rsre.rpy import get_code, VERSION
+from rpython.rlib.rsre.test.support import match
 
 
 def get_code_and_re(regexp):
@@ -16,61 +17,61 @@
 
     def test_or(self):
         r = get_code(r"a|bc|def")
-        assert rsre_core.match(r, "a")
-        assert rsre_core.match(r, "bc")
-        assert rsre_core.match(r, "def")
-        assert not rsre_core.match(r, "ghij")
+        assert match(r, "a")
+        assert match(r, "bc")
+        assert match(r, "def")
+        assert not match(r, "ghij")
 
     def test_any(self):
         r = get_code(r"ab.cd")
-        assert rsre_core.match(r, "abXcdef")
-        assert not rsre_core.match(r, "ab\ncdef")
-        assert not rsre_core.match(r, "abXcDef")
+        assert match(r, "abXcdef")
+        assert not match(r, "ab\ncdef")
+        assert not match(r, "abXcDef")
 
     def test_any_repetition(self):
         r = get_code(r"ab.*cd")
-        assert rsre_core.match(r, "abXXXXcdef")
-        assert rsre_core.match(r, "abcdef")
-        assert not rsre_core.match(r, "abX\nXcdef")
-        assert not rsre_core.match(r, "abXXXXcDef")
+        assert match(r, "abXXXXcdef")
+        assert match(r, "abcdef")
+        assert not match(r, "abX\nXcdef")
+        assert not match(r, "abXXXXcDef")
 
     def test_any_all(self):
         r = get_code(r"(?s)ab.cd")
-        assert rsre_core.match(r, "abXcdef")
-        assert rsre_core.match(r, "ab\ncdef")
-        assert not rsre_core.match(r, "ab\ncDef")
+        assert match(r, "abXcdef")
+        assert match(r, "ab\ncdef")
+        assert not match(r, "ab\ncDef")
 
     def test_any_all_repetition(self):
         r = get_code(r"(?s)ab.*cd")
-        assert rsre_core.match(r, "abXXXXcdef")
-        assert rsre_core.match(r, "abcdef")
-        assert rsre_core.match(r, "abX\nXcdef")
-        assert not rsre_core.match(r, "abX\nXcDef")
+        assert match(r, "abXXXXcdef")
+        assert match(r, "abcdef")
+        assert match(r, "abX\nXcdef")
+        assert not match(r, "abX\nXcDef")
 
     def test_assert(self):
         r = get_code(r"abc(?=def)(.)")
-        res = rsre_core.match(r, "abcdefghi")
+        res = match(r, "abcdefghi")
         assert res is not None and res.get_mark(1) == 4
-        assert not rsre_core.match(r, "abcdeFghi")
+        assert not match(r, "abcdeFghi")
 
     def test_assert_not(self):
         r = get_code(r"abc(?!def)(.)")
-        res = rsre_core.match(r, "abcdeFghi")
+        res = match(r, "abcdeFghi")
         assert res is not None and res.get_mark(1) == 4
-        assert not rsre_core.match(r, "abcdefghi")
+        assert not match(r, "abcdefghi")
 
     def test_lookbehind(self):
         r = get_code(r"([a-z]*)(?<=de)")
-        assert rsre_core.match(r, "ade")
-        res = rsre_core.match(r, "adefg")
+        assert match(r, "ade")
+        res = match(r, "adefg")
         assert res is not None and res.get_mark(1) == 3
-        assert not rsre_core.match(r, "abc")
-        assert not rsre_core.match(r, "X")
-        assert not rsre_core.match(r, "eX")
+        assert not match(r, "abc")
+        assert not match(r, "X")
+        assert not match(r, "eX")
 
     def test_negative_lookbehind(self):
         def found(s):
-            res = rsre_core.match(r, s)
+            res = match(r, s)
             assert res is not None
             return res.get_mark(1)
         r = get_code(r"([a-z]*)(?<!dd)")
@@ -84,125 +85,125 @@
 
     def test_at(self):
         r = get_code(r"abc$")
-        assert rsre_core.match(r, "abc")
-        assert not rsre_core.match(r, "abcd")
-        assert not rsre_core.match(r, "ab")
+        assert match(r, "abc")
+        assert not match(r, "abcd")
+        assert not match(r, "ab")
 
     def test_repeated_set(self):
         r = get_code(r"[a0x]+f")
-        assert rsre_core.match(r, "a0af")
-        assert not rsre_core.match(r, "a0yaf")
+        assert match(r, "a0af")
+        assert not match(r, "a0yaf")
 
     def test_category(self):
         r = get_code(r"[\sx]")
-        assert rsre_core.match(r, "x")
-        assert rsre_core.match(r, " ")
-        assert not rsre_core.match(r, "n")
+        assert match(r, "x")
+        assert match(r, " ")
+        assert not match(r, "n")
 
     def test_groupref(self):
         r = get_code(r"(xx+)\1+$")     # match non-prime numbers of x
-        assert not rsre_core.match(r, "xx")
-        assert not rsre_core.match(r, "xxx")
-        assert     rsre_core.match(r, "xxxx")
-        assert not rsre_core.match(r, "xxxxx")
-        assert     rsre_core.match(r, "xxxxxx")
-        assert not rsre_core.match(r, "xxxxxxx")
-        assert     rsre_core.match(r, "xxxxxxxx")
-        assert     rsre_core.match(r, "xxxxxxxxx")
+        assert not match(r, "xx")
+        assert not match(r, "xxx")
+        assert     match(r, "xxxx")
+        assert not match(r, "xxxxx")
+        assert     match(r, "xxxxxx")
+        assert not match(r, "xxxxxxx")
+        assert     match(r, "xxxxxxxx")
+        assert     match(r, "xxxxxxxxx")
 
     def test_groupref_ignore(self):
         r = get_code(r"(?i)(xx+)\1+$")     # match non-prime numbers of x
-        assert not rsre_core.match(r, "xX")
-        assert not rsre_core.match(r, "xxX")
-        assert     rsre_core.match(r, "Xxxx")
-        assert not rsre_core.match(r, "xxxXx")
-        assert     rsre_core.match(r, "xXxxxx")
-        assert not rsre_core.match(r, "xxxXxxx")
-        assert     rsre_core.match(r, "xxxxxxXx")
-        assert     rsre_core.match(r, "xxxXxxxxx")
+        assert not match(r, "xX")
+        assert not match(r, "xxX")
+        assert     match(r, "Xxxx")
+        assert not match(r, "xxxXx")
+        assert     match(r, "xXxxxx")
+        assert not match(r, "xxxXxxx")
+        assert     match(r, "xxxxxxXx")
+        assert     match(r, "xxxXxxxxx")
 
     def test_groupref_exists(self):
         r = get_code(r"((a)|(b))c(?(2)d)$")
-        assert not rsre_core.match(r, "ac")
-        assert     rsre_core.match(r, "acd")
-        assert     rsre_core.match(r, "bc")
-        assert not rsre_core.match(r, "bcd")
+        assert not match(r, "ac")
+        assert     match(r, "acd")
+        assert     match(r, "bc")
+        assert not match(r, "bcd")
         #
         r = get_code(r"((a)|(b))c(?(2)d|e)$")
-        assert not rsre_core.match(r, "ac")
-        assert     rsre_core.match(r, "acd")
-        assert not rsre_core.match(r, "ace")
-        assert not rsre_core.match(r, "bc")
-        assert not rsre_core.match(r, "bcd")
-        assert     rsre_core.match(r, "bce")
+        assert not match(r, "ac")
+        assert     match(r, "acd")
+        assert not match(r, "ace")
+        assert not match(r, "bc")
+        assert not match(r, "bcd")
+        assert     match(r, "bce")
 
     def test_in_ignore(self):
         r = get_code(r"(?i)[a-f]")
-        assert rsre_core.match(r, "b")
-        assert rsre_core.match(r, "C")
-        assert not rsre_core.match(r, "g")
+        assert match(r, "b")
+        assert match(r, "C")
+        assert not match(r, "g")
         r = get_code(r"(?i)[a-f]+$")
-        assert rsre_core.match(r, "bCdEf")
-        assert not rsre_core.match(r, "g")
-        assert not rsre_core.match(r, "aaagaaa")
+        assert match(r, "bCdEf")
+        assert not match(r, "g")
+        assert not match(r, "aaagaaa")
 
     def test_not_literal(self):
         r = get_code(r"[^a]")
-        assert rsre_core.match(r, "A")
-        assert not rsre_core.match(r, "a")
+        assert match(r, "A")
+        assert not match(r, "a")
         r = get_code(r"[^a]+$")
-        assert rsre_core.match(r, "Bx123")
-        assert not rsre_core.match(r, "--a--")
+        assert match(r, "Bx123")
+        assert not match(r, "--a--")
 
     def test_not_literal_ignore(self):
         r = get_code(r"(?i)[^a]")
-        assert rsre_core.match(r, "G")
-        assert not rsre_core.match(r, "a")
-        assert not rsre_core.match(r, "A")
+        assert match(r, "G")
+        assert not match(r, "a")
+        assert not match(r, "A")
         r = get_code(r"(?i)[^a]+$")
-        assert rsre_core.match(r, "Gx123")
-        assert not rsre_core.match(r, "--A--")
+        assert match(r, "Gx123")
+        assert not match(r, "--A--")
 
     def test_repeated_single_character_pattern(self):
         r = get_code(r"foo(?:(?<=foo)x)+$")
-        assert rsre_core.match(r, "foox")
+        assert match(r, "foox")
 
     def test_flatten_marks(self):
         r = get_code(r"a(b)c((d)(e))+$")
-        res = rsre_core.match(r, "abcdedede")
+        res = match(r, "abcdedede")
         assert res.flatten_marks() == [0, 9, 1, 2, 7, 9, 7, 8, 8, 9]
         assert res.flatten_marks() == [0, 9, 1, 2, 7, 9, 7, 8, 8, 9]
 
     def test_bug1(self):
         # REPEAT_ONE inside REPEAT
         r = get_code(r"(?:.+)?B")
-        assert rsre_core.match(r, "AB") is not None
+        assert match(r, "AB") is not None
         r = get_code(r"(?:AA+?)+B")
-        assert rsre_core.match(r, "AAAB") is not None
+        assert match(r, "AAAB") is not None
         r = get_code(r"(?:AA+)+?B")
-        assert rsre_core.match(r, "AAAB") is not None
+        assert match(r, "AAAB") is not None
         r = get_code(r"(?:AA+?)+?B")
-        assert rsre_core.match(r, "AAAB") is not None
+        assert match(r, "AAAB") is not None
         # REPEAT inside REPEAT
         r = get_code(r"(?:(?:xy)+)?B")
-        assert rsre_core.match(r, "xyB") is not None
+        assert match(r, "xyB") is not None
         r = get_code(r"(?:xy(?:xy)+?)+B")
-        assert rsre_core.match(r, "xyxyxyB") is not None
+        assert match(r, "xyxyxyB") is not None
         r = get_code(r"(?:xy(?:xy)+)+?B")
-        assert rsre_core.match(r, "xyxyxyB") is not None
+        assert match(r, "xyxyxyB") is not None
         r = get_code(r"(?:xy(?:xy)+?)+?B")
-        assert rsre_core.match(r, "xyxyxyB") is not None
+        assert match(r, "xyxyxyB") is not None
 
     def test_assert_group(self):
         r = get_code(r"abc(?=(..)f)(.)")
-        res = rsre_core.match(r, "abcdefghi")
+        res = match(r, "abcdefghi")
         assert res is not None
         assert res.span(2) == (3, 4)
         assert res.span(1) == (3, 5)
 
     def test_assert_not_group(self):
         r = get_code(r"abc(?!(de)f)(.)")
-        res = rsre_core.match(r, "abcdeFghi")
+        res = match(r, "abcdeFghi")
         assert res is not None
         assert res.span(2) == (3, 4)
         # this I definitely classify as Horrendously Implementation Dependent.
@@ -211,39 +212,39 @@
 
     def test_match_start(self):
         r = get_code(r"^ab")
-        assert     rsre_core.match(r, "abc")
-        assert not rsre_core.match(r, "xxxabc", start=3)
-        assert not rsre_core.match(r, "xx\nabc", start=3)
+        assert     match(r, "abc")
+        assert not match(r, "xxxabc", start=3)
+        assert not match(r, "xx\nabc", start=3)
         #
         r = get_code(r"(?m)^ab")
-        assert     rsre_core.match(r, "abc")
-        assert not rsre_core.match(r, "xxxabc", start=3)
-        assert     rsre_core.match(r, "xx\nabc", start=3)
+        assert     match(r, "abc")
+        assert not match(r, "xxxabc", start=3)
+        assert     match(r, "xx\nabc", start=3)
 
     def test_match_end(self):
         r = get_code("ab")
-        assert     rsre_core.match(r, "abc")
-        assert     rsre_core.match(r, "abc", end=333)
-        assert     rsre_core.match(r, "abc", end=3)
-        assert     rsre_core.match(r, "abc", end=2)
-        assert not rsre_core.match(r, "abc", end=1)
-        assert not rsre_core.match(r, "abc", end=0)
-        assert not rsre_core.match(r, "abc", end=-1)
+        assert     match(r, "abc")
+        assert     match(r, "abc", end=333)
+        assert     match(r, "abc", end=3)
+        assert     match(r, "abc", end=2)
+        assert not match(r, "abc", end=1)
+        assert not match(r, "abc", end=0)
+        assert not match(r, "abc", end=-1)
 
     def test_match_bug1(self):
         r = get_code(r'(x??)?$')
-        assert rsre_core.match(r, "x")
+        assert match(r, "x")
 
     def test_match_bug2(self):
         r = get_code(r'(x??)??$')
-        assert rsre_core.match(r, "x")
+        assert match(r, "x")
 
     def test_match_bug3(self):
         if VERSION == "2.7.5":
             py.test.skip("pattern fails to compile with exactly 2.7.5 "
                          "(works on 2.7.3 and on 2.7.trunk though)")
         r = get_code(r'([ax]*?x*)?$')
-        assert rsre_core.match(r, "aaxaa")
+        assert match(r, "aaxaa")
 
     def test_bigcharset(self):
         for i in range(100):
@@ -252,10 +253,10 @@
             pattern = u'[%s]' % (u''.join(chars),)
             r = get_code(pattern)
             for c in chars:
-                assert rsre_core.match(r, c)
+                assert match(r, c)
             for i in range(200):
                 c = unichr(random.randrange(0x0, 0xD000))
-                res = rsre_core.match(r, c)
+                res = match(r, c)
                 if c in chars:
                     assert res is not None
                 else:
@@ -264,14 +265,14 @@
     def test_simple_match_1(self):
         r = get_code(r"ab*bbbbbbbc")
         print r
-        match = rsre_core.match(r, "abbbbbbbbbcdef")
-        assert match
-        assert match.match_end == 11
+        m = match(r, "abbbbbbbbbcdef")
+        assert m
+        assert m.match_end == 11
 
     def test_empty_maxuntil(self):
         r = get_code("\\{\\{((?:.*?)+)\\}\\}")
-        match = rsre_core.match(r, "{{a}}{{b}}")
-        assert match.group(1) == "a"
+        m = match(r, "{{a}}{{b}}")
+        assert m.group(1) == "a"
 
     def test_fullmatch_1(self):
         r = get_code(r"ab*c")
@@ -307,4 +308,4 @@
         r = get_code(u"[\U00010428-\U0001044f]", re.I)
         assert r.count(27) == 1       # OPCODE_RANGE
         r[r.index(27)] = 32           # => OPCODE_RANGE_IGNORE
-        assert rsre_core.match(r, u"\U00010428")
+        assert match(r, u"\U00010428")
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to