Author: Armin Rigo <ar...@tunes.org>
Branch: unicode-utf8-re
Changeset: r93291:fc5e50bec2b2
Date: 2017-12-07 09:01 +0100
http://bitbucket.org/pypy/pypy/changeset/fc5e50bec2b2/

Log:    Refix and test the standard StrMatchContext

diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -138,8 +138,40 @@
         """Similar to str()."""
         raise NotImplementedError
 
-    def debug_check_pos(self, pos):
-        pass
+    # The following methods are provided to be overridden in
+    # Utf8MatchContext.  The non-utf8 implementation is provided
+    # by the FixedMatchContext abstract subclass, in order to use
+    # the same @not_rpython safety trick as above.
+    @not_rpython
+    def next(self, position):
+        raise NotImplementedError
+    @not_rpython
+    def prev(self, position):
+        raise NotImplementedError
+    @not_rpython
+    def next_n(self, position, n):
+        raise NotImplementedError
+    @not_rpython
+    def prev_n(self, position, n, start_position):
+        raise NotImplementedError
+    @not_rpython
+    def slowly_convert_byte_pos_to_index(self, position):
+        raise NotImplementedError
+    @not_rpython
+    def debug_check_pos(self, position):
+        raise NotImplementedError
+    @not_rpython
+    def maximum_distance(self, position_low, position_high):
+        raise NotImplementedError
+    @not_rpython
+    def bytes_difference(self, position1, position2):
+        raise NotImplementedError
+    @not_rpython
+    def get_single_byte(self, base_position, index):
+        raise NotImplementedError
+    @not_rpython
+    def go_forward_by_bytes(self, base_position, index):
+        raise NotImplementedError
 
     def get_mark(self, gid):
         mark = find_mark(self.match_marks, gid)
@@ -186,13 +218,56 @@
     def fresh_copy(self, start):
         raise NotImplementedError
 
-class BufMatchContext(AbstractMatchContext):
+
+class FixedMatchContext(AbstractMatchContext):
+    """Abstract subclass to introduce the default implementation for
+    these position methods.  The Utf8 subclass doesn't inherit from here."""
+
+    ZERO = 0
+
+    def next(self, position):
+        return position + 1
+
+    def prev(self, position):
+        if position == 0:
+            raise EndOfString
+        return position - 1
+
+    def next_n(self, position, n, end_position):
+        position += n
+        if position > end_position:
+            raise EndOfString
+        return position
+
+    def prev_n(self, position, n, start_position):
+        position -= n
+        if position < start_position:
+            raise EndOfString
+        return position
+
+    def slowly_convert_byte_pos_to_index(self, position):
+        return position
+
+    def debug_check_pos(self, position):
+        pass
+
+    def maximum_distance(self, position_low, position_high):
+        return position_high - position_low
+
+    def bytes_difference(self, position1, position2):
+        return position1 - position2
+
+    def go_forward_by_bytes(self, base_position, index):
+        return base_position + index
+
+
+class BufMatchContext(FixedMatchContext):
     """Concrete subclass for matching in a buffer."""
 
     _immutable_fields_ = ["_buffer"]
 
     def __init__(self, pattern, buf, match_start, end, flags):
-        AbstractMatchContext.__init__(self, pattern, match_start, end, flags)
+        FixedMatchContext.__init__(self, pattern, match_start, end, flags)
         self._buffer = buf
 
     def str(self, index):
@@ -203,17 +278,20 @@
         c = self.str(index)
         return rsre_char.getlower(c, self.flags)
 
+    def get_single_byte(self, base_position, index):
+        return self.str(base_position + index)
+
     def fresh_copy(self, start):
         return BufMatchContext(self.pattern, self._buffer, start,
                                self.end, self.flags)
 
-class StrMatchContext(AbstractMatchContext):
+class StrMatchContext(FixedMatchContext):
     """Concrete subclass for matching in a plain string."""
 
     _immutable_fields_ = ["_string"]
 
     def __init__(self, pattern, string, match_start, end, flags):
-        AbstractMatchContext.__init__(self, pattern, match_start, end, flags)
+        FixedMatchContext.__init__(self, pattern, match_start, end, flags)
         self._string = string
         if not we_are_translated() and isinstance(string, unicode):
             self.flags |= rsre_char.SRE_FLAG_UNICODE   # for rsre_re.py
@@ -226,17 +304,20 @@
         c = self.str(index)
         return rsre_char.getlower(c, self.flags)
 
+    def get_single_byte(self, base_position, index):
+        return self.str(base_position + index)
+
     def fresh_copy(self, start):
         return StrMatchContext(self.pattern, self._string, start,
                                self.end, self.flags)
 
-class UnicodeMatchContext(AbstractMatchContext):
+class UnicodeMatchContext(FixedMatchContext):
     """Concrete subclass for matching in a unicode string."""
 
     _immutable_fields_ = ["_unicodestr"]
 
     def __init__(self, pattern, unicodestr, match_start, end, flags):
-        AbstractMatchContext.__init__(self, pattern, match_start, end, flags)
+        FixedMatchContext.__init__(self, pattern, match_start, end, flags)
         self._unicodestr = unicodestr
 
     def str(self, index):
@@ -247,6 +328,9 @@
         c = self.str(index)
         return rsre_char.getlower(c, self.flags)
 
+    def get_single_byte(self, base_position, index):
+        return self.str(base_position + index)
+
     def fresh_copy(self, start):
         return UnicodeMatchContext(self.pattern, self._unicodestr, start,
                                    self.end, self.flags)
diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py
--- a/rpython/rlib/rsre/test/support.py
+++ b/rpython/rlib/rsre/test/support.py
@@ -25,7 +25,6 @@
     """Concrete subclass for matching in a plain string, tweaked for tests"""
 
     ZERO = Position(0)
-    EXACT_DISTANCE = False
 
     def next(self, position):
         assert isinstance(position, Position)
diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py
--- a/rpython/rlib/rsre/test/test_search.py
+++ b/rpython/rlib/rsre/test/test_search.py
@@ -1,44 +1,45 @@
 import re, py
 from rpython.rlib.rsre.test.test_match import get_code, get_code_and_re
-from rpython.rlib.rsre.test.support import search, match, Position
+from rpython.rlib.rsre.test import support
+from rpython.rlib.rsre import rsre_core
 
 
-class TestSearch:
+class BaseTestSearch:
 
     def test_code1(self):
         r_code1 = get_code(r'[abc][def][ghi]')
-        res = search(r_code1, "fooahedixxx")
+        res = self.search(r_code1, "fooahedixxx")
         assert res is None
-        res = search(r_code1, "fooahcdixxx")
+        res = self.search(r_code1, "fooahcdixxx")
         assert res is not None
         assert res.span() == (5, 8)
 
     def test_code2(self):
         r_code2 = get_code(r'<item>\s*<title>(.*?)</title>')
-        res = search(r_code2, "foo bar <item>  <title>abc</title>def")
+        res = self.search(r_code2, "foo bar <item>  <title>abc</title>def")
         assert res is not None
         assert res.span() == (8, 34)
 
     def test_pure_literal(self):
         r_code3 = get_code(r'foobar')
-        res = search(r_code3, "foo bar foobar baz")
+        res = self.search(r_code3, "foo bar foobar baz")
         assert res is not None
         assert res.span() == (8, 14)
 
     def test_code3(self):
         r_code1 = get_code(r'<item>\s*<title>(.*?)</title>')
-        res = match(r_code1, "<item>  <title>abc</title>def")
+        res = self.match(r_code1, "<item>  <title>abc</title>def")
         assert res is not None
 
     def test_max_until_0_65535(self):
         r_code2 = get_code(r'<abc>(?:xy)*xy</abc>')
-        #res = match(r_code2, '<abc></abc>def')
+        #res = self.match(r_code2, '<abc></abc>def')
         #assert res is None
-        #res = match(r_code2, '<abc>xy</abc>def')
+        #res = self.match(r_code2, '<abc>xy</abc>def')
         #assert res is not None
-        res = match(r_code2, '<abc>xyxyxy</abc>def')
+        res = self.match(r_code2, '<abc>xyxyxy</abc>def')
         assert res is not None
-        res = match(r_code2, '<abc>' + 'xy'*1000 + '</abc>def')
+        res = self.match(r_code2, '<abc>' + 'xy'*1000 + '</abc>def')
         assert res is not None
 
     def test_max_until_3_5(self):
@@ -46,18 +47,18 @@
         for i in range(8):
             s = '<abc>' + 'xy'*i + '</abc>defdefdefdefdef'
             assert (r.match(s) is not None) is (3 <= i-1 <= 5)
-            res = match(r_code2, s)
+            res = self.match(r_code2, s)
             assert (res is not None) is (3 <= i-1 <= 5)
 
     def test_min_until_0_65535(self):
         r_code2 = get_code(r'<abc>(?:xy)*?xy</abc>')
-        res = match(r_code2, '<abc></abc>def')
+        res = self.match(r_code2, '<abc></abc>def')
         assert res is None
-        res = match(r_code2, '<abc>xy</abc>def')
+        res = self.match(r_code2, '<abc>xy</abc>def')
         assert res is not None
-        res = match(r_code2, '<abc>xyxyxy</abc>def')
+        res = self.match(r_code2, '<abc>xyxyxy</abc>def')
         assert res is not None
-        res = match(r_code2, '<abc>' + 'xy'*1000 + '</abc>def')
+        res = self.match(r_code2, '<abc>' + 'xy'*1000 + '</abc>def')
         assert res is not None
 
     def test_min_until_3_5(self):
@@ -65,44 +66,44 @@
         for i in range(8):
             s = '<abc>' + 'xy'*i + '</abc>defdefdefdefdef'
             assert (r.match(s) is not None) is (3 <= i-1 <= 5)
-            res = match(r_code2, s)
+            res = self.match(r_code2, s)
             assert (res is not None) is (3 <= i-1 <= 5)
 
     def test_min_repeat_one(self):
         r_code3 = get_code(r'<abc>.{3,5}?y')
         for i in range(8):
-            res = match(r_code3, '<abc>' + 'x'*i + 'y')
+            res = self.match(r_code3, '<abc>' + 'x'*i + 'y')
             assert (res is not None) is (3 <= i <= 5)
 
     def test_simple_group(self):
         r_code4 = get_code(r'<abc>(x.)</abc>')
-        res = match(r_code4, '<abc>xa</abc>def')
+        res = self.match(r_code4, '<abc>xa</abc>def')
         assert res is not None
         assert res.get_mark(0) == 5
         assert res.get_mark(1) == 7
 
     def test_max_until_groups(self):
         r_code4 = get_code(r'<abc>(x.)*xy</abc>')
-        res = match(r_code4, '<abc>xaxbxy</abc>def')
+        res = self.match(r_code4, '<abc>xaxbxy</abc>def')
         assert res is not None
         assert res.get_mark(0) == 7
         assert res.get_mark(1) == 9
 
     def test_group_branch(self):
         r_code5 = get_code(r'<abc>(ab|c)</abc>')
-        res = match(r_code5, '<abc>ab</abc>def')
+        res = self.match(r_code5, '<abc>ab</abc>def')
         assert (res.get_mark(0), res.get_mark(1)) == (5, 7)
-        res = match(r_code5, '<abc>c</abc>def')
+        res = self.match(r_code5, '<abc>c</abc>def')
         assert (res.get_mark(0), res.get_mark(1)) == (5, 6)
-        res = match(r_code5, '<abc>de</abc>def')
+        res = self.match(r_code5, '<abc>de</abc>def')
         assert res is None
 
     def test_group_branch_max_until(self):
         r_code6 = get_code(r'<abc>(ab|c)*a</abc>')
-        res = match(r_code6, '<abc>ccabcccaba</abc>def')
+        res = self.match(r_code6, '<abc>ccabcccaba</abc>def')
         assert (res.get_mark(0), res.get_mark(1)) == (12, 14)
         r_code7 = get_code(r'<abc>((ab)|(c))*a</abc>')
-        res = match(r_code7, '<abc>ccabcccaba</abc>def')
+        res = self.match(r_code7, '<abc>ccabcccaba</abc>def')
         assert (res.get_mark(0), res.get_mark(1)) == (12, 14)
         assert (res.get_mark(2), res.get_mark(3)) == (12, 14)
         assert (res.get_mark(4), res.get_mark(5)) == (11, 12)
@@ -113,7 +114,7 @@
         assert m.span(1) == (12, 13)
         assert m.span(3) == (12, 13)
         assert m.span(2) == (8, 9)
-        res = match(r_code7, '<abc>bbbabbbb</abc>')
+        res = self.match(r_code7, '<abc>bbbabbbb</abc>')
         assert (res.get_mark(0), res.get_mark(1)) == (12, 13)
         assert (res.get_mark(4), res.get_mark(5)) == (12, 13)
         assert (res.get_mark(2), res.get_mark(3)) == (8, 9)
@@ -124,7 +125,7 @@
         assert m.span(1) == (6, 7)
         assert m.span(3) == (6, 7)
         assert m.span(2) == (5, 6)
-        res = match(r_code8, '<abc>ab</abc>')
+        res = self.match(r_code8, '<abc>ab</abc>')
         assert (res.get_mark(0), res.get_mark(1)) == (6, 7)
         assert (res.get_mark(4), res.get_mark(5)) == (6, 7)
         assert (res.get_mark(2), res.get_mark(3)) == (5, 6)
@@ -134,7 +135,7 @@
         m = r9.match('xyzxc')
         assert m.span(1) == (3, 4)
         assert m.span(2) == (-1, -1)
-        res = match(r_code9, 'xyzxc')
+        res = self.match(r_code9, 'xyzxc')
         assert (res.get_mark(0), res.get_mark(1)) == (3, 4)
         assert (res.get_mark(2), res.get_mark(3)) == (-1, -1)
 
@@ -142,8 +143,8 @@
         r_code9, r9 = get_code_and_re(r'((x|yz)+?(y)??c)*')
         m = r9.match('xycxyzxc')
         assert m.span(2) == (6, 7)
-        #assert match.span(3) == (1, 2) --- bug of CPython
-        res = match(r_code9, 'xycxyzxc')
+        #assert m.span(3) == (1, 2) --- bug of CPython
+        res = self.match(r_code9, 'xycxyzxc')
         assert (res.get_mark(2), res.get_mark(3)) == (6, 7)
         assert (res.get_mark(4), res.get_mark(5)) == (1, 2)
 
@@ -151,19 +152,19 @@
         r_code, r = get_code_and_re(r'(a?)+y')
         assert r.match('y')
         assert r.match('aaayaaay').span() == (0, 4)
-        res = match(r_code, 'y')
+        res = self.match(r_code, 'y')
         assert res
-        res = match(r_code, 'aaayaaay')
+        res = self.match(r_code, 'aaayaaay')
         assert res and res.span() == (0, 4)
         #
         r_code, r = get_code_and_re(r'(a?){4,6}y')
         assert r.match('y')
-        res = match(r_code, 'y')
+        res = self.match(r_code, 'y')
         assert res
         #
         r_code, r = get_code_and_re(r'(a?)*y')
         assert r.match('y')
-        res = match(r_code, 'y')
+        res = self.match(r_code, 'y')
         assert res
 
     def test_empty_maxuntil_2(self):
@@ -173,24 +174,24 @@
             py.test.skip("older version of the stdlib: %s" % (e,))
         assert r.match('XfooXbarX').span() == (0, 5)
         assert r.match('XfooXbarX').span(1) == (4, 4)
-        res = match(r_code, 'XfooXbarX')
+        res = self.match(r_code, 'XfooXbarX')
         assert res.span() == (0, 5)
         assert res.span(1) == (4, 4)
 
     def test_empty_minuntil(self):
         r_code, r = get_code_and_re(r'(a?)+?y')
         #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory
-        res = match(r_code, 'z')
+        res = self.match(r_code, 'z')
         assert not res
         #
         r_code, r = get_code_and_re(r'(a?){4,6}?y')
         assert not r.match('z')
-        res = match(r_code, 'z')
+        res = self.match(r_code, 'z')
         assert not res
         #
         r_code, r = get_code_and_re(r'(a?)*?y')
         #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory
-        res = match(r_code, 'z')
+        res = self.match(r_code, 'z')
         assert not res
 
     def test_empty_search(self):
@@ -198,15 +199,26 @@
         for j in range(-2, 6):
             for i in range(-2, 6):
                 match = r.search('abc', i, j)
-                res = search(r_code, 'abc', i, j)
+                res = self.search(r_code, 'abc', i, j)
                 jk = min(max(j, 0), 3)
                 ik = min(max(i, 0), 3)
                 if ik <= jk:
                     assert match is not None
                     assert match.span() == (ik, ik)
                     assert res is not None
-                    assert res.match_start == Position(ik)
-                    assert res.match_end == Position(ik)
+                    assert res.match_start == self.Position(ik)
+                    assert res.match_end == self.Position(ik)
                 else:
                     assert match is None
                     assert res is None
+
+
+class TestSearchCustom(BaseTestSearch):
+    search = staticmethod(support.search)
+    match = staticmethod(support.match)
+    Position = support.Position
+
+class TestSearchStr(BaseTestSearch):
+    search = staticmethod(rsre_core.search)
+    match = staticmethod(rsre_core.match)
+    Position = staticmethod(lambda n: n)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to