Author: Armin Rigo <ar...@tunes.org>
Branch: unicode-utf8-re
Changeset: r93242:dd8e0cdfa795
Date: 2017-12-03 15:20 +0100
http://bitbucket.org/pypy/pypy/changeset/dd8e0cdfa795/

Log:    in-progress. test_match passes again

diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -149,7 +149,8 @@
         # for testing
         if self.match_marks_flat is None:
             self._compute_flattened_marks()
-        return self.match_marks_flat
+        return [self.slowly_convert_byte_pos_to_index(i)
+                for i in self.match_marks_flat]
 
     def _compute_flattened_marks(self):
         self.match_marks_flat = [self.match_start, self.match_end]
@@ -371,7 +372,7 @@
         ptr = self.start_ptr
         if not self.next_char_ok(ctx, ptr, self.ppos3):
             return
-        self.start_ptr = ptr + 1
+        self.start_ptr = ctx.next(ptr)
         return self.find_first_result(ctx)
 
     def next_char_ok(self, ctx, ptr, ppos):
@@ -717,7 +718,7 @@
             if ptr >= ctx.end or ctx.str(ptr) == ctx.pat(ppos):
                 return
             ppos += 1
-            ptr += 1
+            ptr = ctx.next(ptr)
 
         elif op == OPCODE_NOT_LITERAL_IGNORE:
             # match if it's not a literal string, ignoring case
@@ -725,7 +726,7 @@
             if ptr >= ctx.end or ctx.lowstr(ptr) == ctx.pat(ppos):
                 return
             ppos += 1
-            ptr += 1
+            ptr = ctx.next(ptr)
 
         elif op == OPCODE_REPEAT:
             # general repeat.  in this version of the re module, all the work
@@ -786,9 +787,10 @@
             start = ptr
             min = ctx.pat(ppos+1)
             if min > 0:
-                min_count = ptr + min
-                if minptr > ctx.end:
-                    return   # cannot match
+                try:
+                    minptr = ctx.next_n(ptr, min, ctx.end)
+                except EndOfString:
+                    return    # cannot match
                 # count using pattern min as the maximum
                 ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks)
                 if ptr < minptr:
@@ -990,11 +992,12 @@
 def sre_at(ctx, atcode, ptr):
     if (atcode == AT_BEGINNING or
         atcode == AT_BEGINNING_STRING):
-        return ptr == 0
+        return ptr == ctx.ZERO
 
     elif atcode == AT_BEGINNING_LINE:
-        prevptr = ptr - 1
-        return prevptr < 0 or rsre_char.is_linebreak(ctx.str(prevptr))
+        if ptr <= ctx.ZERO:
+            return True
+        return rsre_char.is_linebreak(ctx.str(ctx.prev(ptr)))
 
     elif atcode == AT_BOUNDARY:
         return at_boundary(ctx, ptr)
diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py
--- a/rpython/rlib/rsre/test/support.py
+++ b/rpython/rlib/rsre/test/support.py
@@ -56,6 +56,8 @@
         return Position(r)
 
     def slowly_convert_byte_pos_to_index(self, position):
+        if type(position) is int and position == -1:
+            return -1
         assert isinstance(position, Position)
         return position._p
 
@@ -107,3 +109,6 @@
         return ctx
     else:
         return None
+
+def fullmatch(pattern, string, start=0, end=sys.maxint, flags=0):
+    return match(pattern, string, start, end, flags, fullmatch=True)
diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py
--- a/rpython/rlib/rsre/test/test_match.py
+++ b/rpython/rlib/rsre/test/test_match.py
@@ -1,7 +1,7 @@
 import re, random, py
 from rpython.rlib.rsre import rsre_char
 from rpython.rlib.rsre.rpy import get_code, VERSION
-from rpython.rlib.rsre.test.support import match
+from rpython.rlib.rsre.test.support import match, fullmatch, Position
 
 
 def get_code_and_re(regexp):
@@ -267,7 +267,7 @@
         print r
         m = match(r, "abbbbbbbbbcdef")
         assert m
-        assert m.match_end == 11
+        assert m.match_end == Position(11)
 
     def test_empty_maxuntil(self):
         r = get_code("\\{\\{((?:.*?)+)\\}\\}")
@@ -276,30 +276,30 @@
 
     def test_fullmatch_1(self):
         r = get_code(r"ab*c")
-        assert not rsre_core.fullmatch(r, "abbbcdef")
-        assert rsre_core.fullmatch(r, "abbbc")
+        assert not fullmatch(r, "abbbcdef")
+        assert fullmatch(r, "abbbc")
 
     def test_fullmatch_2(self):
         r = get_code(r"a(b*?)")
-        match = rsre_core.fullmatch(r, "abbb")
+        match = fullmatch(r, "abbb")
         assert match.group(1) == "bbb"
-        assert not rsre_core.fullmatch(r, "abbbc")
+        assert not fullmatch(r, "abbbc")
 
     def test_fullmatch_3(self):
         r = get_code(r"a((bp)*?)c")
-        match = rsre_core.fullmatch(r, "abpbpbpc")
+        match = fullmatch(r, "abpbpbpc")
         assert match.group(1) == "bpbpbp"
 
     def test_fullmatch_4(self):
         r = get_code(r"a((bp)*)c")
-        match = rsre_core.fullmatch(r, "abpbpbpc")
+        match = fullmatch(r, "abpbpbpc")
         assert match.group(1) == "bpbpbp"
 
     def test_fullmatch_assertion(self):
         r = get_code(r"(?=a).b")
-        assert rsre_core.fullmatch(r, "ab")
+        assert fullmatch(r, "ab")
         r = get_code(r"(?!a)..")
-        assert not rsre_core.fullmatch(r, "ab")
+        assert not fullmatch(r, "ab")
 
     def test_range_ignore(self):
         from rpython.rlib.unicodedata import unicodedb
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to