Author: Armin Rigo <[email protected]>
Branch: unicode-utf8-re
Changeset: r93242:dd8e0cdfa795
Date: 2017-12-03 15:20 +0100
http://bitbucket.org/pypy/pypy/changeset/dd8e0cdfa795/
Log: in-progress. test_match passes again
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -149,7 +149,8 @@
# for testing
if self.match_marks_flat is None:
self._compute_flattened_marks()
- return self.match_marks_flat
+ return [self.slowly_convert_byte_pos_to_index(i)
+ for i in self.match_marks_flat]
def _compute_flattened_marks(self):
self.match_marks_flat = [self.match_start, self.match_end]
@@ -371,7 +372,7 @@
ptr = self.start_ptr
if not self.next_char_ok(ctx, ptr, self.ppos3):
return
- self.start_ptr = ptr + 1
+ self.start_ptr = ctx.next(ptr)
return self.find_first_result(ctx)
def next_char_ok(self, ctx, ptr, ppos):
@@ -717,7 +718,7 @@
if ptr >= ctx.end or ctx.str(ptr) == ctx.pat(ppos):
return
ppos += 1
- ptr += 1
+ ptr = ctx.next(ptr)
elif op == OPCODE_NOT_LITERAL_IGNORE:
# match if it's not a literal string, ignoring case
@@ -725,7 +726,7 @@
if ptr >= ctx.end or ctx.lowstr(ptr) == ctx.pat(ppos):
return
ppos += 1
- ptr += 1
+ ptr = ctx.next(ptr)
elif op == OPCODE_REPEAT:
# general repeat. in this version of the re module, all the work
@@ -786,9 +787,10 @@
start = ptr
min = ctx.pat(ppos+1)
if min > 0:
- min_count = ptr + min
- if minptr > ctx.end:
- return # cannot match
+ try:
+ minptr = ctx.next_n(ptr, min, ctx.end)
+ except EndOfString:
+ return # cannot match
# count using pattern min as the maximum
ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks)
if ptr < minptr:
@@ -990,11 +992,12 @@
def sre_at(ctx, atcode, ptr):
if (atcode == AT_BEGINNING or
atcode == AT_BEGINNING_STRING):
- return ptr == 0
+ return ptr == ctx.ZERO
elif atcode == AT_BEGINNING_LINE:
- prevptr = ptr - 1
- return prevptr < 0 or rsre_char.is_linebreak(ctx.str(prevptr))
+ if ptr <= ctx.ZERO:
+ return True
+ return rsre_char.is_linebreak(ctx.str(ctx.prev(ptr)))
elif atcode == AT_BOUNDARY:
return at_boundary(ctx, ptr)
diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py
--- a/rpython/rlib/rsre/test/support.py
+++ b/rpython/rlib/rsre/test/support.py
@@ -56,6 +56,8 @@
return Position(r)
def slowly_convert_byte_pos_to_index(self, position):
+ if type(position) is int and position == -1:
+ return -1
assert isinstance(position, Position)
return position._p
@@ -107,3 +109,6 @@
return ctx
else:
return None
+
+def fullmatch(pattern, string, start=0, end=sys.maxint, flags=0):
+ return match(pattern, string, start, end, flags, fullmatch=True)
diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py
--- a/rpython/rlib/rsre/test/test_match.py
+++ b/rpython/rlib/rsre/test/test_match.py
@@ -1,7 +1,7 @@
import re, random, py
from rpython.rlib.rsre import rsre_char
from rpython.rlib.rsre.rpy import get_code, VERSION
-from rpython.rlib.rsre.test.support import match
+from rpython.rlib.rsre.test.support import match, fullmatch, Position
def get_code_and_re(regexp):
@@ -267,7 +267,7 @@
print r
m = match(r, "abbbbbbbbbcdef")
assert m
- assert m.match_end == 11
+ assert m.match_end == Position(11)
def test_empty_maxuntil(self):
r = get_code("\\{\\{((?:.*?)+)\\}\\}")
@@ -276,30 +276,30 @@
def test_fullmatch_1(self):
r = get_code(r"ab*c")
- assert not rsre_core.fullmatch(r, "abbbcdef")
- assert rsre_core.fullmatch(r, "abbbc")
+ assert not fullmatch(r, "abbbcdef")
+ assert fullmatch(r, "abbbc")
def test_fullmatch_2(self):
r = get_code(r"a(b*?)")
- match = rsre_core.fullmatch(r, "abbb")
+ match = fullmatch(r, "abbb")
assert match.group(1) == "bbb"
- assert not rsre_core.fullmatch(r, "abbbc")
+ assert not fullmatch(r, "abbbc")
def test_fullmatch_3(self):
r = get_code(r"a((bp)*?)c")
- match = rsre_core.fullmatch(r, "abpbpbpc")
+ match = fullmatch(r, "abpbpbpc")
assert match.group(1) == "bpbpbp"
def test_fullmatch_4(self):
r = get_code(r"a((bp)*)c")
- match = rsre_core.fullmatch(r, "abpbpbpc")
+ match = fullmatch(r, "abpbpbpc")
assert match.group(1) == "bpbpbp"
def test_fullmatch_assertion(self):
r = get_code(r"(?=a).b")
- assert rsre_core.fullmatch(r, "ab")
+ assert fullmatch(r, "ab")
r = get_code(r"(?!a)..")
- assert not rsre_core.fullmatch(r, "ab")
+ assert not fullmatch(r, "ab")
def test_range_ignore(self):
from rpython.rlib.unicodedata import unicodedb
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit