Author: Armin Rigo <ar...@tunes.org>
Branch: unicode-utf8-re
Changeset: r93242:dd8e0cdfa795
Date: 2017-12-03 15:20 +0100
http://bitbucket.org/pypy/pypy/changeset/dd8e0cdfa795/
Log: in-progress. test_match passes again diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -149,7 +149,8 @@ # for testing if self.match_marks_flat is None: self._compute_flattened_marks() - return self.match_marks_flat + return [self.slowly_convert_byte_pos_to_index(i) + for i in self.match_marks_flat] def _compute_flattened_marks(self): self.match_marks_flat = [self.match_start, self.match_end] @@ -371,7 +372,7 @@ ptr = self.start_ptr if not self.next_char_ok(ctx, ptr, self.ppos3): return - self.start_ptr = ptr + 1 + self.start_ptr = ctx.next(ptr) return self.find_first_result(ctx) def next_char_ok(self, ctx, ptr, ppos): @@ -717,7 +718,7 @@ if ptr >= ctx.end or ctx.str(ptr) == ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_NOT_LITERAL_IGNORE: # match if it's not a literal string, ignoring case @@ -725,7 +726,7 @@ if ptr >= ctx.end or ctx.lowstr(ptr) == ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_REPEAT: # general repeat. 
in this version of the re module, all the work @@ -786,9 +787,10 @@ start = ptr min = ctx.pat(ppos+1) if min > 0: - min_count = ptr + min - if minptr > ctx.end: - return # cannot match + try: + minptr = ctx.next_n(ptr, min, ctx.end) + except EndOfString: + return # cannot match # count using pattern min as the maximum ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks) if ptr < minptr: @@ -990,11 +992,12 @@ def sre_at(ctx, atcode, ptr): if (atcode == AT_BEGINNING or atcode == AT_BEGINNING_STRING): - return ptr == 0 + return ptr == ctx.ZERO elif atcode == AT_BEGINNING_LINE: - prevptr = ptr - 1 - return prevptr < 0 or rsre_char.is_linebreak(ctx.str(prevptr)) + if ptr <= ctx.ZERO: + return True + return rsre_char.is_linebreak(ctx.str(ctx.prev(ptr))) elif atcode == AT_BOUNDARY: return at_boundary(ctx, ptr) diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -56,6 +56,8 @@ return Position(r) def slowly_convert_byte_pos_to_index(self, position): + if type(position) is int and position == -1: + return -1 assert isinstance(position, Position) return position._p @@ -107,3 +109,6 @@ return ctx else: return None + +def fullmatch(pattern, string, start=0, end=sys.maxint, flags=0): + return match(pattern, string, start, end, flags, fullmatch=True) diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py --- a/rpython/rlib/rsre/test/test_match.py +++ b/rpython/rlib/rsre/test/test_match.py @@ -1,7 +1,7 @@ import re, random, py from rpython.rlib.rsre import rsre_char from rpython.rlib.rsre.rpy import get_code, VERSION -from rpython.rlib.rsre.test.support import match +from rpython.rlib.rsre.test.support import match, fullmatch, Position def get_code_and_re(regexp): @@ -267,7 +267,7 @@ print r m = match(r, "abbbbbbbbbcdef") assert m - assert m.match_end == 11 + assert m.match_end == Position(11) def test_empty_maxuntil(self): r = 
get_code("\\{\\{((?:.*?)+)\\}\\}") @@ -276,30 +276,30 @@ def test_fullmatch_1(self): r = get_code(r"ab*c") - assert not rsre_core.fullmatch(r, "abbbcdef") - assert rsre_core.fullmatch(r, "abbbc") + assert not fullmatch(r, "abbbcdef") + assert fullmatch(r, "abbbc") def test_fullmatch_2(self): r = get_code(r"a(b*?)") - match = rsre_core.fullmatch(r, "abbb") + match = fullmatch(r, "abbb") assert match.group(1) == "bbb" - assert not rsre_core.fullmatch(r, "abbbc") + assert not fullmatch(r, "abbbc") def test_fullmatch_3(self): r = get_code(r"a((bp)*?)c") - match = rsre_core.fullmatch(r, "abpbpbpc") + match = fullmatch(r, "abpbpbpc") assert match.group(1) == "bpbpbp" def test_fullmatch_4(self): r = get_code(r"a((bp)*)c") - match = rsre_core.fullmatch(r, "abpbpbpc") + match = fullmatch(r, "abpbpbpc") assert match.group(1) == "bpbpbp" def test_fullmatch_assertion(self): r = get_code(r"(?=a).b") - assert rsre_core.fullmatch(r, "ab") + assert fullmatch(r, "ab") r = get_code(r"(?!a)..") - assert not rsre_core.fullmatch(r, "ab") + assert not fullmatch(r, "ab") def test_range_ignore(self): from rpython.rlib.unicodedata import unicodedb _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit