Author: Armin Rigo <[email protected]>
Branch: unicode-utf8-re
Changeset: r93233:3c9e5313506f
Date: 2017-12-01 18:19 +0100
http://bitbucket.org/pypy/pypy/changeset/3c9e5313506f/
Log: Start of step 1
Making sure that we don't use unchecked integer arithmetic on
string positions, because they are going to be byte positions with
utf8. In-progress.
diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py
--- a/rpython/rlib/debug.py
+++ b/rpython/rlib/debug.py
@@ -316,14 +316,21 @@
class ExpectedRegularInt(Exception):
pass
+class NegativeArgumentNotAllowed(Exception):
+ pass
+
def check_nonneg(x):
"""Give a translation-time error if 'x' is not known to be non-negative.
To help debugging, this also gives a translation-time error if 'x' is
actually typed as an r_uint (in which case the call to check_nonneg()
is a bit strange and probably unexpected).
"""
- assert type(x)(-1) < 0 # otherwise, 'x' is a r_uint or similar
- assert x >= 0
+ try:
+ assert type(x)(-1) < 0 # otherwise, 'x' is a r_uint or similar
+ except NegativeArgumentNotAllowed:
+ pass
+ else:
+ assert x >= 0
return x
class Entry(ExtRegistryEntry):
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -83,6 +83,9 @@
def __init__(self, msg):
self.msg = msg
+class EndOfString(Exception):
+ pass
+
class AbstractMatchContext(object):
"""Abstract base class"""
_immutable_fields_ = ['pattern[*]', 'flags', 'end']
@@ -135,8 +138,12 @@
"""Similar to str()."""
raise NotImplementedError
+ def debug_check_pos(self, pos):
+ pass
+
def get_mark(self, gid):
- return find_mark(self.match_marks, gid)
+ mark = find_mark(self.match_marks, gid)
+ return self.slowly_convert_byte_pos_to_index(mark)
def flatten_marks(self):
# for testing
@@ -317,7 +324,7 @@
ctx.jitdriver_RepeatOne.jit_merge_point(
self=self, ptr=ptr, ctx=ctx, nextppos=nextppos)
result = sre_match(ctx, nextppos, ptr, self.start_marks)
- ptr -= 1
+ ptr = ctx.prev_or_minus1(ptr)
if result is not None:
self.subresult = result
self.start_ptr = ptr
@@ -331,28 +338,31 @@
reds=['ptr', 'self', 'ctx'],
debugprint=(2, 0)) # indices in 'greens'
- def __init__(self, nextppos, ppos3, maxptr, ptr, marks):
+ def __init__(self, nextppos, ppos3, max_count, ptr, marks):
self.nextppos = nextppos
self.ppos3 = ppos3
- self.maxptr = maxptr
+ self.max_count = max_count
self.start_ptr = ptr
self.start_marks = marks
def find_first_result(self, ctx):
ptr = self.start_ptr
nextppos = self.nextppos
+ max_count = self.max_count
ppos3 = self.ppos3
- while ptr <= self.maxptr:
+ while max_count >= 0:
ctx.jitdriver_MinRepeatOne.jit_merge_point(
self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3)
result = sre_match(ctx, nextppos, ptr, self.start_marks)
if result is not None:
self.subresult = result
self.start_ptr = ptr
+ self.max_count = max_count
return self
if not self.next_char_ok(ctx, ptr, ppos3):
break
- ptr += 1
+ ptr = ctx.next(ptr)
+ max_count -= 1
def find_next_result(self, ctx):
ptr = self.start_ptr
@@ -520,6 +530,7 @@
need all results; in that case we use the method move_to_next_result()
of the MatchResult."""
while True:
+ ctx.debug_check_pos(ptr)
op = ctx.pat(ppos)
ppos += 1
@@ -551,22 +562,25 @@
# <ANY>
if ptr >= ctx.end or rsre_char.is_linebreak(ctx.str(ptr)):
return
- ptr += 1
+ ptr = ctx.next(ptr)
elif op == OPCODE_ANY_ALL:
# match anything
# <ANY_ALL>
if ptr >= ctx.end:
return
- ptr += 1
+ ptr = ctx.next(ptr)
elif op == OPCODE_ASSERT:
# assert subpattern
# <ASSERT> <0=skip> <1=back> <pattern>
- ptr1 = ptr - ctx.pat(ppos+1)
+ try:
+ ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO)
+ except EndOfString:
+ return
saved = ctx.fullmatch_only
ctx.fullmatch_only = False
- stop = ptr1 < 0 or sre_match(ctx, ppos + 2, ptr1, marks) is None
+ stop = sre_match(ctx, ppos + 2, ptr1, marks) is None
ctx.fullmatch_only = saved
if stop:
return
@@ -576,14 +590,17 @@
elif op == OPCODE_ASSERT_NOT:
# assert not subpattern
# <ASSERT_NOT> <0=skip> <1=back> <pattern>
- ptr1 = ptr - ctx.pat(ppos+1)
- saved = ctx.fullmatch_only
- ctx.fullmatch_only = False
- stop = (ptr1 >= 0 and sre_match(ctx, ppos + 2, ptr1, marks)
- is not None)
- ctx.fullmatch_only = saved
- if stop:
- return
+ try:
+ ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO)
+ except EndOfString:
+ pass
+ else:
+ saved = ctx.fullmatch_only
+ ctx.fullmatch_only = False
+ stop = sre_match(ctx, ppos + 2, ptr1, marks) is not None
+ ctx.fullmatch_only = saved
+ if stop:
+ return
ppos += ctx.pat(ppos)
elif op == OPCODE_AT:
@@ -661,7 +678,7 @@
elif op == OPCODE_INFO:
# optimization info block
# <INFO> <0=skip> <1=flags> <2=min> ...
- if (ctx.end - ptr) < ctx.pat(ppos+2):
+ if ctx.maximum_distance(ptr, ctx.end) < ctx.pat(ppos+2):
return
ppos += ctx.pat(ppos)
@@ -674,7 +691,7 @@
if ptr >= ctx.end or ctx.str(ptr) != ctx.pat(ppos):
return
ppos += 1
- ptr += 1
+ ptr = ctx.next(ptr)
elif op == OPCODE_LITERAL_IGNORE:
# match literal string, ignoring case
@@ -743,8 +760,9 @@
# use the MAX_REPEAT operator.
# <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
start = ptr
- minptr = start + ctx.pat(ppos+1)
- if minptr > ctx.end:
+ try:
+ minptr = ctx.next_n(start, ctx.pat(ppos+1), ctx.end)
+ except EndOfString:
return # cannot match
ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2),
marks)
@@ -765,7 +783,7 @@
start = ptr
min = ctx.pat(ppos+1)
if min > 0:
- minptr = ptr + min
+ min_count = ptr + min
if minptr > ctx.end:
return # cannot match
# count using pattern min as the maximum
@@ -773,14 +791,12 @@
if ptr < minptr:
return # did not match minimum number of times
- maxptr = ctx.end
+ max_count = sys.maxint
max = ctx.pat(ppos+2)
if max != rsre_char.MAXREPEAT:
- maxptr1 = start + max
- if maxptr1 <= maxptr:
- maxptr = maxptr1
+ max_count = max
nextppos = ppos + ctx.pat(ppos)
- result = MinRepeatOneMatchResult(nextppos, ppos+3, maxptr,
+ result = MinRepeatOneMatchResult(nextppos, ppos+3, max_count,
ptr, marks)
return result.find_first_result(ctx)
@@ -818,7 +834,7 @@
@specializectx
def find_repetition_end(ctx, ppos, ptr, maxcount, marks):
end = ctx.end
- ptrp1 = ptr + 1
+ ptrp1 = ctx.next(ptr)
# First get rid of the cases where we don't have room for any match.
if maxcount <= 0 or ptrp1 > end:
return ptr
@@ -904,7 +920,7 @@
ctx.jitdriver_MatchIn.jit_merge_point(ctx=ctx, ptr=ptr,
end=end, ppos=ppos)
if ptr < end and checkerfn(ctx, ptr, ppos):
- ptr += 1
+ ptr = ctx.next(ptr)
else:
return ptr
elif checkerfn == match_IN_IGNORE:
@@ -927,7 +943,7 @@
@specializectx
def fre(ctx, ptr, end, ppos):
while ptr < end and checkerfn(ctx, ptr, ppos):
- ptr += 1
+ ptr = ctx.next(ptr)
return ptr
fre = func_with_new_name(fre, 'fre_' + checkerfn.__name__)
return fre
diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsre/test/support.py
@@ -0,0 +1,102 @@
+import sys, random
+from rpython.rlib import debug
+from rpython.rlib.rsre.rsre_core import _adjust, match_context
+from rpython.rlib.rsre.rsre_core import StrMatchContext, EndOfString
+
+
+class Position(object):
+ def __init__(self, p):
+ assert isinstance(p, int)
+ if p < 0:
+ raise debug.NegativeArgumentNotAllowed(
+ "making a Position with byte index %r" % p)
+ self._p = p
+ def __repr__(self):
+ return '<Position %d>' % (self._p)
+ def __cmp__(self, other):
+ if not isinstance(other, (Position, MinusOnePosition)):
+ raise TypeError("cannot compare %r with %r" % (self, other))
+ return cmp(self._p, other._p)
+
+class MinusOnePosition(object):
+ _p = -1
+ def __repr__(self):
+ return '<MinusOnePosition>'
+ def __cmp__(self, other):
+ if not isinstance(other, (Position, MinusOnePosition)):
+ raise TypeError("cannot compare %r with %r" % (self, other))
+ return cmp(self._p, other._p)
+
+
+class MatchContextForTests(StrMatchContext):
+ """Concrete subclass for matching in a plain string, tweaked for tests"""
+
+ ZERO = Position(0)
+ MINUS1 = MinusOnePosition()
+ EXACT_DISTANCE = False
+
+ def next(self, position):
+ assert isinstance(position, Position)
+ return Position(position._p + 1)
+
+ def prev_or_minus1(self, position):
+ assert isinstance(position, Position)
+ if position._p == 0:
+ return self.MINUS1
+ return Position(position._p - 1)
+
+ def next_n(self, position, n, end_position):
+ assert isinstance(position, Position)
+ assert isinstance(end_position, Position)
+ assert position._p <= end_position._p
+ r = position._p + n
+ if r > end_position._p:
+ raise EndOfString
+ return Position(r)
+
+ def prev_n(self, position, n, start_position):
+ assert isinstance(position, Position)
+ assert isinstance(start_position, Position)
+ assert position._p >= start_position._p
+ r = position._p - n
+ if r < start_position._p:
+ raise EndOfString
+ return Position(r)
+
+ def slowly_convert_byte_pos_to_index(self, position):
+ assert isinstance(position, Position)
+ return position._p
+
+ def str(self, position):
+ assert isinstance(position, Position)
+ return ord(self._string[position._p])
+
+ def debug_check_pos(self, position):
+ assert isinstance(position, Position)
+
+ #def minimum_distance(self, position_low, position_high):
+ # """Return an estimate. The real value may be higher."""
+ # assert isinstance(position_low, Position)
+ # assert isinstance(position_high, Position)
+ # dist = position_high._p - position_low._p
+ # if dist == 0:
+ # return 0
+ # return random.randrange(1, dist + 1)
+
+ def maximum_distance(self, position_low, position_high):
+ """Return an estimate. The real value may be lower."""
+ assert isinstance(position_low, Position)
+ assert isinstance(position_high, Position)
+ return position_high._p - position_low._p + random.randrange(0, 10)
+
+
+def match(pattern, string, start=0, end=sys.maxint, flags=0, fullmatch=False):
+ start, end = _adjust(start, end, len(string))
+ start = Position(start)
+ end = Position(end)
+ ctx = MatchContextForTests(pattern, string, start, end, flags)
+ ctx.fullmatch_only = fullmatch
+ if match_context(ctx):
+ return ctx
+ else:
+ return None
diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py
--- a/rpython/rlib/rsre/test/test_match.py
+++ b/rpython/rlib/rsre/test/test_match.py
@@ -1,6 +1,7 @@
import re, random, py
-from rpython.rlib.rsre import rsre_core, rsre_char
+from rpython.rlib.rsre import rsre_char
from rpython.rlib.rsre.rpy import get_code, VERSION
+from rpython.rlib.rsre.test.support import match
def get_code_and_re(regexp):
@@ -16,61 +17,61 @@
def test_or(self):
r = get_code(r"a|bc|def")
- assert rsre_core.match(r, "a")
- assert rsre_core.match(r, "bc")
- assert rsre_core.match(r, "def")
- assert not rsre_core.match(r, "ghij")
+ assert match(r, "a")
+ assert match(r, "bc")
+ assert match(r, "def")
+ assert not match(r, "ghij")
def test_any(self):
r = get_code(r"ab.cd")
- assert rsre_core.match(r, "abXcdef")
- assert not rsre_core.match(r, "ab\ncdef")
- assert not rsre_core.match(r, "abXcDef")
+ assert match(r, "abXcdef")
+ assert not match(r, "ab\ncdef")
+ assert not match(r, "abXcDef")
def test_any_repetition(self):
r = get_code(r"ab.*cd")
- assert rsre_core.match(r, "abXXXXcdef")
- assert rsre_core.match(r, "abcdef")
- assert not rsre_core.match(r, "abX\nXcdef")
- assert not rsre_core.match(r, "abXXXXcDef")
+ assert match(r, "abXXXXcdef")
+ assert match(r, "abcdef")
+ assert not match(r, "abX\nXcdef")
+ assert not match(r, "abXXXXcDef")
def test_any_all(self):
r = get_code(r"(?s)ab.cd")
- assert rsre_core.match(r, "abXcdef")
- assert rsre_core.match(r, "ab\ncdef")
- assert not rsre_core.match(r, "ab\ncDef")
+ assert match(r, "abXcdef")
+ assert match(r, "ab\ncdef")
+ assert not match(r, "ab\ncDef")
def test_any_all_repetition(self):
r = get_code(r"(?s)ab.*cd")
- assert rsre_core.match(r, "abXXXXcdef")
- assert rsre_core.match(r, "abcdef")
- assert rsre_core.match(r, "abX\nXcdef")
- assert not rsre_core.match(r, "abX\nXcDef")
+ assert match(r, "abXXXXcdef")
+ assert match(r, "abcdef")
+ assert match(r, "abX\nXcdef")
+ assert not match(r, "abX\nXcDef")
def test_assert(self):
r = get_code(r"abc(?=def)(.)")
- res = rsre_core.match(r, "abcdefghi")
+ res = match(r, "abcdefghi")
assert res is not None and res.get_mark(1) == 4
- assert not rsre_core.match(r, "abcdeFghi")
+ assert not match(r, "abcdeFghi")
def test_assert_not(self):
r = get_code(r"abc(?!def)(.)")
- res = rsre_core.match(r, "abcdeFghi")
+ res = match(r, "abcdeFghi")
assert res is not None and res.get_mark(1) == 4
- assert not rsre_core.match(r, "abcdefghi")
+ assert not match(r, "abcdefghi")
def test_lookbehind(self):
r = get_code(r"([a-z]*)(?<=de)")
- assert rsre_core.match(r, "ade")
- res = rsre_core.match(r, "adefg")
+ assert match(r, "ade")
+ res = match(r, "adefg")
assert res is not None and res.get_mark(1) == 3
- assert not rsre_core.match(r, "abc")
- assert not rsre_core.match(r, "X")
- assert not rsre_core.match(r, "eX")
+ assert not match(r, "abc")
+ assert not match(r, "X")
+ assert not match(r, "eX")
def test_negative_lookbehind(self):
def found(s):
- res = rsre_core.match(r, s)
+ res = match(r, s)
assert res is not None
return res.get_mark(1)
r = get_code(r"([a-z]*)(?<!dd)")
@@ -84,125 +85,125 @@
def test_at(self):
r = get_code(r"abc$")
- assert rsre_core.match(r, "abc")
- assert not rsre_core.match(r, "abcd")
- assert not rsre_core.match(r, "ab")
+ assert match(r, "abc")
+ assert not match(r, "abcd")
+ assert not match(r, "ab")
def test_repeated_set(self):
r = get_code(r"[a0x]+f")
- assert rsre_core.match(r, "a0af")
- assert not rsre_core.match(r, "a0yaf")
+ assert match(r, "a0af")
+ assert not match(r, "a0yaf")
def test_category(self):
r = get_code(r"[\sx]")
- assert rsre_core.match(r, "x")
- assert rsre_core.match(r, " ")
- assert not rsre_core.match(r, "n")
+ assert match(r, "x")
+ assert match(r, " ")
+ assert not match(r, "n")
def test_groupref(self):
r = get_code(r"(xx+)\1+$") # match non-prime numbers of x
- assert not rsre_core.match(r, "xx")
- assert not rsre_core.match(r, "xxx")
- assert rsre_core.match(r, "xxxx")
- assert not rsre_core.match(r, "xxxxx")
- assert rsre_core.match(r, "xxxxxx")
- assert not rsre_core.match(r, "xxxxxxx")
- assert rsre_core.match(r, "xxxxxxxx")
- assert rsre_core.match(r, "xxxxxxxxx")
+ assert not match(r, "xx")
+ assert not match(r, "xxx")
+ assert match(r, "xxxx")
+ assert not match(r, "xxxxx")
+ assert match(r, "xxxxxx")
+ assert not match(r, "xxxxxxx")
+ assert match(r, "xxxxxxxx")
+ assert match(r, "xxxxxxxxx")
def test_groupref_ignore(self):
r = get_code(r"(?i)(xx+)\1+$") # match non-prime numbers of x
- assert not rsre_core.match(r, "xX")
- assert not rsre_core.match(r, "xxX")
- assert rsre_core.match(r, "Xxxx")
- assert not rsre_core.match(r, "xxxXx")
- assert rsre_core.match(r, "xXxxxx")
- assert not rsre_core.match(r, "xxxXxxx")
- assert rsre_core.match(r, "xxxxxxXx")
- assert rsre_core.match(r, "xxxXxxxxx")
+ assert not match(r, "xX")
+ assert not match(r, "xxX")
+ assert match(r, "Xxxx")
+ assert not match(r, "xxxXx")
+ assert match(r, "xXxxxx")
+ assert not match(r, "xxxXxxx")
+ assert match(r, "xxxxxxXx")
+ assert match(r, "xxxXxxxxx")
def test_groupref_exists(self):
r = get_code(r"((a)|(b))c(?(2)d)$")
- assert not rsre_core.match(r, "ac")
- assert rsre_core.match(r, "acd")
- assert rsre_core.match(r, "bc")
- assert not rsre_core.match(r, "bcd")
+ assert not match(r, "ac")
+ assert match(r, "acd")
+ assert match(r, "bc")
+ assert not match(r, "bcd")
#
r = get_code(r"((a)|(b))c(?(2)d|e)$")
- assert not rsre_core.match(r, "ac")
- assert rsre_core.match(r, "acd")
- assert not rsre_core.match(r, "ace")
- assert not rsre_core.match(r, "bc")
- assert not rsre_core.match(r, "bcd")
- assert rsre_core.match(r, "bce")
+ assert not match(r, "ac")
+ assert match(r, "acd")
+ assert not match(r, "ace")
+ assert not match(r, "bc")
+ assert not match(r, "bcd")
+ assert match(r, "bce")
def test_in_ignore(self):
r = get_code(r"(?i)[a-f]")
- assert rsre_core.match(r, "b")
- assert rsre_core.match(r, "C")
- assert not rsre_core.match(r, "g")
+ assert match(r, "b")
+ assert match(r, "C")
+ assert not match(r, "g")
r = get_code(r"(?i)[a-f]+$")
- assert rsre_core.match(r, "bCdEf")
- assert not rsre_core.match(r, "g")
- assert not rsre_core.match(r, "aaagaaa")
+ assert match(r, "bCdEf")
+ assert not match(r, "g")
+ assert not match(r, "aaagaaa")
def test_not_literal(self):
r = get_code(r"[^a]")
- assert rsre_core.match(r, "A")
- assert not rsre_core.match(r, "a")
+ assert match(r, "A")
+ assert not match(r, "a")
r = get_code(r"[^a]+$")
- assert rsre_core.match(r, "Bx123")
- assert not rsre_core.match(r, "--a--")
+ assert match(r, "Bx123")
+ assert not match(r, "--a--")
def test_not_literal_ignore(self):
r = get_code(r"(?i)[^a]")
- assert rsre_core.match(r, "G")
- assert not rsre_core.match(r, "a")
- assert not rsre_core.match(r, "A")
+ assert match(r, "G")
+ assert not match(r, "a")
+ assert not match(r, "A")
r = get_code(r"(?i)[^a]+$")
- assert rsre_core.match(r, "Gx123")
- assert not rsre_core.match(r, "--A--")
+ assert match(r, "Gx123")
+ assert not match(r, "--A--")
def test_repeated_single_character_pattern(self):
r = get_code(r"foo(?:(?<=foo)x)+$")
- assert rsre_core.match(r, "foox")
+ assert match(r, "foox")
def test_flatten_marks(self):
r = get_code(r"a(b)c((d)(e))+$")
- res = rsre_core.match(r, "abcdedede")
+ res = match(r, "abcdedede")
assert res.flatten_marks() == [0, 9, 1, 2, 7, 9, 7, 8, 8, 9]
assert res.flatten_marks() == [0, 9, 1, 2, 7, 9, 7, 8, 8, 9]
def test_bug1(self):
# REPEAT_ONE inside REPEAT
r = get_code(r"(?:.+)?B")
- assert rsre_core.match(r, "AB") is not None
+ assert match(r, "AB") is not None
r = get_code(r"(?:AA+?)+B")
- assert rsre_core.match(r, "AAAB") is not None
+ assert match(r, "AAAB") is not None
r = get_code(r"(?:AA+)+?B")
- assert rsre_core.match(r, "AAAB") is not None
+ assert match(r, "AAAB") is not None
r = get_code(r"(?:AA+?)+?B")
- assert rsre_core.match(r, "AAAB") is not None
+ assert match(r, "AAAB") is not None
# REPEAT inside REPEAT
r = get_code(r"(?:(?:xy)+)?B")
- assert rsre_core.match(r, "xyB") is not None
+ assert match(r, "xyB") is not None
r = get_code(r"(?:xy(?:xy)+?)+B")
- assert rsre_core.match(r, "xyxyxyB") is not None
+ assert match(r, "xyxyxyB") is not None
r = get_code(r"(?:xy(?:xy)+)+?B")
- assert rsre_core.match(r, "xyxyxyB") is not None
+ assert match(r, "xyxyxyB") is not None
r = get_code(r"(?:xy(?:xy)+?)+?B")
- assert rsre_core.match(r, "xyxyxyB") is not None
+ assert match(r, "xyxyxyB") is not None
def test_assert_group(self):
r = get_code(r"abc(?=(..)f)(.)")
- res = rsre_core.match(r, "abcdefghi")
+ res = match(r, "abcdefghi")
assert res is not None
assert res.span(2) == (3, 4)
assert res.span(1) == (3, 5)
def test_assert_not_group(self):
r = get_code(r"abc(?!(de)f)(.)")
- res = rsre_core.match(r, "abcdeFghi")
+ res = match(r, "abcdeFghi")
assert res is not None
assert res.span(2) == (3, 4)
# this I definitely classify as Horrendously Implementation Dependent.
@@ -211,39 +212,39 @@
def test_match_start(self):
r = get_code(r"^ab")
- assert rsre_core.match(r, "abc")
- assert not rsre_core.match(r, "xxxabc", start=3)
- assert not rsre_core.match(r, "xx\nabc", start=3)
+ assert match(r, "abc")
+ assert not match(r, "xxxabc", start=3)
+ assert not match(r, "xx\nabc", start=3)
#
r = get_code(r"(?m)^ab")
- assert rsre_core.match(r, "abc")
- assert not rsre_core.match(r, "xxxabc", start=3)
- assert rsre_core.match(r, "xx\nabc", start=3)
+ assert match(r, "abc")
+ assert not match(r, "xxxabc", start=3)
+ assert match(r, "xx\nabc", start=3)
def test_match_end(self):
r = get_code("ab")
- assert rsre_core.match(r, "abc")
- assert rsre_core.match(r, "abc", end=333)
- assert rsre_core.match(r, "abc", end=3)
- assert rsre_core.match(r, "abc", end=2)
- assert not rsre_core.match(r, "abc", end=1)
- assert not rsre_core.match(r, "abc", end=0)
- assert not rsre_core.match(r, "abc", end=-1)
+ assert match(r, "abc")
+ assert match(r, "abc", end=333)
+ assert match(r, "abc", end=3)
+ assert match(r, "abc", end=2)
+ assert not match(r, "abc", end=1)
+ assert not match(r, "abc", end=0)
+ assert not match(r, "abc", end=-1)
def test_match_bug1(self):
r = get_code(r'(x??)?$')
- assert rsre_core.match(r, "x")
+ assert match(r, "x")
def test_match_bug2(self):
r = get_code(r'(x??)??$')
- assert rsre_core.match(r, "x")
+ assert match(r, "x")
def test_match_bug3(self):
if VERSION == "2.7.5":
py.test.skip("pattern fails to compile with exactly 2.7.5 "
"(works on 2.7.3 and on 2.7.trunk though)")
r = get_code(r'([ax]*?x*)?$')
- assert rsre_core.match(r, "aaxaa")
+ assert match(r, "aaxaa")
def test_bigcharset(self):
for i in range(100):
@@ -252,10 +253,10 @@
pattern = u'[%s]' % (u''.join(chars),)
r = get_code(pattern)
for c in chars:
- assert rsre_core.match(r, c)
+ assert match(r, c)
for i in range(200):
c = unichr(random.randrange(0x0, 0xD000))
- res = rsre_core.match(r, c)
+ res = match(r, c)
if c in chars:
assert res is not None
else:
@@ -264,14 +265,14 @@
def test_simple_match_1(self):
r = get_code(r"ab*bbbbbbbc")
print r
- match = rsre_core.match(r, "abbbbbbbbbcdef")
- assert match
- assert match.match_end == 11
+ m = match(r, "abbbbbbbbbcdef")
+ assert m
+ assert m.match_end == 11
def test_empty_maxuntil(self):
r = get_code("\\{\\{((?:.*?)+)\\}\\}")
- match = rsre_core.match(r, "{{a}}{{b}}")
- assert match.group(1) == "a"
+ m = match(r, "{{a}}{{b}}")
+ assert m.group(1) == "a"
def test_fullmatch_1(self):
r = get_code(r"ab*c")
@@ -307,4 +308,4 @@
r = get_code(u"[\U00010428-\U0001044f]", re.I)
assert r.count(27) == 1 # OPCODE_RANGE
r[r.index(27)] = 32 # => OPCODE_RANGE_IGNORE
- assert rsre_core.match(r, u"\U00010428")
+ assert match(r, u"\U00010428")
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit