Author: Armin Rigo <ar...@tunes.org>
Branch: unicode-utf8-re
Changeset: r93303:0fd38947b59e
Date: 2017-12-08 11:45 +0100
http://bitbucket.org/pypy/pypy/changeset/0fd38947b59e/

Log: in-progress

diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -6,9 +6,8 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
 from pypy.interpreter.error import OperationError, oefmt
 from rpython.rlib.rarithmetic import intmask
-from rpython.rlib import jit
+from rpython.rlib import jit, rutf8
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib.rutf8 import Utf8StringBuilder
 
 # ____________________________________________________________
 #
@@ -110,11 +109,15 @@
         if endpos < pos:
             endpos = pos
         if space.isinstance_w(w_string, space.w_unicode):
-            unicodestr = space.unicode_w(w_string)
-            if pos > len(unicodestr):
-                pos = len(unicodestr)
-            if endpos > len(unicodestr):
-                endpos = len(unicodestr)
+            utf8str, length = space.utf8_len_w(w_string)
+            if pos >= length:
+                bytepos = len(utf8str)
+            else:
+                bytepos = rutf8.codepoint_at_index(..)
+
+                pos = length
+            if endpos >= length:
+                endpos = length
             return rsre_core.UnicodeMatchContext(self.code, unicodestr,
                                                  pos, endpos, self.flags)
         elif space.isinstance_w(w_string, space.w_bytes):
diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -87,6 +87,13 @@
         assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
         assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
 
+    def test_findall_unicode(self):
+        import re
+        assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000")
+        assert ["a", "u"] == re.findall("b(.)", "abalbus")
+        assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
+        assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+
     def test_finditer(self):
         import re
         it = re.finditer("b(.)", "brabbel")
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit