Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94839:388bbf987266
Date: 2018-07-09 06:00 -0700
http://bitbucket.org/pypy/pypy/changeset/388bbf987266/
Log: fixes for py3.5 _sre module
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -639,7 +639,7 @@
utf8len = w_arg._length
# XXX deal with func() returning length or not
result = func(w_arg._utf8.decode('utf8'), errors,
state.encode_error_handler)
- return space.newtuple([space.newbytes(result), space.newint(utf8len)])
+ return space.newtuple([space.newbytes(result.encode('utf8')), space.newint(utf8len)])
wrap_encoder.__name__ = func.__name__
globals()[name] = wrap_encoder
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -41,7 +41,7 @@
if isinstance(ctx, rsre_core.StrMatchContext):
return space.newbytes(ctx._string[start:end])
elif isinstance(ctx, rsre_core.UnicodeMatchContext):
- return space.newunicode(ctx._unicodestr[start:end])
+ return space.newtext(ctx._unicodestr[start:end])
else:
# unreachable
raise SystemError
@@ -128,7 +128,7 @@
else:
usep = u', '
uflags = u'|'.join([item.decode('latin-1') for item in flag_items])
- return space.newunicode(u're.compile(%s%s%s)' % (u, usep, uflags))
+ return space.newtext(u're.compile(%s%s%s)' % (u, usep, uflags))
def fget_groupindex(self, space):
w_groupindex = self.w_groupindex
@@ -421,10 +421,10 @@
return space.newbytes(strbuilder.build()), n
else:
assert unicodebuilder is not None
- return space.newunicode(unicodebuilder.build()), n
+ return space.newtext(unicodebuilder.build()), n
else:
if space.isinstance_w(w_string, space.w_unicode):
- w_emptystr = space.newunicode(u'')
+ w_emptystr = space.newtext(u'')
else:
w_emptystr = space.newbytes('')
w_item = space.call_method(w_emptystr, 'join',
@@ -528,10 +528,10 @@
ctx = self.ctx
start, end = ctx.match_start, ctx.match_end
w_s = slice_w(space, ctx, start, end, space.w_None)
- u = space.realuicode_w(space.repr(w_s))
+ u = space.realunicode_w(space.repr(w_s))
if len(u) > 50:
u = u[:50]
- return space.newunicode(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
+ return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
(start, end, u))
def cannot_copy_w(self):
@@ -681,7 +681,7 @@
elif isinstance(ctx, rsre_core.StrMatchContext):
return space.newbytes(ctx._string)
elif isinstance(ctx, rsre_core.UnicodeMatchContext):
- return space.newunicode(ctx._unicodestr)
+ return space.newtext(ctx._unicodestr)
else:
raise SystemError
diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -4,8 +4,6 @@
import py
from py.test import raises, skip
from pypy.interpreter.gateway import app2interp_temp
-from pypy.module._sre import interp_sre
-from rpython.rlib.rsre.test import support
def init_app_test(cls, space):
@@ -22,37 +20,6 @@
sys.path.pop(0)
""")
-def _test_sre_ctx_(self, str, start, end):
- # Use the MatchContextForTests class, which handles Position
- # instances instead of plain integers. This is used to detect when
- # we're accepting or escaping a Position to app-level, which we
- # should not: Positions are meant to be byte indexes inside a
- # possibly UTF8 string, not character indexes.
- if not isinstance(start, support.Position):
- start = support.Position(start)
- if not isinstance(end, support.Position):
- end = support.Position(end)
- return support.MatchContextForTests(str, start, end, self.flags)
-
-def _bytepos_to_charindex(self, bytepos):
- if isinstance(self.ctx, support.MatchContextForTests):
- return self.ctx._real_pos(bytepos)
- return _org_maker[1](self, bytepos)
-
-def setup_module(mod):
- mod._org_maker = (
- interp_sre.W_SRE_Pattern._make_str_match_context,
- interp_sre.W_SRE_Match.bytepos_to_charindex,
- )
- interp_sre.W_SRE_Pattern._make_str_match_context = _test_sre_ctx_
- interp_sre.W_SRE_Match.bytepos_to_charindex = _bytepos_to_charindex
-
-def teardown_module(mod):
- (
- interp_sre.W_SRE_Pattern._make_str_match_context,
- interp_sre.W_SRE_Match.bytepos_to_charindex,
- ) = mod._org_maker
-
class AppTestSrePy:
def test_magic(self):
@@ -149,9 +116,6 @@
assert ['', 'a', 'l', 'a', 'lla'] == re.split("b(a)", "balballa")
assert ['', 'a', None, 'l', 'u', None, 'lla'] == (
re.split("b([ua]|(s))", "balbulla"))
- assert ["abc"] == re.split("", "abc")
- assert ["abc"] == re.split("X?", "abc")
- assert ["a", "c"] == re.split("b?", "abc")
def test_weakref(self):
import re, _weakref
@@ -285,7 +249,6 @@
assert b"rbd\nbr\n" == re.sub(b"a(.)", br"b\1\n", b"radar")
assert (b"rbd\nbr\n", 2) == re.subn(b"a(.)", br"b\1\n", b"radar")
assert (b"bbbba", 2) == re.subn(b"a", b"b", b"ababa", 2)
- assert "XaXbXcX" == re.sub("", "X", "abc")
def test_sub_unicode(self):
import re
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit