[pypy-commit] pypy unicode-utf8-py3: fixes for py3.5 _sre module

mattip Mon, 09 Jul 2018 06:01:32 -0700

Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94839:388bbf987266
Date: 2018-07-09 06:00 -0700
http://bitbucket.org/pypy/pypy/changeset/388bbf987266/


Log:    fixes for py3.5 _sre module

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -639,7 +639,7 @@
         utf8len = w_arg._length
         # XXX deal with func() returning length or not
         result = func(w_arg._utf8.decode('utf8'), errors, state.encode_error_handler)
-        return space.newtuple([space.newbytes(result), space.newint(utf8len)])
+        return space.newtuple([space.newbytes(result.encode('utf8')), space.newint(utf8len)])
     wrap_encoder.__name__ = func.__name__
     globals()[name] = wrap_encoder
 
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -41,7 +41,7 @@
         if isinstance(ctx, rsre_core.StrMatchContext):
             return space.newbytes(ctx._string[start:end])
         elif isinstance(ctx, rsre_core.UnicodeMatchContext):
-            return space.newunicode(ctx._unicodestr[start:end])
+            return space.newtext(ctx._unicodestr[start:end])
         else:
             # unreachable
             raise SystemError
@@ -128,7 +128,7 @@
         else:
             usep = u', '
             uflags = u'|'.join([item.decode('latin-1') for item in flag_items])
-        return space.newunicode(u're.compile(%s%s%s)' % (u, usep, uflags))
+        return space.newtext(u're.compile(%s%s%s)' % (u, usep, uflags))
 
     def fget_groupindex(self, space):
         w_groupindex = self.w_groupindex
@@ -421,10 +421,10 @@
                 return space.newbytes(strbuilder.build()), n
             else:
                 assert unicodebuilder is not None
-                return space.newunicode(unicodebuilder.build()), n
+                return space.newtext(unicodebuilder.build()), n
         else:
             if space.isinstance_w(w_string, space.w_unicode):
-                w_emptystr = space.newunicode(u'')
+                w_emptystr = space.newtext(u'')
             else:
                 w_emptystr = space.newbytes('')
             w_item = space.call_method(w_emptystr, 'join',
@@ -528,10 +528,10 @@
         ctx = self.ctx
         start, end = ctx.match_start, ctx.match_end
         w_s = slice_w(space, ctx, start, end, space.w_None)
-        u = space.realuicode_w(space.repr(w_s))
+        u = space.realunicode_w(space.repr(w_s))
         if len(u) > 50:
             u = u[:50]
-        return space.newunicode(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
+        return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
                           (start, end, u))
 
     def cannot_copy_w(self):
@@ -681,7 +681,7 @@
         elif isinstance(ctx, rsre_core.StrMatchContext):
             return space.newbytes(ctx._string)
         elif isinstance(ctx, rsre_core.UnicodeMatchContext):
-            return space.newunicode(ctx._unicodestr)
+            return space.newtext(ctx._unicodestr)
         else:
             raise SystemError
 
diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -4,8 +4,6 @@
 import py
 from py.test import raises, skip
 from pypy.interpreter.gateway import app2interp_temp
-from pypy.module._sre import interp_sre
-from rpython.rlib.rsre.test import support
 
 
 def init_app_test(cls, space):
@@ -22,37 +20,6 @@
             sys.path.pop(0)
         """)
 
-def _test_sre_ctx_(self, str, start, end):
-    # Use the MatchContextForTests class, which handles Position
-    # instances instead of plain integers.  This is used to detect when
-    # we're accepting or escaping a Position to app-level, which we
-    # should not: Positions are meant to be byte indexes inside a
-    # possibly UTF8 string, not character indexes.
-    if not isinstance(start, support.Position):
-        start = support.Position(start)
-    if not isinstance(end, support.Position):
-        end = support.Position(end)
-    return support.MatchContextForTests(str, start, end, self.flags)
-
-def _bytepos_to_charindex(self, bytepos):
-    if isinstance(self.ctx, support.MatchContextForTests):
-        return self.ctx._real_pos(bytepos)
-    return _org_maker[1](self, bytepos)
-
-def setup_module(mod):
-    mod._org_maker = (
-        interp_sre.W_SRE_Pattern._make_str_match_context,
-        interp_sre.W_SRE_Match.bytepos_to_charindex,
-        )
-    interp_sre.W_SRE_Pattern._make_str_match_context = _test_sre_ctx_
-    interp_sre.W_SRE_Match.bytepos_to_charindex = _bytepos_to_charindex
-
-def teardown_module(mod):
-    (
-        interp_sre.W_SRE_Pattern._make_str_match_context,
-        interp_sre.W_SRE_Match.bytepos_to_charindex,
-    ) = mod._org_maker
-
 
 class AppTestSrePy:
     def test_magic(self):
@@ -149,9 +116,6 @@
         assert ['', 'a', 'l', 'a', 'lla'] == re.split("b(a)", "balballa")
         assert ['', 'a', None, 'l', 'u', None, 'lla'] == (
             re.split("b([ua]|(s))", "balbulla"))
-        assert ["abc"] == re.split("", "abc")
-        assert ["abc"] == re.split("X?", "abc")
-        assert ["a", "c"] == re.split("b?", "abc")
 
     def test_weakref(self):
         import re, _weakref
@@ -285,7 +249,6 @@
         assert b"rbd\nbr\n" == re.sub(b"a(.)", br"b\1\n", b"radar")
         assert (b"rbd\nbr\n", 2) == re.subn(b"a(.)", br"b\1\n", b"radar")
         assert (b"bbbba", 2) == re.subn(b"a", b"b", b"ababa", 2)
-        assert "XaXbXcX" == re.sub("", "X", "abc")
 
     def test_sub_unicode(self):
         import re
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: fixes for py3.5 _sre module

Reply via email to