[pypy-commit] pypy unicode-utf8-py3: remove most runicode from pypy, refactor FormatErrorW, add utf8 to SocketError

mattip Mon, 11 Feb 2019 01:12:58 -0800

Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95933:914068c8b956
Date: 2019-02-11 00:03 +0200
http://bitbucket.org/pypy/pypy/changeset/914068c8b956/


Log:    remove most runicode from pypy, refactor FormatErrorW, add utf8 to
        SocketError

diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -9,7 +9,7 @@
 from rpython.rlib.objectmodel import we_are_translated, specialize
 from rpython.rlib.objectmodel import dont_inline, not_rpython
 from rpython.rlib import rstack, rstackovf
-from rpython.rlib import rwin32, runicode
+from rpython.rlib import rwin32, rutf8
 
 from pypy.interpreter import debug
 
@@ -20,8 +20,8 @@
 def strerror(errno):
     """Translate an error code to a unicode message string."""
     from pypy.module._codecs.locale import str_decode_locale_surrogateescape
-    uni = str_decode_locale_surrogateescape(os.strerror(errno))
-    return runicode.unicode_encode_utf_8(uni, len(uni), 'strict'), len(uni)
+    utf8, lgt = str_decode_locale_surrogateescape(os.strerror(errno))
+    return utf8, lgt
 
 class OperationError(Exception):
     """Interpreter-level exception that signals an exception that should be
@@ -524,7 +524,6 @@
                         result = str(value.encode('utf-8'))
                         lgt += len(value)
                     else:
-                        from rpython.rlib import rutf8
                         result = str(value)
                         try:
                             lgt += rutf8.check_utf8(result, True)
@@ -632,13 +631,14 @@
     if rwin32.WIN32 and isinstance(e, WindowsError):
         winerror = e.winerror
         try:
-            msg = rwin32.FormatErrorW(winerror)
+            msg, lgt = rwin32.FormatErrorW(winerror)
         except ValueError:
-            msg = u'Windows Error %d' % winerror
+            msg = 'Windows Error %d' % winerror
+            lgt = len(msg)
         w_errno = space.w_None
         w_winerror = space.newint(winerror)
-        msg_utf8 = runicode.unicode_encode_utf_8(msg, len(msg), 'strict')
-        w_msg = space.newtext(msg_utf8, len(msg))
+        msg_utf8 = rutf8.str_encode_utf_8(msg, lgt, 'strict')
+        w_msg = space.newtext(msg_utf8, lgt)
     else:
         errno = e.errno
         if errno == EINTR:
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -9,7 +9,7 @@
 import struct
 import sys
 from pypy.interpreter.unicodehelper import (
-    encode_utf8, str_decode_utf8, utf8_encode_utf_32_be, str_decode_utf_32_be)
+    str_decode_utf8, utf8_encode_utf_32_be, str_decode_utf_32_be)
 from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp
 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
 from pypy.interpreter import unicodehelper as uh
@@ -30,35 +30,6 @@
 def decode_utf8(u):
     return str_decode_utf8(u, "strict", True, fake_eh)
 
-def test_encode_utf8():
-    space = FakeSpace()
-    assert encode_utf8(space, u"abc") == "abc"
-    assert encode_utf8(space, u"\u1234") == "\xe1\x88\xb4"
-    py.test.raises(Hit, encode_utf8, space, u"\ud800")
-    py.test.raises(Hit, encode_utf8, space, u"\udc00")
-    if option.runappdirect or sys.maxunicode > 0xFFFF:
-        # for the following test, go to lengths to avoid CPython's
-        # optimizer and .pyc file storage, which collapse the two
-        # surrogates into one
-        c = u"\udc00"
-        py.test.raises(Hit, encode_utf8, space, u"\ud800" + c)
-
-def test_encode_utf8_allow_surrogates():
-    sp = FakeSpace()
-    assert encode_utf8(sp, u"\ud800", allow_surrogates=True) == "\xed\xa0\x80"
-    assert encode_utf8(sp, u"\udc00", allow_surrogates=True) == "\xed\xb0\x80"
-    c = u"\udc00"
-    got = encode_utf8(sp, u"\ud800" + c, allow_surrogates=True)
-    assert got == "\xf0\x90\x80\x80"
-
-def test_encode_utf8sp():
-    sp = FakeSpace()
-    assert encode_utf8sp(sp, u"\ud800") == "\xed\xa0\x80"
-    assert encode_utf8sp(sp, u"\udc00") == "\xed\xb0\x80"
-    c = u"\udc00"
-    got = encode_utf8sp(sp, u"\ud800" + c)
-    assert got == "\xed\xa0\x80\xed\xb0\x80"
-
 def test_decode_utf8():
     assert decode_utf8("abc") == ("abc", 3, 3)
     assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 1, 3)
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -62,13 +62,18 @@
     if _WIN32:
         bytes = space.bytes_w(w_string)
         slen = len(bytes)
-        uni, size = runicode.str_decode_mbcs(bytes, slen, 'strict', final=True,
+        uni, lgt = runicode.str_decode_mbcs(bytes, slen, 'strict', final=True,
                            errorhandler=errorhandler, force_ignore=False)
+        
+        utf8 = uni.encode('utf-8')
+        
+        utf8 = uni.encode('utf-8')
+        
+        utf8 = uni.encode('utf-8')
     elif 0 and  _MACOSX:
         bytes = space.bytes_w(w_string)
-        utf8 = str_decode_utf8(bytes, 'surrogateescape', True, errorhandler,
-                               allow_surrogates=False)[0]
-        uni = utf8.decode('utf-8')
+        utf8, lgt, pos  = str_decode_utf8(bytes, 'surrogateescape', True,
+                                    errorhandler, allow_surrogates=False)
     elif space.sys.filesystemencoding is None or state.codec_need_encodings:
         # bootstrap check: if the filesystemencoding isn't initialized
         # or the filesystem codec is implemented in Python we cannot
@@ -77,15 +82,13 @@
         from pypy.module._codecs.locale import (
             str_decode_locale_surrogateescape)
         bytes = space.bytes_w(w_string)
-        uni = str_decode_locale_surrogateescape(bytes)
+        utf8, lgt = str_decode_locale_surrogateescape(bytes)
     else:
         from pypy.module.sys.interp_encoding import getfilesystemencoding
         return space.call_method(w_string, 'decode',
                                  getfilesystemencoding(space),
                                  space.newtext('surrogateescape'))
-    assert isinstance(uni, unicode)
-    return space.newtext(runicode.unicode_encode_utf_8(uni,
-                                 len(uni), 'strict', allow_surrogates=True), len(uni))
+    return space.newtext(utf8, lgt)
 
 def fsencode(space, w_uni):
     from pypy.module._codecs import interp_codecs
@@ -318,7 +321,7 @@
         from rpython.rlib import runicode
         res, size = runicode.str_decode_mbcs(s, slen, errors, final=final,
                            errorhandler=errorhandler, force_ignore=force_ignore)
-        res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict')
+        res_utf8 = unicode_encode_utf_8(res, len(res), 'strict')
         return res_utf8, len(res), len(res)
 
 def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False):
@@ -1010,6 +1013,7 @@
 @specialize.memo()
 def _encode_unicode_error_handler(space):
     # Fast version of the "strict" errors handler.
+    # used only in (unused) encode_utf8
     from rpython.rlib import runicode
     def raise_unicode_exception_encode(errors, encoding, msg, uni,
                                        startingpos, endingpos):
@@ -1032,6 +1036,7 @@
     # which never raises UnicodeEncodeError.  Surrogate pairs are then
     # allowed, either paired or lone.  A paired surrogate is considered
     # like the non-BMP character it stands for.  See also *_utf8sp().
+    xxx
     from rpython.rlib import runicode
     assert isinstance(uni, unicode)
     return runicode.unicode_encode_utf_8(
@@ -1051,7 +1056,7 @@
 def decode_utf8sp(space, string):
     # Surrogate-preserving utf-8 decoding.  Assuming there is no
     # encoding error, it should always be reversible, and the reverse is
-    # encode_utf8sp().
+    # unused encode_utf8sp().
     return str_decode_utf8(string, "string", True, decode_never_raise,
                            allow_surrogates=True)
 
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -39,7 +39,7 @@
             so it needs to be converted by the codec
 
             Returns (str_or_none, newpos) as error
-            handlers used outside runicode return utf8
+            handlers return utf8 so we add whether they used unicode or bytes
             """
             w_errorhandler = lookup_error(space, errors)
             if decode:
@@ -455,8 +455,7 @@
             ch = 0
         if ch == 0:
             raise OperationError(space.type(w_exc), w_exc)
-        ch_utf8 = runicode.unicode_encode_utf_8(unichr(ch), 1, 'strict',
-                                                allow_surrogates=True)
+        ch_utf8 = rutf8.unichr_as_utf8(ch, allow_surrogates=True)
         return space.newtuple([space.newtext(ch_utf8, 1),
                                space.newint(start + bytelength)])
     else:
diff --git a/pypy/module/_codecs/locale.py b/pypy/module/_codecs/locale.py
--- a/pypy/module/_codecs/locale.py
+++ b/pypy/module/_codecs/locale.py
@@ -6,10 +6,12 @@
 import py
 import sys
 from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import UnicodeBuilder, assert_str0
-from rpython.rlib.runicode import (code_to_unichr,
+from rpython.rlib.rstring import StringBuilder, assert_str0
+from rpython.rlib.runicode import (
     default_unicode_error_decode, default_unicode_error_encode)
+from rpython.rlib.rutf8 import unichr_as_utf8
 from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.rarithmetic import r_uint
 from rpython.translator import cdir
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 
@@ -38,15 +40,13 @@
                                   lltype.Void)
 
 
-def unicode_encode_locale_surrogateescape(u, errorhandler=None):
+def unicode_encode_locale_surrogateescape(u):
     """Encode unicode via the locale codecs (POSIX wcstombs) with the
     surrogateescape handler.
 
-    The optional errorhandler is only called in the case of fatal
-    errors.
+    The errorhandler is never called
     """
-    if errorhandler is None:
-        errorhandler = default_unicode_error_encode
+    errorhandler = default_unicode_error_encode
 
     with lltype.scoped_alloc(rffi.SIZE_TP.TO, 1) as errorposp:
         with scoped_unicode2rawwcharp(u) as ubuf:
@@ -64,15 +64,14 @@
             pypy_wchar2char_free(sbuf)
 
 
-def str_decode_locale_surrogateescape(s, errorhandler=None):
+def str_decode_locale_surrogateescape(s):
     """Decode strs via the locale codecs (POSIX mrbtowc) with the
     surrogateescape handler.
 
-    The optional errorhandler is only called in the case of fatal
+    The errorhandler is never called
     errors.
     """
-    if errorhandler is None:
-        errorhandler = default_unicode_error_decode
+    errorhandler = default_unicode_error_decode
 
     with lltype.scoped_alloc(rffi.SIZE_TP.TO, 1) as sizep:
         with rffi.scoped_str2charp(s) as sbuf:
@@ -82,7 +81,7 @@
                 errmsg = _errmsg("pypy_char2wchar")
                 errorhandler('strict', 'filesystemencoding', errmsg, s, 0, 1)
             size = rffi.cast(lltype.Signed, sizep[0])
-            return rawwcharp2unicoden(ubuf, size)
+            return rawwcharp2utf8en(ubuf, size), size
         finally:
             pypy_char2wchar_free(ubuf)
 
@@ -138,14 +137,17 @@
 _unicode2rawwcharp_loop._annenforceargs_ = [unicode, None]
 
 
-def rawwcharp2unicoden(wcp, maxlen):
-    b = UnicodeBuilder(maxlen)
+def rawwcharp2utf8en(wcp, maxlen):
+    b = StringBuilder(maxlen)
     i = 0
-    while i < maxlen and rffi.cast(lltype.Signed, wcp[i]) != 0:
-        b.append(code_to_unichr(wcp[i]))
+    while i < maxlen:
+        v = r_uint(wcp[i])
+        if v == 0:
+            break
+        b.append(unichr_as_utf8(v, True))
         i += 1
     return assert_str0(b.build())
-rawwcharp2unicoden._annenforceargs_ = [None, int]
+rawwcharp2utf8en._annenforceargs_ = [None, int]
 
 
 def _should_merge_surrogates():
diff --git a/pypy/module/_codecs/test/test_locale.py b/pypy/module/_codecs/test/test_locale.py
--- a/pypy/module/_codecs/test/test_locale.py
+++ b/pypy/module/_codecs/test/test_locale.py
@@ -57,8 +57,8 @@
         locale_decoder = str_decode_locale_surrogateescape
         utf8_decoder = self.getdecoder('utf-8')
         for val in 'foo', ' \xe6\x97\xa5\xe6\x9c\xac', '\xf0\x93\x88\x8c':
-            assert (locale_decoder(val).encode('utf8') ==
-                    utf8_decoder(val, 'strict', True, None)[0])
+            assert (locale_decoder(val) ==
+                    utf8_decoder(val, 'strict', True, None)[:2])
 
     def test_decode_locale_errorhandler(self):
         self.setlocale("en_US.UTF-8")
@@ -67,5 +67,5 @@
         decode_error_handler = self.getstate().decode_error_handler
         val = 'foo\xe3bar'
         expected = utf8_decoder(val, 'surrogateescape', True,
-                                decode_error_handler)[0]
-        assert locale_decoder(val).encode('utf8') == expected
+                                decode_error_handler)
+        assert locale_decoder(val) == expected[:2]
diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -855,7 +855,7 @@
 
 @specialize.arg(2)
 def converted_error(space, e, eintr_retry=False):
-    message = e.get_msg_unicode()
+    message, lgt = e.get_msg_utf8()
     w_exception_class = get_error(space, e.applevelerrcls)
     if isinstance(e, SocketErrorWithErrno):
         if e.errno == errno.EINTR:
@@ -863,9 +863,10 @@
             if eintr_retry:
                 return       # only return None if eintr_retry==True
         w_exception = space.call_function(w_exception_class, space.newint(e.errno),
-                                      space.newtext(message))
+                                      space.newtext(message, lgt))
     else:
-        w_exception = space.call_function(w_exception_class, space.newtext(message))
+        w_exception = space.call_function(w_exception_class,
+                                          space.newtext(message, lgt))
     raise OperationError(w_exception_class, w_exception)
 
 def explicit_socket_error(space, msg):
diff --git a/pypy/module/array/reconstructor.py b/pypy/module/array/reconstructor.py
--- a/pypy/module/array/reconstructor.py
+++ b/pypy/module/array/reconstructor.py
@@ -5,7 +5,7 @@
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter.error import oefmt
 from pypy.interpreter.argument import Arguments
-from rpython.rlib import runicode, rbigint
+from rpython.rlib import rutf8, rbigint
 from rpython.rlib.rstruct import ieee
 from rpython.rtyper.lltypesystem import rffi
 
@@ -155,7 +155,7 @@
     elif typecode == 'B':
         return UNSIGNED_INT8
     elif typecode == 'u':
-        if runicode.MAXUNICODE == 0xffff:
+        if rutf8.MAXUNICODE == 0xffff:
             return UTF16_LE + IS_BIG_ENDIAN
         else:
             return UTF32_LE + IS_BIG_ENDIAN
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -1,7 +1,7 @@
 from rpython.rtyper.lltypesystem import rffi, lltype
 from rpython.rlib import rstring
 from rpython.rlib.rarithmetic import widen
-from rpython.rlib import rstring, runicode, rutf8
+from rpython.rlib import rstring, rutf8
 from rpython.tool.sourcetools import func_renamer
 
 from pypy.interpreter.error import OperationError, oefmt
@@ -83,12 +83,11 @@
     Creates the unicode in the interpreter. The PyUnicodeObject buffer must not
     be modified after this call.
     """
-    s = rffi.wcharpsize2unicode(get_wbuffer(py_obj), get_wsize(py_obj))
-    s_utf8 = runicode.unicode_encode_utf_8(s, len(s), 'strict',
-                                           allow_surrogates=True)
+    lgt = get_wsize(py_obj)
+    s_utf8 = rffi.wcharpsize2utf8(get_wbuffer(py_obj), lgt)
     w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
     w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
-    w_obj.__init__(s_utf8, len(s))
+    w_obj.__init__(s_utf8, lgt)
     track_reference(space, py_obj, w_obj)
     return w_obj
 
diff --git a/pypy/module/select/interp_select.py b/pypy/module/select/interp_select.py
--- a/pypy/module/select/interp_select.py
+++ b/pypy/module/select/interp_select.py
@@ -80,10 +80,10 @@
                     if timeout < 0:
                         timeout = 0
                     continue
-                message = e.get_msg_unicode()
+                message, lgt = e.get_msg_utf8()
                 raise OperationError(space.w_OSError,
                                      space.newtuple([space.newint(e.errno),
-                                                     space.newtext(message)]))
+                                                 space.newtext(message, lgt)]))
             finally:
                 self.running = False
             break
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -7,8 +7,6 @@
 from rpython.rlib.rstring import (
     StringBuilder, split, rsplit, UnicodeBuilder, replace_count, startswith,
     endswith)
-from rpython.rlib.runicode import (
-    unicode_encode_utf8_forbid_surrogates, SurrogateError)
 from rpython.rlib import rutf8, jit
 
 from pypy.interpreter import unicodehelper
@@ -1924,8 +1922,9 @@
         result.append(chr(uchr))
     return result.build()
 
+from rpython.rlib.runicode import unicode_encode_utf8_forbid_surrogates
 @jit.elidable
-def g_encode_utf8(value):
+def XXX_g_encode_utf8(value):
     """This is a global function because of jit.conditional_call_value"""
     return unicode_encode_utf8_forbid_surrogates(value, len(value))
 
diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py
--- a/rpython/rlib/_rsocket_rffi.py
+++ b/rpython/rlib/_rsocket_rffi.py
@@ -1369,8 +1369,15 @@
         return rwin32.FormatError(errno)
 
     def socket_strerror_unicode(errno):
+        return rwin32.FormatErrorW(errno)[0]
+
+    def gai_strerror_unicode(errno):
+        return rwin32.FormatErrorW(errno)[0]
+
+    def socket_strerror_utf8(errno):
         return rwin32.FormatErrorW(errno)
-    def gai_strerror_unicode(errno):
+
+    def gai_strerror_utf8(errno):
         return rwin32.FormatErrorW(errno)
 
     # WinSock does not use a bitmask in select, and uses
@@ -1386,7 +1393,16 @@
 
     def socket_strerror_unicode(errno):
         return socket_strerror_str(errno).decode('latin-1')
+
     def gai_strerror_unicode(errno):
         return gai_strerror_str(errno).decode('latin-1')
 
+    def socket_strerror_utf8(errno):
+        msg = socket_strerror_str(errno)
+        return msg, len(msg)
+
+    def gai_strerror_utf8(errno):
+        msg = gai_strerror_str(errno)
+        return msg, len(msg)
+
     MAX_FD_SIZE = FD_SETSIZE
diff --git a/rpython/rlib/rdynload.py b/rpython/rlib/rdynload.py
--- a/rpython/rlib/rdynload.py
+++ b/rpython/rlib/rdynload.py
@@ -228,18 +228,16 @@
         res = rwin32.LoadLibrary(name)
         if not res:
             err = rwin32.GetLastError_saved()
-            ustr = rwin32.FormatErrorW(err)
-            # DLOpenError unicode msg breaks translation of cpyext create_extension_module
-            raise DLOpenError(ustr.encode('utf-8'))
+            ustr, lgt = rwin32.FormatErrorW(err)
+            raise DLOpenError(ustr)
         return res
 
     def dlopenex(name):
         res = rwin32.LoadLibraryExA(name)
         if not res:
             err = rwin32.GetLastError_saved()
-            ustr = rwin32.FormatErrorW(err)
-            # DLOpenError unicode msg breaks translation of cpyext create_extension_module
-            raise DLOpenError(ustr.encode('utf-8'))
+            ustr, lgt = rwin32.FormatErrorW(err)
+            raise DLOpenError(ustr)
         return res
 
     def dlopenU(name, mode=-1):
@@ -247,9 +245,8 @@
         res = rwin32.LoadLibraryW(name)
         if not res:
             err = rwin32.GetLastError_saved()
-            ustr = rwin32.FormatErrorW(err)
-            # DLOpenError unicode msg breaks translation of cpyext create_extension_module
-            raise DLOpenError(ustr.encode('utf-8'))
+            ustr, lgt = rwin32.FormatErrorW(err)
+            raise DLOpenError(ustr)
         return res
 
     def dlclose(handle):
diff --git a/rpython/rlib/rpoll.py b/rpython/rlib/rpoll.py
--- a/rpython/rlib/rpoll.py
+++ b/rpython/rlib/rpoll.py
@@ -30,6 +30,8 @@
         return _c.socket_strerror_str(self.errno)
     def get_msg_unicode(self):
         return _c.socket_strerror_unicode(self.errno)
+    def get_msg_utf8(self):
+        return _c.socket_strerror_utf8(self.errno)
 
 class SelectError(Exception):
     def __init__(self, errno):
@@ -38,6 +40,8 @@
         return _c.socket_strerror_str(self.errno)
     def get_msg_unicode(self):
         return _c.socket_strerror_unicode(self.errno)
+    def get_msg_utf8(self):
+        return _c.socket_strerror_utf8(self.errno)
 
 # ____________________________________________________________
 # poll() for POSIX systems
diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py
--- a/rpython/rlib/rsocket.py
+++ b/rpython/rlib/rsocket.py
@@ -1301,6 +1301,9 @@
         return ''
     def get_msg_unicode(self):
         return self.get_msg().decode('latin-1')
+    def get_msg_utf8(self):
+        msg = self.get_msg()
+        return msg, len(msg)
     def __str__(self):
         return self.get_msg()
 
@@ -1319,6 +1322,8 @@
         return _c.socket_strerror_str(self.errno)
     def get_msg_unicode(self):
         return _c.socket_strerror_unicode(self.errno)
+    def get_msg_utf8(self):
+        return _c.socket_strerror_utf8(self.errno)
 
 def last_error():
     return CSocketError(_c.geterrno())
@@ -1329,6 +1334,8 @@
         return _c.gai_strerror_str(self.errno)
     def get_msg_unicode(self):
         return _c.gai_strerror_unicode(self.errno)
+    def get_msg_utf8(self):
+        return _c.gai_strerror_utf8(self.errno)
 
 class HSocketError(SocketError):
     applevelerrcls = 'herror'
diff --git a/rpython/rlib/rwin32.py b/rpython/rlib/rwin32.py
--- a/rpython/rlib/rwin32.py
+++ b/rpython/rlib/rwin32.py
@@ -269,6 +269,9 @@
     def FormatError(code):
         return llimpl_FormatError(code)
     def FormatErrorW(code):
+        """
+        returns utf8, n_codepoints
+        """
         return llimpl_FormatErrorW(code)
 
     def llimpl_FormatError(code):
@@ -326,7 +329,7 @@
             if buflen <= 0:
                 result = u'Windows Error %d' % (code,)
             else:
-                result = rffi.wcharpsize2unicode(s_buf, buflen)
+                result = rffi.wcharpsize2utf8(s_buf, buflen), buflen
         finally:
             LocalFree(rffi.cast(rffi.VOIDP, buf[0]))
             lltype.free(buf, flavor='raw')
diff --git a/rpython/rlib/test/test_rwin32.py b/rpython/rlib/test/test_rwin32.py
--- a/rpython/rlib/test/test_rwin32.py
+++ b/rpython/rlib/test/test_rwin32.py
@@ -90,9 +90,9 @@
     assert '%2' in msg
 
 def test_formaterror_unicode():
-    msg = rwin32.FormatErrorW(34)
-    assert type(msg) is unicode
-    assert u'%2' in msg
+    msg, lgt = rwin32.FormatErrorW(34)
+    assert type(msg) is str
+    assert '%2' in msg
 
 def test_loadlibraryA():
     # test0 can be loaded alone, but test1 requires the modified search path
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: remove most runicode from pypy, refactor FormatErrorW, add utf8 to SocketError

Reply via email to