Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: unicode-utf8-test
Changeset: r93334:1bb5950b8ff5
Date: 2017-12-09 14:51 +0000
http://bitbucket.org/pypy/pypy/changeset/1bb5950b8ff5/

Log:    hg merge unicode-utf8

diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -1,6 +1,6 @@
 from rpython.rlib.rarithmetic import (r_uint, r_ulonglong, r_longlong,
                                       maxint, intmask)
-from rpython.rlib import jit
+from rpython.rlib import jit, rutf8
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rstruct.error import StructError
 from rpython.rlib.rstruct.formatiterator import FormatIterator
@@ -107,7 +107,7 @@
 
     def accept_unicode_arg(self):
         w_obj = self.accept_obj_arg()
-        return self.space.unicode_w(w_obj)
+        return self.space.utf8_len_w(w_obj)
 
     def accept_float_arg(self):
         w_obj = self.accept_obj_arg()
@@ -191,6 +191,10 @@
             assert 0, "unreachable"
         self.result_w.append(w_value)
 
+    def append_utf8(self, value):
+        w_ch = self.space.newutf8(rutf8.unichr_as_utf8(r_uint(value)), 1)
+        self.result_w.append(w_ch)
+
     def get_pos(self):
         return self.pos
 
diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -75,6 +75,7 @@
         except KeyError:
             msg = space.mod(space.newtext("undefined character name '%s'"), space.newtext(name))
             raise OperationError(space.w_KeyError, msg)
+        assert code >= 0
         return space.newutf8(unichr_as_utf8(code), 1)
 
     def name(self, space, w_unichr, w_default=None):
diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py
--- a/rpython/rlib/rstruct/nativefmttable.py
+++ b/rpython/rlib/rstruct/nativefmttable.py
@@ -4,7 +4,7 @@
 """
 import struct
 
-from rpython.rlib import jit, longlong2float
+from rpython.rlib import rutf8, longlong2float
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rarithmetic import r_singlefloat, widen, intmask
 from rpython.rlib.rstruct import standardfmttable as std
@@ -139,17 +139,17 @@
 from rpython.rlib.rstruct import unichar
 
 def pack_unichar(fmtiter):
-    unistr = fmtiter.accept_unicode_arg()
-    if len(unistr) != 1:
+    utf8, lgt = fmtiter.accept_unicode_arg()
+    if lgt != 1:
         raise StructError("expected a unicode string of length 1")
-    c = unistr[0]   # string->char conversion for the annotator
-    unichar.pack_unichar(c, fmtiter.wbuf, fmtiter.pos)
+    uchr = rutf8.codepoint_at_pos(utf8, 0)
+    unichar.pack_codepoint(uchr, fmtiter.wbuf, fmtiter.pos)
     fmtiter.advance(unichar.UNICODE_SIZE)
 
 @specialize.argtype(0)
 def unpack_unichar(fmtiter):
     data = fmtiter.read(unichar.UNICODE_SIZE)
-    fmtiter.appendobj(unichar.unpack_unichar(data))
+    fmtiter.append_utf8(unichar.unpack_codepoint(data))
 
 native_fmttable['u'] = {'size': unichar.UNICODE_SIZE,
                         'alignment': unichar.UNICODE_SIZE,
diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py
--- a/rpython/rlib/rstruct/unichar.py
+++ b/rpython/rlib/rstruct/unichar.py
@@ -3,12 +3,8 @@
 """
 
 import sys
-from rpython.rlib.runicode import MAXUNICODE
 
-if MAXUNICODE <= 65535:
-    UNICODE_SIZE = 2
-else:
-    UNICODE_SIZE = 4
+UNICODE_SIZE = 4
 BIGENDIAN = sys.byteorder == "big"
 
 def pack_unichar(unich, buf, pos):
@@ -34,7 +30,7 @@
             buf.setitem(pos+2, chr((unich >> 16) & 0xFF))
             buf.setitem(pos+3, chr(unich >> 24))
 
-def unpack_unichar(rawstring):
+def unpack_codepoint(rawstring):
     assert len(rawstring) == UNICODE_SIZE
     if UNICODE_SIZE == 2:
         if BIGENDIAN:
@@ -54,4 +50,7 @@
                  ord(rawstring[1]) << 8 |
                  ord(rawstring[2]) << 16 |
                  ord(rawstring[3]) << 24)
-    return unichr(n)
+    return n
+
+def unpack_unichar(rawstring):
+    return unichr(unpack_codepoint(rawstring))
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to