Author: Ronan Lamy <[email protected]>
Branch: unicode-utf8-test
Changeset: r93334:1bb5950b8ff5
Date: 2017-12-09 14:51 +0000
http://bitbucket.org/pypy/pypy/changeset/1bb5950b8ff5/
Log: hg merge unicode-utf8
diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -1,6 +1,6 @@
from rpython.rlib.rarithmetic import (r_uint, r_ulonglong, r_longlong,
maxint, intmask)
-from rpython.rlib import jit
+from rpython.rlib import jit, rutf8
from rpython.rlib.objectmodel import specialize
from rpython.rlib.rstruct.error import StructError
from rpython.rlib.rstruct.formatiterator import FormatIterator
@@ -107,7 +107,7 @@
def accept_unicode_arg(self):
w_obj = self.accept_obj_arg()
- return self.space.unicode_w(w_obj)
+ return self.space.utf8_len_w(w_obj)
def accept_float_arg(self):
w_obj = self.accept_obj_arg()
@@ -191,6 +191,10 @@
assert 0, "unreachable"
self.result_w.append(w_value)
+ def append_utf8(self, value):
+ w_ch = self.space.newutf8(rutf8.unichr_as_utf8(r_uint(value)), 1)
+ self.result_w.append(w_ch)
+
def get_pos(self):
return self.pos
diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -75,6 +75,7 @@
except KeyError:
msg = space.mod(space.newtext("undefined character name '%s'"),
space.newtext(name))
raise OperationError(space.w_KeyError, msg)
+ assert code >= 0
return space.newutf8(unichr_as_utf8(code), 1)
def name(self, space, w_unichr, w_default=None):
diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py
--- a/rpython/rlib/rstruct/nativefmttable.py
+++ b/rpython/rlib/rstruct/nativefmttable.py
@@ -4,7 +4,7 @@
"""
import struct
-from rpython.rlib import jit, longlong2float
+from rpython.rlib import rutf8, longlong2float
from rpython.rlib.objectmodel import specialize
from rpython.rlib.rarithmetic import r_singlefloat, widen, intmask
from rpython.rlib.rstruct import standardfmttable as std
@@ -139,17 +139,17 @@
from rpython.rlib.rstruct import unichar
def pack_unichar(fmtiter):
- unistr = fmtiter.accept_unicode_arg()
- if len(unistr) != 1:
+ utf8, lgt = fmtiter.accept_unicode_arg()
+ if lgt != 1:
raise StructError("expected a unicode string of length 1")
- c = unistr[0] # string->char conversion for the annotator
- unichar.pack_unichar(c, fmtiter.wbuf, fmtiter.pos)
+ uchr = rutf8.codepoint_at_pos(utf8, 0)
+ unichar.pack_codepoint(uchr, fmtiter.wbuf, fmtiter.pos)
fmtiter.advance(unichar.UNICODE_SIZE)
@specialize.argtype(0)
def unpack_unichar(fmtiter):
data = fmtiter.read(unichar.UNICODE_SIZE)
- fmtiter.appendobj(unichar.unpack_unichar(data))
+ fmtiter.append_utf8(unichar.unpack_codepoint(data))
native_fmttable['u'] = {'size': unichar.UNICODE_SIZE,
'alignment': unichar.UNICODE_SIZE,
diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py
--- a/rpython/rlib/rstruct/unichar.py
+++ b/rpython/rlib/rstruct/unichar.py
@@ -3,12 +3,8 @@
"""
import sys
-from rpython.rlib.runicode import MAXUNICODE
-if MAXUNICODE <= 65535:
- UNICODE_SIZE = 2
-else:
- UNICODE_SIZE = 4
+UNICODE_SIZE = 4
BIGENDIAN = sys.byteorder == "big"
def pack_unichar(unich, buf, pos):
@@ -34,7 +30,7 @@
buf.setitem(pos+2, chr((unich >> 16) & 0xFF))
buf.setitem(pos+3, chr(unich >> 24))
-def unpack_unichar(rawstring):
+def unpack_codepoint(rawstring):
assert len(rawstring) == UNICODE_SIZE
if UNICODE_SIZE == 2:
if BIGENDIAN:
@@ -54,4 +50,7 @@
ord(rawstring[1]) << 8 |
ord(rawstring[2]) << 16 |
ord(rawstring[3]) << 24)
- return unichr(n)
+ return n
+
+def unpack_unichar(rawstring):
+ return unichr(unpack_codepoint(rawstring))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit