Author: Ronan Lamy <[email protected]>
Branch: py3.5
Changeset: r89923:c36e0e41d514
Date: 2017-02-04 18:09 +0000
http://bitbucket.org/pypy/pypy/changeset/c36e0e41d514/

Log:    Merged in PEP393 (pull request #513)

        PEP 393
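
        For orientation, the header added below is organized around a
        ready/kind/data access pattern; here is a minimal sketch of that
        pattern (an illustration assuming the macros behave as documented in
        the header, not code taken from this changeset):

            #include <Python.h>

            /* Sketch: find the largest code point in a str using the PEP 393
               macros.  The kind and data pointer are cached outside the loop,
               as the header comments recommend. */
            static Py_UCS4
            max_code_point(PyObject *str)
            {
                Py_ssize_t i, len;
                int kind;
                void *data;
                Py_UCS4 maxchar = 0;

                if (PyUnicode_READY(str) < 0)   /* ensure the canonical form exists */
                    return (Py_UCS4)-1;
                kind = PyUnicode_KIND(str);     /* 1, 2 or 4 bytes per code point */
                data = PyUnicode_DATA(str);     /* raw buffer, interpreted via kind */
                len = PyUnicode_GET_LENGTH(str);
                for (i = 0; i < len; i++) {
                    Py_UCS4 ch = PyUnicode_READ(kind, data, i);
                    if (ch > maxchar)
                        maxchar = ch;
                }
                return maxchar;
            }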

diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst
--- a/pypy/doc/whatsnew-pypy3-head.rst
+++ b/pypy/doc/whatsnew-pypy3-head.rst
@@ -36,3 +36,7 @@
 .. branch: py3.5-time
 
 .. branch: py3.5-ssl
+
+.. branch: PEP393
+
+Implement some level of compatibility with PEP 393 APIs.
diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h
--- a/pypy/module/cpyext/include/unicodeobject.h
+++ b/pypy/module/cpyext/include/unicodeobject.h
@@ -7,12 +7,318 @@
 
 #include <cpyext_unicodeobject.h>
 
-PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char *format, va_list vargs);
-PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char *format, ...);
+/* Fast access macros */
+#ifndef Py_LIMITED_API
 
+#define PyUnicode_WSTR_LENGTH(op) \
+    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
+     ((PyASCIIObject*)op)->length :                    \
+     ((PyCompactUnicodeObject*)op)->wstr_length)
+
+/* Returns the deprecated Py_UNICODE representation's size in code units
+   (this includes surrogate pairs as 2 units).
+   If the Py_UNICODE representation is not available, it will be computed
+   on request.  Use PyUnicode_GET_LENGTH() for the length in code points. */
+
+#define PyUnicode_GET_SIZE(op)                       \
+    (assert(PyUnicode_Check(op)),                    \
+     (((PyASCIIObject *)(op))->wstr) ?               \
+      PyUnicode_WSTR_LENGTH(op) :                    \
+      ((void)PyUnicode_AsUnicode((PyObject *)(op)),  \
+       assert(((PyASCIIObject *)(op))->wstr),        \
+       PyUnicode_WSTR_LENGTH(op)))
+
+#define PyUnicode_GET_DATA_SIZE(op) \
+    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
+
+/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
+   representation on demand.  Using this macro is very inefficient now,
+   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
+   use PyUnicode_WRITE() and PyUnicode_READ(). */
+
+#define PyUnicode_AS_UNICODE(op) \
+    (assert(PyUnicode_Check(op)), \
+     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
+      PyUnicode_AsUnicode((PyObject *)(op)))
+
+#define PyUnicode_AS_DATA(op) \
+    ((const char *)(PyUnicode_AS_UNICODE(op)))
+
+
+/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
+
+/* Values for PyASCIIObject.state: */
+
+/* Interning state. */
+#define SSTATE_NOT_INTERNED 0
+#define SSTATE_INTERNED_MORTAL 1
+#define SSTATE_INTERNED_IMMORTAL 2
+
+/* Return true if the string contains only ASCII characters, or 0 if not. The
+   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
+   ready. */
+#define PyUnicode_IS_ASCII(op)                   \
+    (assert(PyUnicode_Check(op)),                \
+     assert(PyUnicode_IS_READY(op)),             \
+     ((PyASCIIObject*)op)->state.ascii)
+
+/* Return true if the string is compact or 0 if not.
+   No type checks or Ready calls are performed. */
+#define PyUnicode_IS_COMPACT(op) \
+    (((PyASCIIObject*)(op))->state.compact)
+
+/* Return true if the string is a compact ASCII string (use PyASCIIObject
+   structure), or 0 if not.  No type checks or Ready calls are performed. */
+#define PyUnicode_IS_COMPACT_ASCII(op)                 \
+    (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
+
+enum PyUnicode_Kind {
+/* String contains only wstr byte characters.  This is only possible
+   when the string was created with a legacy API and _PyUnicode_Ready()
+   has not been called yet.  */
+    PyUnicode_WCHAR_KIND = 0,
+/* Return values of the PyUnicode_KIND() macro: */
+    PyUnicode_1BYTE_KIND = 1,
+    PyUnicode_2BYTE_KIND = 2,
+    PyUnicode_4BYTE_KIND = 4
+};
+
+/* Return pointers to the canonical representation cast to unsigned char,
+   Py_UCS2, or Py_UCS4 for direct character access.
+   No checks are performed, use PyUnicode_KIND() before to ensure
+   these will work correctly. */
+
+#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
+#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
+#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
+
+/* Return one of the PyUnicode_*_KIND values defined above. */
+#define PyUnicode_KIND(op) \
+    (assert(PyUnicode_Check(op)), \
+     assert(PyUnicode_IS_READY(op)),            \
+     ((PyASCIIObject *)(op))->state.kind)
+
+/* Return a void pointer to the raw unicode buffer. */
+#define _PyUnicode_COMPACT_DATA(op)                     \
+    (PyUnicode_IS_ASCII(op) ?                   \
+     ((void*)((PyASCIIObject*)(op) + 1)) :              \
+     ((void*)((PyCompactUnicodeObject*)(op) + 1)))
+
+#define _PyUnicode_NONCOMPACT_DATA(op)                  \
+    (assert(((PyUnicodeObject*)(op))->data),        \
+     ((((PyUnicodeObject *)(op))->data)))
+
+#define PyUnicode_DATA(op) \
+    (assert(PyUnicode_Check(op)), \
+     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
+     _PyUnicode_NONCOMPACT_DATA(op))
+
+/* In the access macros below, "kind" may be evaluated more than once.
+   All other macro parameters are evaluated exactly once, so it is safe
+   to put side effects into them (such as increasing the index). */
+
+/* Write into the canonical representation; this macro does not do any sanity
+   checks and is intended for usage in loops.  The caller should cache the
+   kind and data pointers obtained from other macro calls.
+   index is the index in the string (starts at 0) and value is the new
+   code point value which should be written to that location. */
+#define PyUnicode_WRITE(kind, data, index, value) \
+    do { \
+        switch ((kind)) { \
+        case PyUnicode_1BYTE_KIND: { \
+            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
+            break; \
+        } \
+        case PyUnicode_2BYTE_KIND: { \
+            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
+            break; \
+        } \
+        default: { \
+            assert((kind) == PyUnicode_4BYTE_KIND); \
+            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
+        } \
+        } \
+    } while (0)
+
+/* Read a code point from the string's canonical representation.  No checks
+   or ready calls are performed. */
+#define PyUnicode_READ(kind, data, index) \
+    ((Py_UCS4) \
+    ((kind) == PyUnicode_1BYTE_KIND ? \
+        ((const Py_UCS1 *)(data))[(index)] : \
+        ((kind) == PyUnicode_2BYTE_KIND ? \
+            ((const Py_UCS2 *)(data))[(index)] : \
+            ((const Py_UCS4 *)(data))[(index)] \
+        ) \
+    ))
+
+/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
+   calls PyUnicode_KIND() and might call it twice.  For single reads, use
+   PyUnicode_READ_CHAR; for multiple consecutive reads, callers should
+   cache the kind and use PyUnicode_READ instead. */
+#define PyUnicode_READ_CHAR(unicode, index) \
+    (assert(PyUnicode_Check(unicode)),          \
+     assert(PyUnicode_IS_READY(unicode)),       \
+     (Py_UCS4)                                  \
+        (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
+            ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
+            (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
+                ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
+                ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
+            ) \
+        ))
+
+/* Returns the length of the unicode string. The caller has to make sure that
+   the string has its canonical representation set before calling
+   this macro.  Call PyUnicode_(FAST_)Ready to ensure that. */
+#define PyUnicode_GET_LENGTH(op)                \
+    (assert(PyUnicode_Check(op)),               \
+     assert(PyUnicode_IS_READY(op)),            \
+     ((PyASCIIObject *)(op))->length)
+
+
+/* Fast check to determine whether an object is ready. Equivalent to
+   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data != NULL */
+
+#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
+
+/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
+   case.  If the canonical representation is not yet set, it will still call
+   _PyUnicode_Ready().
+   Returns 0 on success and -1 on errors. */
+#define PyUnicode_READY(op)                        \
+    (assert(PyUnicode_Check(op)),                       \
+     (PyUnicode_IS_READY(op) ?                          \
+      0 : _PyUnicode_Ready((PyObject *)(op))))
+
+/* Return a maximum character value which is suitable for creating another
+   string based on op.  This is always an approximation but more efficient
+   than iterating over the string. */
+#define PyUnicode_MAX_CHAR_VALUE(op) \
+    (assert(PyUnicode_IS_READY(op)),                                    \
+     (PyUnicode_IS_ASCII(op) ?                                          \
+      (0x7f) :                                                          \
+      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
+       (0xffU) :                                                        \
+       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
+        (0xffffU) :                                                     \
+        (0x10ffffU)))))
+
+#endif
+
+/* --- Constants ---------------------------------------------------------- */
+
+/* This Unicode character will be used as replacement character during
+   decoding if the errors argument is set to "replace". Note: the
+   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
+   Unicode 3.0. */
+
+#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
+
+/* === Public API ========================================================= */
+
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
+    const char *format,   /* ASCII-encoded string  */
+    va_list vargs
+    );
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
+    const char *format,   /* ASCII-encoded string  */
+    ...
+    );
+
+/* Use only if you know it's a string */
+#define PyUnicode_CHECK_INTERNED(op) \
+    (((PyASCIIObject *)(op))->state.interned)
+
+/* --- wchar_t support for platforms which support it --------------------- */
+
+#ifdef HAVE_WCHAR_H
+
+/* Convert the Unicode object to a wide character string. The output string
+   always ends with a null character. If size is not NULL, write the number of
+   wide characters (excluding the null character) into *size.
+
+   Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
+   on success. On error, returns NULL and raises a MemoryError; *size is then
+   undefined. */
+
+PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
+    PyObject *unicode,          /* Unicode object */
+    Py_ssize_t *size            /* number of characters of the result */
+    );
+
+#endif
+
+/* === Builtin Codecs =====================================================
+
+   Many of these APIs take two arguments encoding and errors. These
+   parameters encoding and errors have the same semantics as the ones
+   of the builtin str() API.
+
+   Setting encoding to NULL causes the default encoding (UTF-8) to be used.
+
+   Error handling is set by errors which may also be set to NULL
+   meaning to use the default handling defined for the codec. Default
+   error handling for all builtin codecs is "strict" (ValueErrors are
+   raised).
+
+   The codecs all use a similar interface. Only deviations from the
+   generic ones are documented.
+
+*/
+
+/* --- Manage the default encoding ---------------------------------------- */
+
+/* Returns a pointer to the default encoding (UTF-8) of the
+   Unicode object unicode and the size of the encoded representation
+   in bytes stored in *size.
+
+   In case of an error, *size is not set.
+
+   This function caches the UTF-8 encoded string in the unicodeobject
+   and subsequent calls will return the same string.  The memory is released
+   when the unicodeobject is deallocated.
+
+   _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
+   support the previous internal function with the same behaviour.
+
+   *** This API is for interpreter INTERNAL USE ONLY and will likely
+   *** be removed or changed in the future.
+
+   *** If you need to access the Unicode object as UTF-8 bytes string,
+   *** please use PyUnicode_AsUTF8String() instead.
+*/
+
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
+    PyObject *unicode,
+    Py_ssize_t *size);
+#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
+#endif
+
+/* Returns a pointer to the default encoding (UTF-8) of the
+   Unicode object unicode.
+
+   Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
+   in the unicodeobject.
+
+   _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
+   support the previous internal function with the same behaviour.
+
+   Use of this API is DEPRECATED since no size information can be
+   extracted from the returned data.
+
+   *** This API is for interpreter INTERNAL USE ONLY and will likely
+   *** be removed or changed for Python 3.1.
+
+   *** If you need to access the Unicode object as UTF-8 bytes string,
+   *** please use PyUnicode_AsUTF8String() instead.
+
+*/
+
+#ifndef Py_LIMITED_API
 #define _PyUnicode_AsString PyUnicode_AsUTF8
-
-PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(PyObject *unicode, Py_ssize_t *size);
+#endif
 
 Py_LOCAL_INLINE(size_t) Py_UNICODE_strlen(const Py_UNICODE *u)
 {
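
Regarding the UTF-8 caching behaviour documented above for
PyUnicode_AsUTF8AndSize(): the buffer it returns is owned by the unicode
object and released together with it. A short usage sketch, assuming the
CPython-style semantics described in the header comments (illustration only;
the header itself flags this API as interpreter-internal):

    #include <Python.h>
    #include <stdio.h>

    /* Sketch: read the cached UTF-8 form of a str.  The buffer must not be
       freed and is only valid while the str object stays alive. */
    static int
    print_utf8(PyObject *str)
    {
        Py_ssize_t size;
        char *utf8 = PyUnicode_AsUTF8AndSize(str, &size);
        if (utf8 == NULL)
            return -1;              /* an exception has been set */
        printf("%ld bytes: %s\n", (long)size, utf8);
        return 0;
    }
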
diff --git a/pypy/module/cpyext/parse/cpyext_unicodeobject.h b/pypy/module/cpyext/parse/cpyext_unicodeobject.h
--- a/pypy/module/cpyext/parse/cpyext_unicodeobject.h
+++ b/pypy/module/cpyext/parse/cpyext_unicodeobject.h
@@ -1,13 +1,199 @@
+/* --- Internal Unicode Format -------------------------------------------- */
+
+
+/* Py_UNICODE was the native Unicode storage format (code unit) used by
+   Python and represents a single Unicode element in the Unicode type.
+   With PEP 393, Py_UNICODE is deprecated and replaced with a
+   typedef to wchar_t. */
+
+#define PY_UNICODE_TYPE wchar_t
+typedef wchar_t Py_UNICODE;
+
+/* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs for the respective
+   unicode representations. */
 typedef unsigned int Py_UCS4;
-/* On PyPy, Py_UNICODE is always wchar_t */
-#define PY_UNICODE_TYPE wchar_t
-typedef PY_UNICODE_TYPE Py_UNICODE;
+typedef unsigned short Py_UCS2;
+typedef unsigned char Py_UCS1;
 
-#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
+/* --- Unicode Type ------------------------------------------------------- */
 
 typedef struct {
+        /*
+           SSTATE_NOT_INTERNED (0)
+           SSTATE_INTERNED_MORTAL (1)
+           SSTATE_INTERNED_IMMORTAL (2)
+
+           If interned != SSTATE_NOT_INTERNED, the two references from the
+           dictionary to this object are *not* counted in ob_refcnt.
+         */
+        unsigned int interned;
+        /* Character size:
+
+           - PyUnicode_WCHAR_KIND (0):
+
+             * character type = wchar_t (16 or 32 bits, depending on the
+               platform)
+
+           - PyUnicode_1BYTE_KIND (1):
+
+             * character type = Py_UCS1 (8 bits, unsigned)
+             * all characters are in the range U+0000-U+00FF (latin1)
+             * if ascii is set, all characters are in the range U+0000-U+007F
+               (ASCII), otherwise at least one character is in the range
+               U+0080-U+00FF
+
+           - PyUnicode_2BYTE_KIND (2):
+
+             * character type = Py_UCS2 (16 bits, unsigned)
+             * all characters are in the range U+0000-U+FFFF (BMP)
+             * at least one character is in the range U+0100-U+FFFF
+
+           - PyUnicode_4BYTE_KIND (4):
+
+             * character type = Py_UCS4 (32 bits, unsigned)
+             * all characters are in the range U+0000-U+10FFFF
+             * at least one character is in the range U+10000-U+10FFFF
+         */
+        unsigned int kind;
+        /* Compact is with respect to the allocation scheme. Compact unicode
+           objects only require one memory block while non-compact objects use
+           one block for the PyUnicodeObject struct and another for its data
+           buffer. */
+        unsigned int compact;
+        /* The string only contains characters in the range U+0000-U+007F (ASCII)
+           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
+           set, use the PyASCIIObject structure. */
+        unsigned int ascii;
+        /* The ready flag indicates whether the object layout is initialized
+           completely. This means that this is either a compact object, or
+           the data pointer is filled out. The bit is redundant, and helps
+           to minimize the test in PyUnicode_IS_READY(). */
+        unsigned int ready;
+        /* Padding to ensure that PyUnicode_DATA() is always aligned to
+           4 bytes (see issue #19537 on m68k). */
+        /* not on PyPy */
+    } _PyASCIIObject_state_t;
+
+/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
+   structure. state.ascii and state.compact are set, and the data
+   immediately follow the structure. utf8_length and wstr_length can be found
+   in the length field; the utf8 pointer is equal to the data pointer. */
+typedef struct {
+    /* There are 4 forms of Unicode strings:
+
+       - compact ascii:
+
+         * structure = PyASCIIObject
+         * test: PyUnicode_IS_COMPACT_ASCII(op)
+         * kind = PyUnicode_1BYTE_KIND
+         * compact = 1
+         * ascii = 1
+         * ready = 1
+         * (length is the length of the utf8 and wstr strings)
+         * (data starts just after the structure)
+         * (since ASCII is decoded from UTF-8, the utf8 string is the data)
+
+       - compact:
+
+         * structure = PyCompactUnicodeObject
+         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
+         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+           PyUnicode_4BYTE_KIND
+         * compact = 1
+         * ready = 1
+         * ascii = 0
+         * utf8 is not shared with data
+         * utf8_length = 0 if utf8 is NULL
+         * wstr is shared with data and wstr_length=length
+           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+         * wstr_length = 0 if wstr is NULL
+         * (data starts just after the structure)
+
+       - legacy string, not ready:
+
+         * structure = PyUnicodeObject
+         * test: kind == PyUnicode_WCHAR_KIND
+         * length = 0 (use wstr_length)
+         * hash = -1
+         * kind = PyUnicode_WCHAR_KIND
+         * compact = 0
+         * ascii = 0
+         * ready = 0
+         * interned = SSTATE_NOT_INTERNED
+         * wstr is not NULL
+         * data.any is NULL
+         * utf8 is NULL
+         * utf8_length = 0
+
+       - legacy string, ready:
+
+         * structure = PyUnicodeObject structure
+         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
+         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+           PyUnicode_4BYTE_KIND
+         * compact = 0
+         * ready = 1
+         * data.any is not NULL
+         * utf8 is shared with data.any and utf8_length = length if ascii = 1
+         * utf8_length = 0 if utf8 is NULL
+         * wstr is shared with data.any and wstr_length = length
+           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+         * wstr_length = 0 if wstr is NULL
+
+       Compact strings use only one memory block (structure + characters),
+       whereas legacy strings use one block for the structure and one block
+       for characters.
+
+       Legacy strings are created by PyUnicode_FromUnicode() and
+       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
+       when PyUnicode_READY() is called.
+
+       See also _PyUnicode_CheckConsistency().
+    */
     PyObject_HEAD
-    Py_UNICODE *buffer;
-    Py_ssize_t length;
-    char *utf8buffer;
+    Py_ssize_t length;          /* Number of code points in the string */
+    //Py_hash_t hash;             /* Hash value; -1 if not set */
+    _PyASCIIObject_state_t state;
+    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
+} PyASCIIObject;
+
+/* Non-ASCII strings allocated through PyUnicode_New use the
+   PyCompactUnicodeObject structure. state.compact is set, and the data
+   immediately follow the structure. */
+typedef struct {
+    PyASCIIObject _base;
+    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
+                                 * terminating \0. */
+    char *utf8;                 /* UTF-8 representation (null-terminated) */
+    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
+                                 * surrogates count as two code points. */
+} PyCompactUnicodeObject;
+
+/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
+   PyUnicodeObject structure. The actual string data is initially in the wstr
+   block, and copied into the data block using _PyUnicode_Ready. */
+typedef struct {
+    PyCompactUnicodeObject _base;
+    void* data;                     /* Canonical, smallest-form Unicode buffer */
 } PyUnicodeObject;
+
+
+/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
+
+/* Values for PyASCIIObject.state: */
+
+/* Interning state. */
+#define SSTATE_NOT_INTERNED 0
+#define SSTATE_INTERNED_MORTAL 1
+#define SSTATE_INTERNED_IMMORTAL 2
+
+/* --- Constants ---------------------------------------------------------- */
+
+/* This Unicode character will be used as replacement character during
+   decoding if the errors argument is set to "replace". Note: the
+   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
+   Unicode 3.0. */
+
+#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
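
To make the four-forms comment above concrete, the following sketch
(illustrative only, mirroring the PyUnicode_DATA() logic in the companion
header) shows where the character buffer lives for each allocation scheme:

    #include <Python.h>

    /* Sketch: locate the character data for the forms described above.
       Compact ASCII strings keep it right after PyASCIIObject, other compact
       strings right after PyCompactUnicodeObject, and legacy strings point to
       a separately allocated block through PyUnicodeObject.data. */
    static void *
    unicode_data(PyObject *op)
    {
        if (PyUnicode_IS_COMPACT_ASCII(op))
            return (void *)((PyASCIIObject *)op + 1);
        if (PyUnicode_IS_COMPACT(op))
            return (void *)((PyCompactUnicodeObject *)op + 1);
        return ((PyUnicodeObject *)op)->data;   /* legacy: separate buffer */
    }
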
diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py
--- a/pypy/module/cpyext/slotdefs.py
+++ b/pypy/module/cpyext/slotdefs.py
@@ -471,7 +471,7 @@
                             readonly=widen(pybuf.c_readonly),
                             releasebufferproc = rbp)
         fq.register_finalizer(buf)
-        return space.newbuffer(buf)
+        return space.newbuffer(buf, itemsize=buf.itemsize)
 
 def get_richcmp_func(OP_CONST):
     def inner(space, w_self, w_args, func):
diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py
--- a/pypy/module/cpyext/test/test_arraymodule.py
+++ b/pypy/module/cpyext/test/test_arraymodule.py
@@ -79,6 +79,12 @@
         res = [1, 2, 3] * arr
         assert res == [2, 4, 6]
 
+    def test_string_buf(self):
+        module = self.import_module(name='array')
+        arr = module.array('u', '123')
+        view = memoryview(arr)
+        assert view.itemsize == 4
+
     def test_subclass(self):
         import struct
         module = self.import_module(name='array')
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -9,6 +9,7 @@
 from rpython.rtyper.lltypesystem import rffi, lltype
 import sys, py
 from pypy.module.cpyext.unicodeobject import *
+from pypy.module.cpyext.unicodeobject import _PyUnicode_Ready
 
 class AppTestUnicodeObject(AppTestCpythonExtensionBase):
     def test_unicodeobject(self):
@@ -28,7 +29,7 @@
                  }
 #ifdef PYPY_VERSION
                  // Slightly silly test that tp_basicsize is reasonable.
-                 if(s->ob_type->tp_basicsize != sizeof(void*)*6)
+                 if(s->ob_type->tp_basicsize != sizeof(void*)*12)
                      result = s->ob_type->tp_basicsize;
 #endif  // PYPY_VERSION
                  Py_DECREF(s);
@@ -40,7 +41,7 @@
                  int result = 0;
 
                  if(PyUnicode_GetLength(s) != 11) {
-                     result = -PyUnicode_GetSize(s);
+                     result = -PyUnicode_GetLength(s);
                  }
                  Py_DECREF(s);
                  return PyLong_FromLong(result);
@@ -212,10 +213,8 @@
 
 class TestUnicode(BaseApiTest):
     def test_unicodeobject(self, space):
-        assert PyUnicode_GET_SIZE(space, space.wrap(u'späm')) == 4
         assert PyUnicode_GetSize(space, space.wrap(u'späm')) == 4
         unichar = rffi.sizeof(Py_UNICODE)
-        assert PyUnicode_GET_DATA_SIZE(space, space.wrap(u'späm')) == 4 * unichar
 
         encoding = rffi.charp2str(PyUnicode_GetDefaultEncoding(space, ))
         w_default_encoding = space.call_function(
@@ -225,13 +224,11 @@
 
     def test_AS(self, space):
         word = space.wrap(u'spam')
-        array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word))
-        array2 = PyUnicode_AS_UNICODE(space, word)
-        array3 = PyUnicode_AsUnicode(space, word)
+        array = rffi.cast(rffi.CWCHARP, PyUnicode_AsUnicode(space, word))
+        array2 = PyUnicode_AsUnicode(space, word)
         for (i, char) in enumerate(space.unwrap(word)):
             assert array[i] == char
             assert array2[i] == char
-            assert array3[i] == char
         with raises_w(space, TypeError):
             PyUnicode_AsUnicode(space, space.newbytes('spam'))
 
@@ -284,22 +281,23 @@
     def test_unicode_resize(self, space):
         py_uni = new_empty_unicode(space, 10)
         ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw')
-        py_uni.c_buffer[0] = u'a'
-        py_uni.c_buffer[1] = u'b'
-        py_uni.c_buffer[2] = u'c'
+        buf = get_wbuffer(py_uni)
+        buf[0] = u'a'
+        buf[1] = u'b'
+        buf[2] = u'c'
         ar[0] = rffi.cast(PyObject, py_uni)
         PyUnicode_Resize(space, ar, 3)
         py_uni = rffi.cast(PyUnicodeObject, ar[0])
-        assert py_uni.c_length == 3
-        assert py_uni.c_buffer[1] == u'b'
-        assert py_uni.c_buffer[3] == u'\x00'
+        assert get_wsize(py_uni) == 3
+        assert get_wbuffer(py_uni)[1] == u'b'
+        assert get_wbuffer(py_uni)[3] == u'\x00'
         # the same for growing
         ar[0] = rffi.cast(PyObject, py_uni)
         PyUnicode_Resize(space, ar, 10)
         py_uni = rffi.cast(PyUnicodeObject, ar[0])
-        assert py_uni.c_length == 10
-        assert py_uni.c_buffer[1] == 'b'
-        assert py_uni.c_buffer[10] == '\x00'
+        assert get_wsize(py_uni) == 10
+        assert get_wbuffer(py_uni)[1] == 'b'
+        assert get_wbuffer(py_uni)[10] == '\x00'
         Py_DecRef(space, ar[0])
         lltype.free(ar, flavor='raw')
 
@@ -623,13 +621,13 @@
         count1 = space.int_w(space.len(w_x))
         target_chunk = lltype.malloc(rffi.CWCHARP.TO, count1, flavor='raw')
 
-        x_chunk = PyUnicode_AS_UNICODE(space, w_x)
+        x_chunk = PyUnicode_AsUnicode(space, w_x)
         Py_UNICODE_COPY(space, target_chunk, x_chunk, 4)
         w_y = space.wrap(rffi.wcharpsize2unicode(target_chunk, 4))
 
         assert space.eq_w(w_y, space.wrap(u"abcd"))
 
-        size = PyUnicode_GET_SIZE(space, w_x)
+        size = PyUnicode_GetSize(space, w_x)
         Py_UNICODE_COPY(space, target_chunk, x_chunk, size)
         w_y = space.wrap(rffi.wcharpsize2unicode(target_chunk, size))
 
@@ -758,3 +756,32 @@
                 PyUnicode_Splitlines(space, w_str, 0)))
         assert r"['a\n', 'b\n', 'c\n', 'd']" == space.unwrap(space.repr(
                 PyUnicode_Splitlines(space, w_str, 1)))
+
+    def test_Ready(self, space):
+        w_str = space.wrap(u'abc')  # ASCII
+        py_str = as_pyobj(space, w_str)
+        assert get_kind(py_str) == 0
+        _PyUnicode_Ready(space, w_str)
+        assert get_kind(py_str) == 1
+        assert get_ascii(py_str) == 1
+
+        w_str = space.wrap(u'café')  # latin1
+        py_str = as_pyobj(space, w_str)
+        assert get_kind(py_str) == 0
+        _PyUnicode_Ready(space, w_str)
+        assert get_kind(py_str) == 1
+        assert get_ascii(py_str) == 0
+
+        w_str = space.wrap(u'Росси́я')  # UCS2
+        py_str = as_pyobj(space, w_str)
+        assert get_kind(py_str) == 0
+        _PyUnicode_Ready(space, w_str)
+        assert get_kind(py_str) == 2
+        assert get_ascii(py_str) == 0
+
+        w_str = space.wrap(u'***\U0001f4a9***')  # UCS4
+        py_str = as_pyobj(space, w_str)
+        assert get_kind(py_str) == 0
+        _PyUnicode_Ready(space, w_str)
+        assert get_kind(py_str) == 4
+        assert get_ascii(py_str) == 0
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -1,5 +1,8 @@
 from pypy.interpreter.error import OperationError, oefmt
 from rpython.rtyper.lltypesystem import rffi, lltype
+from rpython.rlib.runicode import unicode_encode_latin_1, unicode_encode_utf_16
+from rpython.rlib.rarithmetic import widen
+
 from pypy.module.unicodedata import unicodedb
 from pypy.module.cpyext.api import (
     CANNOT_FAIL, Py_ssize_t, build_type_checkers, cpython_api,
@@ -8,7 +11,7 @@
 from pypy.module.cpyext.pyerrors import PyErr_BadArgument
 from pypy.module.cpyext.pyobject import (
     PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
-    make_typedescr, get_typedescr)
+    make_typedescr, get_typedescr, as_pyobj)
 from pypy.module.cpyext.bytesobject import PyBytes_Check, PyBytes_FromObject
 from pypy.module._codecs.interp_codecs import CodecState
 from pypy.objspace.std import unicodeobject
@@ -37,6 +40,12 @@
 
 PyUnicode_Check, PyUnicode_CheckExact = build_type_checkers("Unicode", "w_unicode")
 
+MAX_UNICODE = 1114111
+WCHAR_KIND = 0
+_1BYTE_KIND = 1
+_2BYTE_KIND = 2
+_4BYTE_KIND = 4
+
 
 def new_empty_unicode(space, length):
     """
@@ -46,28 +55,26 @@
     """
     typedescr = get_typedescr(space.w_unicode.layout.typedef)
     py_obj = typedescr.allocate(space, space.w_unicode)
-    py_uni = rffi.cast(PyUnicodeObject, py_obj)
 
     buflen = length + 1
-    py_uni.c_length = length
-    py_uni.c_buffer = lltype.malloc(rffi.CWCHARP.TO, buflen,
-                                    flavor='raw', zero=True,
-                                    add_memory_pressure=True)
-    return py_uni
+    set_wsize(py_obj, length)
+    set_wbuffer(py_obj,
+        lltype.malloc(
+            rffi.CWCHARP.TO, buflen, flavor='raw', zero=True,
+            add_memory_pressure=True))
+    return py_obj
 
 def unicode_attach(space, py_obj, w_obj, w_userdata=None):
     "Fills a newly allocated PyUnicodeObject with a unicode string"
-    py_unicode = rffi.cast(PyUnicodeObject, py_obj)
-    py_unicode.c_length = len(space.unicode_w(w_obj))
-    py_unicode.c_buffer = lltype.nullptr(rffi.CWCHARP.TO)
+    set_wsize(py_obj, len(space.unicode_w(w_obj)))
+    set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO))
 
 def unicode_realize(space, py_obj):
     """
     Creates the unicode in the interpreter. The PyUnicodeObject buffer must not
     be modified after this call.
     """
-    py_uni = rffi.cast(PyUnicodeObject, py_obj)
-    s = rffi.wcharpsize2unicode(py_uni.c_buffer, py_uni.c_length)
+    s = rffi.wcharpsize2unicode(get_wbuffer(py_obj), get_wsize(py_obj))
     w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
     w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
     w_obj.__init__(s)
@@ -76,14 +83,81 @@
 
 @slot_function([PyObject], lltype.Void)
 def unicode_dealloc(space, py_obj):
-    py_unicode = rffi.cast(PyUnicodeObject, py_obj)
-    if py_unicode.c_buffer:
-        lltype.free(py_unicode.c_buffer, flavor="raw")
-    if py_unicode.c_utf8buffer:
-        lltype.free(py_unicode.c_utf8buffer, flavor="raw")
+    if get_wbuffer(py_obj):
+        lltype.free(get_wbuffer(py_obj), flavor="raw")
+    if get_utf8(py_obj):
+        lltype.free(get_utf8(py_obj), flavor="raw")
     from pypy.module.cpyext.object import _dealloc
     _dealloc(space, py_obj)
 
+def get_len(py_obj):
+    py_obj = cts.cast('PyASCIIObject*', py_obj)
+    return py_obj.c_length
+
+def set_len(py_obj, n):
+    py_obj = cts.cast('PyASCIIObject*', py_obj)
+    py_obj.c_length = n
+
+def get_state(py_obj):
+    py_obj = cts.cast('PyASCIIObject*', py_obj)
+    return py_obj.c_state
+
+def get_kind(py_obj):
+    return get_state(py_obj).c_kind
+
+def set_kind(py_obj, value):
+    get_state(py_obj).c_kind = cts.cast('unsigned int', value)
+
+def get_ascii(py_obj):
+    return get_state(py_obj).c_ascii
+
+def set_ascii(py_obj, value):
+    get_state(py_obj).c_ascii = cts.cast('unsigned int', value)
+
+def set_ready(py_obj, value):
+    get_state(py_obj).c_ready = cts.cast('unsigned int', value)
+
+def get_wbuffer(py_obj):
+    py_obj = cts.cast('PyASCIIObject*', py_obj)
+    return py_obj.c_wstr
+
+def set_wbuffer(py_obj, wbuf):
+    py_obj = cts.cast('PyASCIIObject*', py_obj)
+    py_obj.c_wstr = wbuf
+
+def get_utf8_len(py_obj):
+    py_obj = cts.cast('PyCompactUnicodeObject*', py_obj)
+    return py_obj.c_utf8_length
+
+def set_utf8_len(py_obj, n):
+    py_obj = cts.cast('PyCompactUnicodeObject*', py_obj)
+    py_obj.c_utf8_length = n
+
+def get_utf8(py_obj):
+    py_obj = cts.cast('PyCompactUnicodeObject*', py_obj)
+    return py_obj.c_utf8
+
+def set_utf8(py_obj, buf):
+    py_obj = cts.cast('PyCompactUnicodeObject*', py_obj)
+    py_obj.c_utf8 = buf
+
+def get_wsize(py_obj):
+    py_obj = cts.cast('PyCompactUnicodeObject*', py_obj)
+    return py_obj.c_wstr_length
+
+def set_wsize(py_obj, value):
+    py_obj = cts.cast('PyCompactUnicodeObject*', py_obj)
+    py_obj.c_wstr_length = value
+
+def get_data(py_obj):
+    py_obj = cts.cast('PyUnicodeObject*', py_obj)
+    return py_obj.c_data
+
+def set_data(py_obj, p_data):
+    py_obj = cts.cast('PyUnicodeObject*', py_obj)
+    py_obj.c_data = p_data
+
+
 @cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
 def Py_UNICODE_ISSPACE(space, ch):
     """Return 1 or 0 depending on whether ch is a whitespace character."""
@@ -181,48 +255,52 @@
     """Get the maximum ordinal for a Unicode character."""
     return runicode.UNICHR(runicode.MAXUNICODE)
 
-@cpython_api([rffi.VOIDP], rffi.CCHARP, error=CANNOT_FAIL)
-def PyUnicode_AS_DATA(space, ref):
-    """Return a pointer to the internal buffer of the object. o has to be a
-    PyUnicodeObject (not checked)."""
-    return rffi.cast(rffi.CCHARP, PyUnicode_AS_UNICODE(space, ref))
-
-@cpython_api([rffi.VOIDP], Py_ssize_t, error=CANNOT_FAIL)
-def PyUnicode_GET_DATA_SIZE(space, w_obj):
-    """Return the size of the object's internal buffer in bytes.  o has to be a
-    PyUnicodeObject (not checked)."""
-    return rffi.sizeof(Py_UNICODE) * PyUnicode_GET_SIZE(space, w_obj)
-
-@cpython_api([rffi.VOIDP], Py_ssize_t, error=CANNOT_FAIL)
-def PyUnicode_GET_SIZE(space, w_obj):
-    """Return the size of the object.  obj is a PyUnicodeObject (not
-    checked)."""
-    return space.len_w(w_obj)
-
-@cpython_api([PyObject], Py_ssize_t, error=CANNOT_FAIL)
-def PyUnicode_GET_LENGTH(space, w_obj):
-    """Return the length of the Unicode string, in code points.
-    o has to be a Unicode object in the "canonical" representation
-    (not checked)."""
[email protected]("int _PyUnicode_Ready(PyObject *unicode)", error=-1)
+def _PyUnicode_Ready(space, w_obj):
     assert isinstance(w_obj, unicodeobject.W_UnicodeObject)
-    return space.len_w(w_obj)
-
-@cpython_api([PyObject], rffi.INT, error=CANNOT_FAIL)
-def PyUnicode_IS_READY(space, w_obj):
-    # PyPy is always ready.
-    return space.w_True
-
-@cpython_api([rffi.VOIDP], rffi.CWCHARP, error=CANNOT_FAIL)
-def PyUnicode_AS_UNICODE(space, ref):
-    """Return a pointer to the internal Py_UNICODE buffer of the object.  ref
-    has to be a PyUnicodeObject (not checked)."""
-    ref_unicode = rffi.cast(PyUnicodeObject, ref)
-    if not ref_unicode.c_buffer:
-        # Copy unicode buffer
-        w_unicode = from_ref(space, rffi.cast(PyObject, ref))
-        u = space.unicode_w(w_unicode)
-        ref_unicode.c_buffer = rffi.unicode2wcharp(u)
-    return ref_unicode.c_buffer
+    py_obj = as_pyobj(space, w_obj)
+    assert widen(get_kind(py_obj)) == WCHAR_KIND
+    maxchar = 0
+    for c in w_obj._value:
+        if ord(c) > maxchar:
+            maxchar = ord(c)
+            if maxchar > MAX_UNICODE:
+                raise oefmt(space.w_ValueError,
+                    "Character U+%d is not in range [U+0000; U+10ffff]",
+                    maxchar)
+    if maxchar < 256:
+        ucs1_data = rffi.str2charp(unicode_encode_latin_1(
+            w_obj._value, len(w_obj._value), errors='strict'))
+        set_data(py_obj, cts.cast('void*', ucs1_data))
+        set_kind(py_obj, _1BYTE_KIND)
+        if maxchar < 128:
+            set_ascii(py_obj, 1)
+            set_utf8(py_obj, cts.cast('char*', get_data(py_obj)))
+            set_utf8_len(py_obj, get_wsize(py_obj))
+        else:
+            set_ascii(py_obj, 0)
+            set_utf8(py_obj, cts.cast('char *', 0))
+            set_utf8_len(py_obj, 0)
+    elif maxchar < 65536:
+        # XXX: assumes that sizeof(wchar_t) == 4
+        ucs2_str = unicode_encode_utf_16(
+            w_obj._value, len(w_obj._value), errors='strict')
+        ucs2_data = cts.cast('Py_UCS2 *', rffi.str2charp(ucs2_str))
+        set_data(py_obj, cts.cast('void*', ucs2_data))
+        set_len(py_obj, get_wsize(py_obj))
+        set_kind(py_obj, _2BYTE_KIND)
+        set_utf8(py_obj, cts.cast('char *', 0))
+        set_utf8_len(py_obj, 0)
+    else:
+        # XXX: assumes that sizeof(wchar_t) == 4
+        ucs4_data = get_wbuffer(py_obj)
+        set_data(py_obj, cts.cast('void*', ucs4_data))
+        set_len(py_obj, get_wsize(py_obj))
+        set_kind(py_obj, _4BYTE_KIND)
+        set_utf8(py_obj, cts.cast('char *', 0))
+        set_utf8_len(py_obj, 0)
+    set_ready(py_obj, 1)
+    return 0
 
 @cpython_api([PyObject], rffi.CWCHARP)
 def PyUnicode_AsUnicode(space, ref):
@@ -232,19 +310,23 @@
     w_type = from_ref(space, rffi.cast(PyObject, ref.c_ob_type))
     if not space.issubtype_w(w_type, space.w_unicode):
         raise oefmt(space.w_TypeError, "expected unicode object")
-    return PyUnicode_AS_UNICODE(space, rffi.cast(rffi.VOIDP, ref))
+    if not get_wbuffer(ref):
+        # Copy unicode buffer
+        w_unicode = from_ref(space, rffi.cast(PyObject, ref))
+        u = space.unicode_w(w_unicode)
+        set_wbuffer(ref, rffi.unicode2wcharp(u))
+    return get_wbuffer(ref)
 
 @cts.decl("char * PyUnicode_AsUTF8(PyObject *unicode)")
 def PyUnicode_AsUTF8(space, ref):
-    ref_unicode = rffi.cast(PyUnicodeObject, ref)
-    if not ref_unicode.c_utf8buffer:
+    if not get_utf8(ref):
         # Copy unicode buffer
         w_unicode = from_ref(space, ref)
         w_encoded = unicodeobject.encode_object(space, w_unicode, "utf-8",
                                                 "strict")
         s = space.bytes_w(w_encoded)
-        ref_unicode.c_utf8buffer = rffi.str2charp(s)
-    return ref_unicode.c_utf8buffer
+        set_utf8(ref, rffi.str2charp(s))
+    return get_utf8(ref)
 
 @cpython_api([PyObject], Py_ssize_t, error=-1)
 def PyUnicode_GetSize(space, ref):
@@ -254,20 +336,19 @@
     Please migrate to using PyUnicode_GetLength().
     """
     if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_unicode:
-        ref = rffi.cast(PyUnicodeObject, ref)
-        return ref.c_length
+        return get_wsize(ref)
     else:
         w_obj = from_ref(space, ref)
         return space.len_w(w_obj)
 
 @cpython_api([PyObject], Py_ssize_t, error=-1)
-def PyUnicode_GetLength(space, w_unicode):
+def PyUnicode_GetLength(space, unicode):
     """Return the length of the Unicode object, in code points."""
     # XXX: this is a stub
-    if not PyUnicode_Check(space, w_unicode):
+    if not PyUnicode_Check(space, unicode):
         PyErr_BadArgument(space)
     #PyUnicode_READY(w_unicode)
-    return PyUnicode_GET_LENGTH(space, w_unicode)
+    return get_len(unicode)
 
 @cpython_api([PyObject, rffi.CWCHARP, Py_ssize_t], Py_ssize_t, error=-1)
 def PyUnicode_AsWideChar(space, ref, buf, size):
@@ -278,9 +359,8 @@
     string may or may not be 0-terminated.  It is the responsibility of the caller
     to make sure that the wchar_t string is 0-terminated in case this is
     required by the application."""
-    ref = rffi.cast(PyUnicodeObject, ref)
-    c_buffer = PyUnicode_AS_UNICODE(space, rffi.cast(rffi.VOIDP, ref))
-    c_length = ref.c_length
+    c_buffer = PyUnicode_AsUnicode(space, ref)
+    c_length = get_wsize(ref)
 
     # If possible, try to copy the 0-termination as well
     if size > c_length:
@@ -359,7 +439,7 @@
         s = rffi.wcharpsize2unicode(wchar_p, length)
         return make_ref(space, space.wrap(s))
     else:
-        return rffi.cast(PyObject, new_empty_unicode(space, length))
+        return new_empty_unicode(space, length)
 
 @cpython_api([CONST_WSTRING, Py_ssize_t], PyObject, result_is_ll=True)
 def PyUnicode_FromWideChar(space, wchar_p, length):
@@ -550,7 +630,7 @@
         return make_ref(space, PyUnicode_DecodeUTF8(
             space, s, size, lltype.nullptr(rffi.CCHARP.TO)))
     else:
-        return rffi.cast(PyObject, new_empty_unicode(space, size))
+        return new_empty_unicode(space, size)
 
 @cpython_api([rffi.INT_real], PyObject)
 def PyUnicode_FromOrdinal(space, ordinal):
@@ -565,8 +645,8 @@
 @cpython_api([PyObjectP, Py_ssize_t], rffi.INT_real, error=-1)
 def PyUnicode_Resize(space, ref, newsize):
     # XXX always create a new string so far
-    py_uni = rffi.cast(PyUnicodeObject, ref[0])
-    if not py_uni.c_buffer:
+    py_obj = ref[0]
+    if not get_wbuffer(py_obj):
         raise oefmt(space.w_SystemError,
                     "PyUnicode_Resize called on already created string")
     try:
@@ -576,11 +656,11 @@
         ref[0] = lltype.nullptr(PyObject.TO)
         raise
     to_cp = newsize
-    oldsize = py_uni.c_length
+    oldsize = get_wsize(py_obj)
     if oldsize < newsize:
         to_cp = oldsize
     for i in range(to_cp):
-        py_newuni.c_buffer[i] = py_uni.c_buffer[i]
+        get_wbuffer(py_newuni)[i] = get_wbuffer(py_obj)[i]
     Py_DecRef(space, ref[0])
     ref[0] = rffi.cast(PyObject, py_newuni)
     return 0
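
The branches of _PyUnicode_Ready above come down to a threshold test on the
largest code point in the string; as a standalone reference (a sketch, not
part of the changeset), the mapping is:

    #include <assert.h>

    /* Sketch of the maxchar -> kind mapping applied by _PyUnicode_Ready:
       up to U+00FF gives 1BYTE_KIND (with the ascii flag for < U+0080),
       up to U+FFFF gives 2BYTE_KIND, anything above gives 4BYTE_KIND. */
    static int
    kind_for_maxchar(unsigned long maxchar)
    {
        assert(maxchar <= 0x10FFFF);
        if (maxchar < 0x100)
            return 1;   /* PyUnicode_1BYTE_KIND */
        if (maxchar < 0x10000)
            return 2;   /* PyUnicode_2BYTE_KIND */
        return 4;       /* PyUnicode_4BYTE_KIND */
    }
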
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -196,7 +196,7 @@
     def newseqiter(self, x):
         return w_some_obj()
 
-    def newbuffer(self, x):
+    def newbuffer(self, x, itemsize=1):
         return w_some_obj()
 
     def marshal_w(self, w_obj):
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -295,7 +295,7 @@
         return W_LongObject.fromint(self, val)
 
     @specialize.argtype(1)
-    def newlong_from_rarith_int(self, val): # val is an rarithmetic type 
+    def newlong_from_rarith_int(self, val): # val is an rarithmetic type
         return W_LongObject.fromrarith_int(val)
 
     def newlong_from_rbigint(self, val):
@@ -350,8 +350,8 @@
     def newseqiter(self, w_obj):
         return W_SeqIterObject(w_obj)
 
-    def newbuffer(self, w_obj):
-        return W_MemoryView(w_obj)
+    def newbuffer(self, w_obj, itemsize=1):
+        return W_MemoryView(w_obj, itemsize=itemsize)
 
     def newbytes(self, s):
         return W_BytesObject(s)