Author: Matti Picus <[email protected]>
Branch:
Changeset: r85258:38f3eaef5d7b
Date: 2016-06-20 21:52 +0300
http://bitbucket.org/pypy/pypy/changeset/38f3eaef5d7b/
Log: merge rw-PyString_AS_STRING which allows writing to a non-forced
char *
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1202,8 +1202,6 @@
cpyext_type_init = self.cpyext_type_init
self.cpyext_type_init = None
for pto, w_type in cpyext_type_init:
- if space.is_w(w_type, space.w_str):
- pto.c_tp_itemsize = 1
finish_type_1(space, pto)
finish_type_2(space, pto, w_type)
diff --git a/pypy/module/cpyext/bytesobject.py b/pypy/module/cpyext/bytesobject.py
--- a/pypy/module/cpyext/bytesobject.py
+++ b/pypy/module/cpyext/bytesobject.py
@@ -6,7 +6,9 @@
from pypy.module.cpyext.pyerrors import PyErr_BadArgument
from pypy.module.cpyext.pyobject import (
PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
- make_typedescr, get_typedescr, as_pyobj, Py_IncRef, get_w_obj_and_decref)
+ make_typedescr, get_typedescr, as_pyobj, Py_IncRef, get_w_obj_and_decref,
+ pyobj_has_w_obj)
+from pypy.objspace.std.bytesobject import W_BytesObject
##
## Implementation of PyStringObject
@@ -16,7 +18,7 @@
## -----------
##
## PyString_AsString() must return a (non-movable) pointer to the underlying
-## buffer, whereas pypy strings are movable. C code may temporarily store
+## ob_sval, whereas pypy strings are movable. C code may temporarily store
## this address and use it, as long as it owns a reference to the PyObject.
## There is no "release" function to specify that the pointer is not needed
## any more.
@@ -28,7 +30,7 @@
## --------
##
## PyStringObject contains two additional members: the ob_size and a pointer to a
-## char buffer; it may be NULL.
+## char ob_sval; it may be NULL.
##
## - A string allocated by pypy will be converted into a PyStringObject with a
## NULL buffer. The first time PyString_AsString() is called, memory is
@@ -41,6 +43,9 @@
## the pypy string is created, and added to the global map of tracked
## objects. The buffer is then supposed to be immutable.
##
+##- A buffer obtained from PyString_AS_STRING() could be mutable iff
+## there is no corresponding pypy object for the string
+##
## - _PyString_Resize() works only on not-yet-pypy'd strings, and returns a
## similar object.
##
@@ -53,7 +58,7 @@
PyStringObjectStruct = lltype.ForwardReference()
PyStringObject = lltype.Ptr(PyStringObjectStruct)
PyStringObjectFields = PyVarObjectFields + \
- (("ob_shash", rffi.LONG), ("ob_sstate", rffi.INT), ("buffer", rffi.CCHARP))
+ (("ob_shash", rffi.LONG), ("ob_sstate", rffi.INT), ("ob_sval", rffi.CArray(lltype.Char)))
cpython_struct("PyStringObject", PyStringObjectFields, PyStringObjectStruct)
@bootstrap_function
@@ -69,44 +74,43 @@
def new_empty_str(space, length):
"""
- Allocate a PyStringObject and its buffer, but without a corresponding
- interpreter object. The buffer may be mutated, until string_realize() is
+ Allocate a PyStringObject and its ob_sval, but without a corresponding
+ interpreter object. The ob_sval may be mutated, until string_realize() is
called. Refcount of the result is 1.
"""
typedescr = get_typedescr(space.w_str.layout.typedef)
- py_obj = typedescr.allocate(space, space.w_str)
+ py_obj = typedescr.allocate(space, space.w_str, length)
py_str = rffi.cast(PyStringObject, py_obj)
-
- buflen = length + 1
- py_str.c_ob_size = length
- py_str.c_buffer = lltype.malloc(rffi.CCHARP.TO, buflen,
- flavor='raw', zero=True,
- add_memory_pressure=True)
+ py_str.c_ob_shash = -1
py_str.c_ob_sstate = rffi.cast(rffi.INT, 0) # SSTATE_NOT_INTERNED
return py_str
def string_attach(space, py_obj, w_obj):
"""
- Fills a newly allocated PyStringObject with the given string object. The
- buffer must not be modified.
+ Copy RPython string object contents to a PyStringObject. The
+ c_ob_sval must not be modified.
"""
py_str = rffi.cast(PyStringObject, py_obj)
- py_str.c_ob_size = len(space.str_w(w_obj))
- py_str.c_buffer = lltype.nullptr(rffi.CCHARP.TO)
+ s = space.str_w(w_obj)
+ if py_str.c_ob_size < len(s):
+ raise oefmt(space.w_ValueError,
+ "string_attach called on object with ob_size %d but trying to store %d",
+ py_str.c_ob_size, len(s))
+ rffi.c_memcpy(py_str.c_ob_sval, rffi.str2charp(s), len(s))
+ py_str.c_ob_sval[len(s)] = '\0'
py_str.c_ob_shash = space.hash_w(w_obj)
py_str.c_ob_sstate = rffi.cast(rffi.INT, 1) # SSTATE_INTERNED_MORTAL
def string_realize(space, py_obj):
"""
- Creates the string in the interpreter. The PyStringObject buffer must not
+ Creates the string in the interpreter. The PyStringObject ob_sval must not
be modified after this call.
"""
py_str = rffi.cast(PyStringObject, py_obj)
- if not py_str.c_buffer:
- py_str.c_buffer = lltype.malloc(rffi.CCHARP.TO, py_str.c_ob_size + 1,
- flavor='raw', zero=True)
- s = rffi.charpsize2str(py_str.c_buffer, py_str.c_ob_size)
- w_obj = space.wrap(s)
+ s = rffi.charpsize2str(py_str.c_ob_sval, py_str.c_ob_size)
+ w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
+ w_obj = space.allocate_instance(W_BytesObject, w_type)
+ w_obj.__init__(s)
py_str.c_ob_shash = space.hash_w(w_obj)
py_str.c_ob_sstate = rffi.cast(rffi.INT, 1) # SSTATE_INTERNED_MORTAL
track_reference(space, py_obj, w_obj)
@@ -116,9 +120,6 @@
def string_dealloc(space, py_obj):
"""Frees allocated PyStringObject resources.
"""
- py_str = rffi.cast(PyStringObject, py_obj)
- if py_str.c_buffer:
- lltype.free(py_str.c_buffer, flavor="raw")
from pypy.module.cpyext.object import PyObject_dealloc
PyObject_dealloc(space, py_obj)
@@ -139,6 +140,9 @@
@cpython_api([PyObject], rffi.CCHARP, error=0)
def PyString_AsString(space, ref):
+ return _PyString_AsString(space, ref)
+
+def _PyString_AsString(space, ref):
if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str:
pass # typecheck returned "ok" without forcing 'ref' at all
elif not PyString_Check(space, ref): # otherwise, use the alternate way
@@ -151,15 +155,23 @@
"expected string or Unicode object, %T found",
from_ref(space, ref))
ref_str = rffi.cast(PyStringObject, ref)
- if not ref_str.c_buffer:
- # copy string buffer
- w_str = from_ref(space, ref)
- s = space.str_w(w_str)
- ref_str.c_buffer = rffi.str2charp(s)
- return ref_str.c_buffer
+ if not pyobj_has_w_obj(ref):
+ # XXX Force the ref?
+ string_realize(space, ref)
+ return ref_str.c_ob_sval
+
+@cpython_api([rffi.VOIDP], rffi.CCHARP, error=0)
+def PyString_AS_STRING(space, void_ref):
+ ref = rffi.cast(PyObject, void_ref)
+ # if no w_str is associated with this ref,
+ # return the c-level ptr as RW
+ if not pyobj_has_w_obj(ref):
+ py_str = rffi.cast(PyStringObject, ref)
+ return py_str.c_ob_sval
+ return _PyString_AsString(space, ref)
@cpython_api([PyObject, rffi.CCHARPP, rffi.CArrayPtr(Py_ssize_t)],
rffi.INT_real, error=-1)
-def PyString_AsStringAndSize(space, ref, buffer, length):
+def PyString_AsStringAndSize(space, ref, data, length):
if not PyString_Check(space, ref):
from pypy.module.cpyext.unicodeobject import (
PyUnicode_Check, _PyUnicode_AsDefaultEncodedString)
@@ -169,18 +181,16 @@
raise oefmt(space.w_TypeError,
"expected string or Unicode object, %T found",
from_ref(space, ref))
+ if not pyobj_has_w_obj(ref):
+ # force the ref
+ string_realize(space, ref)
ref_str = rffi.cast(PyStringObject, ref)
- if not ref_str.c_buffer:
- # copy string buffer
- w_str = from_ref(space, ref)
- s = space.str_w(w_str)
- ref_str.c_buffer = rffi.str2charp(s)
- buffer[0] = ref_str.c_buffer
+ data[0] = ref_str.c_ob_sval
if length:
length[0] = ref_str.c_ob_size
else:
i = 0
- while ref_str.c_buffer[i] != '\0':
+ while ref_str.c_ob_sval[i] != '\0':
i += 1
if i != ref_str.c_ob_size:
raise oefmt(space.w_TypeError,
@@ -209,10 +219,10 @@
set to NULL, a memory exception is set, and -1 is returned.
"""
# XXX always create a new string so far
- py_str = rffi.cast(PyStringObject, ref[0])
- if not py_str.c_buffer:
+ if pyobj_has_w_obj(ref[0]):
raise oefmt(space.w_SystemError,
"_PyString_Resize called on already created string")
+ py_str = rffi.cast(PyStringObject, ref[0])
try:
py_newstr = new_empty_str(space, newsize)
except MemoryError:
@@ -224,7 +234,7 @@
if oldsize < newsize:
to_cp = oldsize
for i in range(to_cp):
- py_newstr.c_buffer[i] = py_str.c_buffer[i]
+ py_newstr.c_ob_sval[i] = py_str.c_ob_sval[i]
Py_DecRef(space, ref[0])
ref[0] = rffi.cast(PyObject, py_newstr)
return 0
diff --git a/pypy/module/cpyext/include/stringobject.h b/pypy/module/cpyext/include/stringobject.h
--- a/pypy/module/cpyext/include/stringobject.h
+++ b/pypy/module/cpyext/include/stringobject.h
@@ -10,7 +10,6 @@
#include <stdarg.h>
#define PyString_GET_SIZE(op) PyString_Size((PyObject*)(op))
-#define PyString_AS_STRING(op) PyString_AsString((PyObject*)(op))
/*
Type PyStringObject represents a character string. An extra zero byte is
reserved at the end to ensure it is zero-terminated, but a size is
@@ -41,12 +40,11 @@
PyObject_VAR_HEAD
long ob_shash;
int ob_sstate;
- char * buffer; /* change the name from cpython so all non-api c access is thwarted */
+ char ob_sval[1];
/* Invariants
- * (not relevant in PyPy, all stringobjects are backed by a pypy object)
- * buffer contains space for 'ob_size+1' elements.
- * buffer[ob_size] == 0.
+ * ob_sval contains space for 'ob_size+1' elements.
+ * ob_sval[ob_size] == 0.
* ob_shash is the hash of the string or -1 if not computed yet.
* ob_sstate != 0 iff the string object is in stringobject.c's
* 'interned' dictionary; in this case the two references
diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py
--- a/pypy/module/cpyext/pyobject.py
+++ b/pypy/module/cpyext/pyobject.py
@@ -7,7 +7,7 @@
from pypy.module.cpyext.api import (
cpython_api, bootstrap_function, PyObject, PyObjectP, ADDR,
CANNOT_FAIL, Py_TPFLAGS_HEAPTYPE, PyTypeObjectPtr, is_PyObject,
- INTERPLEVEL_API)
+ INTERPLEVEL_API, PyVarObject)
from pypy.module.cpyext.state import State
from pypy.objspace.std.typeobject import W_TypeObject
from pypy.objspace.std.objectobject import W_ObjectObject
@@ -47,13 +47,16 @@
size = pytype.c_tp_basicsize
else:
size = rffi.sizeof(self.basestruct)
- if itemcount and w_type is not space.w_str:
+ if pytype.c_tp_itemsize:
size += itemcount * pytype.c_tp_itemsize
assert size >= rffi.sizeof(PyObject.TO)
buf = lltype.malloc(rffi.VOIDP.TO, size,
flavor='raw', zero=True,
add_memory_pressure=True)
pyobj = rffi.cast(PyObject, buf)
+ if pytype.c_tp_itemsize:
+ pyvarobj = rffi.cast(PyVarObject, pyobj)
+ pyvarobj.c_ob_size = itemcount
pyobj.c_ob_refcnt = 1
#pyobj.c_ob_pypy_link should get assigned very quickly
pyobj.c_ob_type = pytype
@@ -152,13 +155,18 @@
class InvalidPointerException(Exception):
pass
-def create_ref(space, w_obj, itemcount=0):
+def create_ref(space, w_obj):
"""
Allocates a PyObject, and fills its fields with info from the given
interpreter object.
"""
w_type = space.type(w_obj)
+ pytype = rffi.cast(PyTypeObjectPtr, as_pyobj(space, w_type))
typedescr = get_typedescr(w_obj.typedef)
+ if pytype.c_tp_itemsize != 0:
+ itemcount = space.len_w(w_obj) # PyStringObject and subclasses
+ else:
+ itemcount = 0
py_obj = typedescr.allocate(space, w_type, itemcount=itemcount)
track_reference(space, py_obj, w_obj)
#
diff --git a/pypy/module/cpyext/src/stringobject.c b/pypy/module/cpyext/src/stringobject.c
--- a/pypy/module/cpyext/src/stringobject.c
+++ b/pypy/module/cpyext/src/stringobject.c
@@ -107,7 +107,7 @@
if (!string)
return NULL;
- s = PyString_AsString(string);
+ s = PyString_AS_STRING(string);
for (f = format; *f; f++) {
if (*f == '%') {
diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py
--- a/pypy/module/cpyext/test/test_bytesobject.py
+++ b/pypy/module/cpyext/test/test_bytesobject.py
@@ -25,14 +25,13 @@
("test_Size", "METH_NOARGS",
"""
PyObject* s = PyString_FromString("Hello world");
- int result = 0;
+ int result;
size_t expected_size;
- if(PyString_Size(s) == 11) {
- result = 1;
- }
+ result = PyString_Size(s);
+
#ifdef PYPY_VERSION
- expected_size = sizeof(void*)*7;
+ expected_size = 48;
#elif defined Py_DEBUG
expected_size = 53;
#else
@@ -44,7 +43,7 @@
result = 0;
}
Py_DECREF(s);
- return PyBool_FromLong(result);
+ return PyLong_FromLong(result);
"""),
("test_Size_exception", "METH_NOARGS",
"""
@@ -60,7 +59,7 @@
""")], prologue='#include <stdlib.h>')
assert module.get_hello1() == 'Hello world'
assert module.get_hello2() == 'Hello world'
- assert module.test_Size()
+ assert module.test_Size() == 11
raises(TypeError, module.test_Size_exception)
assert module.test_is_string("")
@@ -81,7 +80,7 @@
if (t == NULL)
return NULL;
Py_DECREF(t);
- c = PyString_AsString(s);
+ c = PyString_AS_STRING(s);
c[0] = 'a';
c[1] = 'b';
c[2] = 0;
@@ -110,14 +109,23 @@
obj = (PyStringObject*)type->tp_alloc(type, 10);
if (PyString_GET_SIZE(obj) != 10)
return PyLong_FromLong(PyString_GET_SIZE(obj));
- /* cannot work, there is only RO access
- memcpy(PyString_AS_STRING(obj), "works", 6); */
+ /* cannot work, there is only RO access */
+ memcpy(PyString_AS_STRING(obj), "works", 6);
Py_INCREF(obj);
return (PyObject*)obj;
"""),
+ ('alloc_rw', "METH_NOARGS",
+ '''
+ PyObject *obj = _PyObject_NewVar(&PyString_Type, 10);
+ char * buf = PyString_AS_STRING(obj);
+ memcpy(PyString_AS_STRING(obj), "works", 6);
+ return (PyObject*)obj;
+ '''),
])
+ s = module.alloc_rw()
+ assert s == 'works' + '\x00' * 5
s = module.tpalloc()
- assert s == '\x00' * 10
+ assert s == 'works' + '\x00' * 5
def test_AsString(self):
module = self.import_extension('foo', [
@@ -332,27 +340,127 @@
# doesn't really test, but if printf is enabled will prove sstate
assert module.test_sstate()
+ def test_subclass(self):
+ # taken from PyStringArrType_Type in numpy's scalartypes.c.src
+ module = self.import_extension('bar', [
+ ("newsubstr", "METH_O",
+ """
+ PyObject * obj;
+ char * data;
+ int len;
+ PyType_Ready(&PyStringArrType_Type);
+
+ data = PyString_AS_STRING(args);
+ len = PyString_GET_SIZE(args);
+ if (data == NULL || len < 1)
+ Py_RETURN_NONE;
+ obj = PyArray_Scalar(data, len);
+ return obj;
+ """),
+ ], prologue="""
+ #include <Python.h>
+ PyTypeObject PyStringArrType_Type = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "bar.string_", /* tp_name*/
+ sizeof(PyStringObject), /* tp_basicsize*/
+ 0 /* tp_itemsize */
+ };
+
+ static PyObject *
+ stringtype_repr(PyObject *self)
+ {
+ const char *dptr, *ip;
+ int len;
+ PyObject *new;
+ PyObject *ret;
+
+ ip = dptr = PyString_AS_STRING(self);
+ len = PyString_GET_SIZE(self);
+ dptr += len-1;
+ while(len > 0 && *dptr-- == 0) {
+ len--;
+ }
+ new = PyString_FromStringAndSize(ip, len);
+ if (new == NULL) {
+ return PyString_FromString("");
+ }
+ return new;
+ }
+
+ static PyObject *
+ stringtype_str(PyObject *self)
+ {
+ const char *dptr, *ip;
+ int len;
+ PyObject *new;
+ PyObject *ret;
+
+ ip = dptr = PyString_AS_STRING(self);
+ len = PyString_GET_SIZE(self);
+ dptr += len-1;
+ while(len > 0 && *dptr-- == 0) {
+ len--;
+ }
+ new = PyString_FromStringAndSize(ip, len);
+ if (new == NULL) {
+ return PyString_FromString("");
+ }
+ return new;
+ }
+
+ PyObject *
+ PyArray_Scalar(char *data, int n)
+ {
+ PyTypeObject *type = &PyStringArrType_Type;
+ PyObject *obj;
+ void *destptr;
+ int type_num;
+ int itemsize = n;
+ obj = type->tp_alloc(type, itemsize);
+ if (obj == NULL) {
+ return NULL;
+ }
+ destptr = PyString_AS_STRING(obj);
+ ((PyStringObject *)obj)->ob_shash = -1;
+ memcpy(destptr, data, itemsize);
+ return obj;
+ }
+ """, more_init = '''
+ PyStringArrType_Type.tp_alloc = NULL;
+ PyStringArrType_Type.tp_free = NULL;
+
+ PyStringArrType_Type.tp_repr = stringtype_repr;
+ PyStringArrType_Type.tp_str = stringtype_str;
+ PyStringArrType_Type.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE;
+ PyStringArrType_Type.tp_itemsize = sizeof(char);
+ PyStringArrType_Type.tp_base = &PyString_Type;
+ ''')
+
+ a = module.newsubstr('abc')
+ assert type(a).__name__ == 'string_'
+ assert a == 'abc'
class TestString(BaseApiTest):
def test_string_resize(self, space, api):
py_str = new_empty_str(space, 10)
ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw')
- py_str.c_buffer[0] = 'a'
- py_str.c_buffer[1] = 'b'
- py_str.c_buffer[2] = 'c'
+ py_str.c_ob_sval[0] = 'a'
+ py_str.c_ob_sval[1] = 'b'
+ py_str.c_ob_sval[2] = 'c'
ar[0] = rffi.cast(PyObject, py_str)
api._PyString_Resize(ar, 3)
py_str = rffi.cast(PyStringObject, ar[0])
assert py_str.c_ob_size == 3
- assert py_str.c_buffer[1] == 'b'
- assert py_str.c_buffer[3] == '\x00'
+ assert py_str.c_ob_sval[1] == 'b'
+ assert py_str.c_ob_sval[3] == '\x00'
# the same for growing
ar[0] = rffi.cast(PyObject, py_str)
api._PyString_Resize(ar, 10)
py_str = rffi.cast(PyStringObject, ar[0])
assert py_str.c_ob_size == 10
- assert py_str.c_buffer[1] == 'b'
- assert py_str.c_buffer[10] == '\x00'
+ assert py_str.c_ob_sval[1] == 'b'
+ assert py_str.c_ob_sval[10] == '\x00'
Py_DecRef(space, ar[0])
lltype.free(ar, flavor='raw')
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -657,6 +657,8 @@
pto.c_tp_dealloc = llhelper(
subtype_dealloc.api_func.functype,
subtype_dealloc.api_func.get_wrapper(space))
+ if space.is_w(w_type, space.w_str):
+ pto.c_tp_itemsize = 1
# buffer protocol
setup_buffer_procs(space, w_type, pto)
@@ -695,6 +697,8 @@
if pto.c_tp_base:
if pto.c_tp_base.c_tp_basicsize > pto.c_tp_basicsize:
pto.c_tp_basicsize = pto.c_tp_base.c_tp_basicsize
+ if pto.c_tp_itemsize < pto.c_tp_base.c_tp_itemsize:
+ pto.c_tp_itemsize = pto.c_tp_base.c_tp_itemsize
# will be filled later on with the correct value
# may not be 0
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -77,7 +77,9 @@
"""
py_uni = rffi.cast(PyUnicodeObject, py_obj)
s = rffi.wcharpsize2unicode(py_uni.c_str, py_uni.c_length)
- w_obj = space.wrap(s)
+ w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
+ w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
+ w_obj.__init__(s)
py_uni.c_hash = space.hash_w(w_obj)
track_reference(space, py_obj, w_obj)
return w_obj
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit