Author: Jeff Terrace <jterr...@gmail.com>
Branch:
Changeset: r50592:65311ed125b7
Date: 2011-12-15 15:21 -0500
http://bitbucket.org/pypy/pypy/changeset/65311ed125b7/
Log: Merged numpy-full-fromstring diff --git a/pypy/module/micronumpy/interp_support.py b/pypy/module/micronumpy/interp_support.py --- a/pypy/module/micronumpy/interp_support.py +++ b/pypy/module/micronumpy/interp_support.py @@ -1,34 +1,90 @@ -from pypy.interpreter.error import OperationError +from pypy.interpreter.error import OperationError, operationerrfmt from pypy.interpreter.gateway import unwrap_spec -from pypy.module.micronumpy.interp_dtype import get_dtype_cache -from pypy.rlib.rstruct.runpack import runpack from pypy.rpython.lltypesystem import lltype, rffi +from pypy.module.micronumpy import interp_dtype +from pypy.objspace.std.strutil import strip_spaces FLOAT_SIZE = rffi.sizeof(lltype.Float) -@unwrap_spec(s=str) -def fromstring(space, s): +def _fromstring_text(space, s, count, sep, length, dtype): from pypy.module.micronumpy.interp_numarray import W_NDimArray + + sep_stripped = strip_spaces(sep) + skip_bad_vals = len(sep_stripped) == 0 + + items = [] + num_items = 0 + idx = 0 + + while (num_items < count or count == -1) and idx < len(s): + nextidx = s.find(sep, idx) + if nextidx < 0: + nextidx = length + piece = strip_spaces(s[idx:nextidx]) + if len(piece) > 0 or not skip_bad_vals: + if len(piece) == 0 and not skip_bad_vals: + val = dtype.itemtype.default_fromstring(space) + else: + try: + val = dtype.coerce(space, space.wrap(piece)) + except OperationError, e: + if not e.match(space, space.w_ValueError): + raise + gotit = False + while not gotit and len(piece) > 0: + piece = piece[:-1] + try: + val = dtype.coerce(space, space.wrap(piece)) + gotit = True + except OperationError, e: + if not e.match(space, space.w_ValueError): + raise + if not gotit: + val = dtype.itemtype.default_fromstring(space) + nextidx = length + items.append(val) + num_items += 1 + idx = nextidx + 1 + + if count > num_items: + raise OperationError(space.w_ValueError, space.wrap( + "string is smaller than requested size")) + + a = W_NDimArray(num_items, [num_items], dtype=dtype) + 
for i, val in enumerate(items): + a.dtype.setitem(a.storage, i, val) + + return space.wrap(a) + +def _fromstring_bin(space, s, count, length, dtype): + from pypy.module.micronumpy.interp_numarray import W_NDimArray + + itemsize = dtype.itemtype.get_element_size() + if count == -1: + count = length / itemsize + if length % itemsize != 0: + raise operationerrfmt(space.w_ValueError, + "string length %d not divisable by item size %d", + length, itemsize) + if count * itemsize > length: + raise OperationError(space.w_ValueError, space.wrap( + "string is smaller than requested size")) + + a = W_NDimArray(count, [count], dtype=dtype) + for i in range(count): + val = dtype.itemtype.runpack_str(s[i*itemsize:i*itemsize + itemsize]) + a.dtype.setitem(a.storage, i, val) + + return space.wrap(a) + +@unwrap_spec(s=str, count=int, sep=str) +def fromstring(space, s, w_dtype=None, count=-1, sep=''): + dtype = space.interp_w(interp_dtype.W_Dtype, + space.call_function(space.gettypefor(interp_dtype.W_Dtype), w_dtype) + ) length = len(s) - - if length % FLOAT_SIZE == 0: - number = length/FLOAT_SIZE + if sep == '': + return _fromstring_bin(space, s, count, length, dtype) else: - raise OperationError(space.w_ValueError, space.wrap( - "string length %d not divisable by %d" % (length, FLOAT_SIZE))) - - dtype = get_dtype_cache(space).w_float64dtype - a = W_NDimArray(number, [number], dtype=dtype) - - start = 0 - end = FLOAT_SIZE - i = 0 - while i < number: - part = s[start:end] - a.dtype.setitem(a.storage, i, dtype.box(runpack('d', part))) - i += 1 - start += FLOAT_SIZE - end += FLOAT_SIZE - - return space.wrap(a) + return _fromstring_text(space, s, count, sep, length, dtype) diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py --- a/pypy/module/micronumpy/test/test_numarray.py +++ b/pypy/module/micronumpy/test/test_numarray.py @@ -1194,13 +1194,107 @@ import struct BaseNumpyAppTest.setup_class.im_func(cls) cls.w_data = 
cls.space.wrap(struct.pack('dddd', 1, 2, 3, 4)) + cls.w_fdata = cls.space.wrap(struct.pack('f', 2.3)) + cls.w_float32val = cls.space.wrap(struct.pack('f', 5.2)) + cls.w_float64val = cls.space.wrap(struct.pack('d', 300.4)) def test_fromstring(self): - from numpypy import fromstring + from numpypy import fromstring, array, uint8, float32, int32 + import sys a = fromstring(self.data) for i in range(4): assert a[i] == i + 1 - raises(ValueError, fromstring, "abc") + b = fromstring('\x01\x02', dtype=uint8) + assert a[0] == 1 + assert a[1] == 2 + c = fromstring(self.fdata, dtype=float32) + assert c[0] == float32(2.3) + d = fromstring("1 2", sep=' ', count=2, dtype=uint8) + assert len(d) == 2 + assert d[0] == 1 + assert d[1] == 2 + e = fromstring('3, 4,5', dtype=uint8, sep=',') + assert len(e) == 3 + assert e[0] == 3 + assert e[1] == 4 + assert e[2] == 5 + f = fromstring('\x01\x02\x03\x04\x05', dtype=uint8, count=3) + assert len(f) == 3 + assert f[0] == 1 + assert f[1] == 2 + assert f[2] == 3 + g = fromstring("1 2 3 ", dtype=uint8, sep=" ") + assert len(g) == 3 + assert g[0] == 1 + assert g[1] == 2 + assert g[2] == 3 + h = fromstring("1, , 2, 3", dtype=uint8, sep=",") + assert (h == [1,0,2,3]).all() + i = fromstring("1 2 3", dtype=uint8, sep=" ") + assert (i == [1,2,3]).all() + j = fromstring("1\t\t\t\t2\t3", dtype=uint8, sep="\t") + assert (j == [1,2,3]).all() + k = fromstring("1,x,2,3", dtype=uint8, sep=",") + assert (k == [1,0]).all() + l = fromstring("1,x,2,3", dtype='float32', sep=",") + assert (l == [1.0,-1.0]).all() + m = fromstring("1,,2,3", sep=",") + assert (m == [1.0,-1.0,2.0,3.0]).all() + n = fromstring("3.4 2.0 3.8 2.2", dtype=int32, sep=" ") + assert (n == [3]).all() + o = fromstring("1.0 2f.0f 3.8 2.2", dtype=float32, sep=" ") + assert len(o) == 2 + assert o[0] == 1.0 + assert o[1] == 2.0 + p = fromstring("1.0,,2.0,3.0", sep=",") + assert (p == [1.0, -1.0, 2.0, 3.0]).all() + q = fromstring("1.0,,2.0,3.0", sep=" ") + assert (q == [1.0]).all() + r = 
fromstring("\x01\x00\x02", dtype='bool') + assert (r == [True, False, True]).all() + s = fromstring("1,2,3,,5", dtype=bool, sep=",") + assert (s == [True, True, True, False, True]).all() + t = fromstring("", bool) + assert (t == []).all() + u = fromstring("\x01\x00\x00\x00\x00\x00\x00\x00", dtype=int) + if sys.maxint > 2 ** 31 - 1: + assert (u == [1]).all() + else: + assert (u == [1, 0]).all() + + def test_fromstring_types(self): + from numpypy import fromstring + from numpypy import int8, int16, int32, int64 + from numpypy import uint8, uint16, uint32 + from numpypy import float32, float64 + a = fromstring('\xFF', dtype=int8) + assert a[0] == -1 + b = fromstring('\xFF', dtype=uint8) + assert b[0] == 255 + c = fromstring('\xFF\xFF', dtype=int16) + assert c[0] == -1 + d = fromstring('\xFF\xFF', dtype=uint16) + assert d[0] == 65535 + e = fromstring('\xFF\xFF\xFF\xFF', dtype=int32) + assert e[0] == -1 + f = fromstring('\xFF\xFF\xFF\xFF', dtype=uint32) + assert repr(f[0]) == '4294967295' + g = fromstring('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', dtype=int64) + assert g[0] == -1 + h = fromstring(self.float32val, dtype=float32) + assert h[0] == float32(5.2) + i = fromstring(self.float64val, dtype=float64) + assert i[0] == float64(300.4) + + + def test_fromstring_invalid(self): + from numpypy import fromstring, uint16, uint8, int32 + #default dtype is 64-bit float, so 3 bytes should fail + raises(ValueError, fromstring, "\x01\x02\x03") + #3 bytes is not modulo 2 bytes (int16) + raises(ValueError, fromstring, "\x01\x03\x03", dtype=uint16) + #5 bytes is larger than 3 bytes + raises(ValueError, fromstring, "\x01\x02\x03", count=5, dtype=uint8) class AppTestRepr(BaseNumpyAppTest): diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -8,6 +8,7 @@ from pypy.rlib.objectmodel import specialize from pypy.rlib.rarithmetic import LONG_BIT, widen from pypy.rpython.lltypesystem import 
lltype, rffi +from pypy.rlib.rstruct.runpack import runpack def simple_unary_op(func): @@ -55,6 +56,8 @@ class Primitive(object): _mixin_ = True + format_code = '?' + def get_element_size(self): return rffi.sizeof(self.T) @@ -84,6 +87,9 @@ def _coerce(self, space, w_item): raise NotImplementedError + def default_fromstring(self, space): + raise NotImplementedError + def read(self, storage, width, i, offset): return self.box(libffi.array_getitem(clibffi.cast_type_to_ffitype(self.T), width, storage, i, offset @@ -102,6 +108,9 @@ width, storage, i, offset, value ) + def runpack_str(self, s): + return self.box(runpack(self.format_code, s)) + @simple_binary_op def add(self, v1, v2): return v1 + v2 @@ -164,6 +173,7 @@ class Bool(BaseType, Primitive): T = lltype.Bool BoxType = interp_boxes.W_BoolBox + format_code = '?' True = BoxType(True) False = BoxType(False) @@ -192,6 +202,9 @@ def for_computation(self, v): return int(v) + + def default_fromstring(self, space): + return self.box(False) class Integer(Primitive): _mixin_ = True @@ -205,6 +218,9 @@ def for_computation(self, v): return widen(v) + + def default_fromstring(self, space): + return self.box(0) @simple_binary_op def div(self, v1, v2): @@ -241,30 +257,37 @@ class Int8(BaseType, Integer): T = rffi.SIGNEDCHAR BoxType = interp_boxes.W_Int8Box + format_code = "b" class UInt8(BaseType, Integer): T = rffi.UCHAR BoxType = interp_boxes.W_UInt8Box + format_code = "B" class Int16(BaseType, Integer): T = rffi.SHORT BoxType = interp_boxes.W_Int16Box + format_code = "h" class UInt16(BaseType, Integer): T = rffi.USHORT BoxType = interp_boxes.W_UInt16Box + format_code = "H" class Int32(BaseType, Integer): T = rffi.INT BoxType = interp_boxes.W_Int32Box + format_code = "i" class UInt32(BaseType, Integer): T = rffi.UINT BoxType = interp_boxes.W_UInt32Box + format_code = "I" class Long(BaseType, Integer): T = rffi.LONG BoxType = interp_boxes.W_LongBox + format_code = 'l' class ULong(BaseType, Integer): T = rffi.ULONG @@ -273,10 
+296,12 @@ class Int64(BaseType, Integer): T = rffi.LONGLONG BoxType = interp_boxes.W_Int64Box + format_code = "q" class UInt64(BaseType, Integer): T = rffi.ULONGLONG BoxType = interp_boxes.W_UInt64Box + format_code = "Q" def _coerce(self, space, w_item): try: @@ -304,6 +329,9 @@ def for_computation(self, v): return float(v) + def default_fromstring(self, space): + return self.box(-1.0) + @simple_binary_op def div(self, v1, v2): try: @@ -403,7 +431,9 @@ class Float32(BaseType, Float): T = rffi.FLOAT BoxType = interp_boxes.W_Float32Box + format_code = "f" class Float64(BaseType, Float): T = rffi.DOUBLE - BoxType = interp_boxes.W_Float64Box \ No newline at end of file + BoxType = interp_boxes.W_Float64Box + format_code = "d" \ No newline at end of file _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit