[pypy-commit] pypy indexing: hg merge default

rlamy Fri, 17 Jul 2015 08:14:35 -0700

Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: indexing
Changeset: r78583:d579a53d3316
Date: 2015-07-17 16:12 +0100
http://bitbucket.org/pypy/pypy/changeset/d579a53d3316/


Log:    hg merge default

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -37,3 +37,9 @@
 .. branch: unicode-dtype
 
 Partial implementation of unicode dtype and unicode scalars.
+
+.. branch: dtypes-compatability
+
+Improve compatibility with numpy dtypes; handle offsets to create unions,
+fix str() and repr(), allow specifying itemsize, metadata and titles, add flags,
+allow subclassing dtype
diff --git a/pypy/module/_vmprof/src/getpc.h b/pypy/module/_vmprof/src/getpc.h
--- a/pypy/module/_vmprof/src/getpc.h
+++ b/pypy/module/_vmprof/src/getpc.h
@@ -132,7 +132,7 @@
   }
 };
 
-inline void* GetPC(ucontext_t *signal_ucontext) {
+void* GetPC(ucontext_t *signal_ucontext) {
   // See comment above struct CallUnrollInfo.  Only try instruction
   // flow matching if both eip and esp looks reasonable.
   const int eip = signal_ucontext->uc_mcontext.gregs[REG_EIP];
@@ -168,7 +168,7 @@
 typedef int ucontext_t;
 #endif
 
-inline void* GetPC(ucontext_t *signal_ucontext) {
+void* GetPC(ucontext_t *signal_ucontext) {
   RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
   return NULL;
 }
@@ -178,7 +178,7 @@
 // the right value for your system, and add it to the list in
 // configure.ac (or set it manually in your config.h).
 #else
-inline void* GetPC(ucontext_t *signal_ucontext) {
+void* GetPC(ucontext_t *signal_ucontext) {
   return (void*)signal_ucontext->PC_FROM_UCONTEXT;   // defined in config.h
 }
 
diff --git a/pypy/module/_vmprof/src/vmprof.c b/pypy/module/_vmprof/src/vmprof.c
--- a/pypy/module/_vmprof/src/vmprof.c
+++ b/pypy/module/_vmprof/src/vmprof.c
@@ -262,13 +262,31 @@
        int marker = MARKER_TRAILER;
        write(profile_file, &marker, 1);
 
+#ifdef __linux__
     // copy /proc/PID/maps to the end of the profile file
     sprintf(buf, "/proc/%d/maps", getpid());
-    src = fopen(buf, "r");    
+    src = fopen(buf, "r");
+    if (!src) {
+        vmprof_error = "error opening proc maps";
+        return -1;
+    }
     while ((size = fread(buf, 1, BUFSIZ, src))) {
         write(profile_file, buf, size);
     }
     fclose(src);
+#else
+    // freebsd and mac
+    sprintf(buf, "procstat -v %d", getpid());
+    src = popen(buf, "r");
+    if (!src) {
+        vmprof_error = "error calling procstat";
+        return -1;
+    }
+    while ((size = fread(buf, 1, BUFSIZ, src))) {
+        write(profile_file, buf, size);
+    }
+    pclose(src);
+#endif
     close(profile_file);
        return 0;
 }
diff --git a/pypy/module/micronumpy/appbridge.py b/pypy/module/micronumpy/appbridge.py
--- a/pypy/module/micronumpy/appbridge.py
+++ b/pypy/module/micronumpy/appbridge.py
@@ -8,6 +8,7 @@
     w__commastring = None
     w_array_repr = None
     w_array_str = None
+    w__usefields = None
 
     def __init__(self, space):
         pass
diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py
--- a/pypy/module/micronumpy/boxes.py
+++ b/pypy/module/micronumpy/boxes.py
@@ -563,7 +563,7 @@
         elif space.isinstance_w(w_item, space.w_int):
             indx = space.int_w(w_item)
             try:
-                item = self.dtype.names[indx]
+                item = self.dtype.names[indx][0]
             except IndexError:
                 if indx < 0:
                     indx += len(self.dtype.names)
@@ -596,7 +596,7 @@
         try:
             ofs, dtype = self.dtype.fields[item]
         except KeyError:
-            raise oefmt(space.w_IndexError, "222only integers, slices (`:`), "
+            raise oefmt(space.w_IndexError, "only integers, slices (`:`), "
                 "ellipsis (`...`), numpy.newaxis (`None`) and integer or "
                 "boolean arrays are valid indices")
         dtype.store(self.arr, self.ofs, ofs,
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -47,6 +47,9 @@
     def lookup(self, name):
         return self.getdictvalue(self, name)
 
+    def getname(self, space):
+        return self.name
+
 class FakeSpace(ObjSpace):
     w_ValueError = W_TypeObject("ValueError")
     w_TypeError = W_TypeObject("TypeError")
diff --git a/pypy/module/micronumpy/concrete.py b/pypy/module/micronumpy/concrete.py
--- a/pypy/module/micronumpy/concrete.py
+++ b/pypy/module/micronumpy/concrete.py
@@ -624,15 +624,17 @@
         self.impl = impl
         self.readonly = readonly
 
-    def getitem(self, item):
-        return raw_storage_getitem(lltype.Char, self.impl.storage, item)
+    def getitem(self, index):
+        return raw_storage_getitem(lltype.Char, self.impl.storage,
+                 index + self.impl.start)
 
-    def setitem(self, item, v):
-        raw_storage_setitem(self.impl.storage, item,
+    def setitem(self, index, v):
+        raw_storage_setitem(self.impl.storage, index + self.impl.start,
                             rffi.cast(lltype.Char, v))
 
     def getlength(self):
-        return self.impl.size
+        return self.impl.size - self.impl.start
 
     def get_raw_address(self):
-        return self.impl.storage
+        from rpython.rtyper.lltypesystem import rffi
+        return rffi.ptradd(self.impl.storage, self.impl.start)
diff --git a/pypy/module/micronumpy/constants.py b/pypy/module/micronumpy/constants.py
--- a/pypy/module/micronumpy/constants.py
+++ b/pypy/module/micronumpy/constants.py
@@ -92,6 +92,21 @@
 ARRAY_ELEMENTSTRIDES = 0x0080 # strides  are units of the dtype element size
 ARRAY_NOTSWAPPED  = 0x0200 #native byte order
 
+#dtype flags
+ITEM_REFCOUNT   = 0x01
+ITEM_HASOBJECT  = 0x01
+LIST_PICKLE     = 0x02
+ITEM_IS_POINTER = 0x04
+NEEDS_INIT      = 0x08
+NEEDS_PYAPI     = 0x10
+USE_GETITEM     = 0x20
+USE_SETITEM     = 0x40
+ALIGNED_STRUCT  = 0x80
+FROM_FIELDS     = NEEDS_INIT | LIST_PICKLE | ITEM_REFCOUNT | NEEDS_PYAPI
+OBJECT_DTYPE_FLAGS = (LIST_PICKLE | USE_GETITEM | ITEM_IS_POINTER |
+                        ITEM_REFCOUNT | NEEDS_INIT | NEEDS_PYAPI)
+
+
 LITTLE = '<'
 BIG = '>'
 NATIVE = '='
diff --git a/pypy/module/micronumpy/descriptor.py b/pypy/module/micronumpy/descriptor.py
--- a/pypy/module/micronumpy/descriptor.py
+++ b/pypy/module/micronumpy/descriptor.py
@@ -54,11 +54,11 @@
 class W_Dtype(W_Root):
     _immutable_fields_ = [
         "itemtype?", "w_box_type", "byteorder?", "names?", "fields?",
-        "elsize?", "alignment?", "shape?", "subdtype?", "base?"]
+        "elsize?", "alignment?", "shape?", "subdtype?", "base?", "flags?"]
 
     @enforceargs(byteorder=SomeChar())
     def __init__(self, itemtype, w_box_type, byteorder=NPY.NATIVE, names=[],
-                 fields={}, elsize=None, shape=[], subdtype=None):
+                 fields={}, elsize=-1, shape=[], subdtype=None):
         self.itemtype = itemtype
         self.w_box_type = w_box_type
         if itemtype.get_element_size() == 1 or isinstance(itemtype, 
types.ObjectType):
@@ -66,16 +66,21 @@
         self.byteorder = byteorder
         self.names = names
         self.fields = fields
-        if elsize is None:
+        if elsize < 0:
             elsize = itemtype.get_element_size()
         self.elsize = elsize
-        self.alignment = itemtype.alignment
         self.shape = shape
         self.subdtype = subdtype
+        self.flags = 0
+        self.metadata = None
+        if isinstance(itemtype, types.ObjectType):
+            self.flags = NPY.OBJECT_DTYPE_FLAGS
         if not subdtype:
             self.base = self
+            self.alignment = itemtype.get_element_size()
         else:
             self.base = subdtype.base
+            self.alignment = subdtype.itemtype.get_element_size()
 
     @property
     def num(self):
@@ -172,51 +177,149 @@
         return dtype
 
     def get_name(self):
-        name = self.w_box_type.name
-        if name.startswith('numpy.'):
-            name = name[6:]
+        name = self.w_box_type.getname(self.itemtype.space)
         if name.endswith('_'):
             name = name[:-1]
         return name
 
-    def descr_get_name(self, space):
-        name = self.get_name()
+    def descr_get_name(self, space, quote=False):
+        if quote:
+            name = "'" + self.get_name() + "'"
+        else:
+            name = self.get_name()
         if self.is_flexible() and self.elsize != 0:
             return space.wrap(name + str(self.elsize * 8))
         return space.wrap(name)
 
-    def descr_get_str(self, space):
+    def descr_get_str(self, space, ignore='|', simple=True):
+        if not simple and self.fields and len(self.fields) > 0:
+            return self.descr_get_descr(space)
+        total = 0
+        for s in self.shape:
+            total += s
+        if not simple and total > 0:
+            return space.newtuple(
+                [space.wrap(self.subdtype.get_str(ignore='')), 
+                 space.newtuple([space.wrap(s) for s in self.shape]),
+                ])
+        return space.wrap(self.get_str(ignore=ignore))
+
+    def get_str(self, ignore='|'):
         basic = self.kind
         endian = self.byteorder
         size = self.elsize
         if endian == NPY.NATIVE:
             endian = NPY.NATBYTE
+        elif endian == NPY.IGNORE:
+            endian = ignore
         if self.num == NPY.UNICODE:
             size >>= 2
-        return space.wrap("%s%s%s" % (endian, basic, size))
+        return "%s%s%s" % (endian, basic, size)
 
-    def descr_get_descr(self, space):
+    def descr_get_descr(self, space, style='descr', force_dict=False):
+        simple = False
+        if style == 'descr':
+            simple = True
         if not self.is_record():
             return space.newlist([space.newtuple([space.wrap(""),
-                                                  self.descr_get_str(space)])])
+                                                  self.descr_get_str(space, 
simple=simple)])])
+        elif (self.alignment > 1 and not style.startswith('descr')) or 
force_dict:
+            # we need to force a sorting order for the keys,
+            # so return a string instead of a dict. Also, numpy formats
+            # the lists without spaces between elements, so we cannot simply
+            # do str(names)
+            names = ["'names':["]
+            formats = ["'formats':["]
+            offsets = ["'offsets':["]
+            titles = ["'titles':["]
+            use_titles = False
+            show_offsets = False
+            offsets_n = []
+            total = 0
+            for name, title in self.names:
+                offset, subdtype = self.fields[name]
+                if subdtype.is_record():
+                    substr = [space.str_w(space.str(subdtype.descr_get_descr(
+                                                space, 
style='descr_subdtype'))), ","]
+                elif subdtype.subdtype is not None:
+                    substr = ["(", space.str_w(space.str(
+                        subdtype.subdtype.descr_get_descr(space, 
style='descr_subdtype'))),
+                        ', ',
+                        space.str_w(space.repr(space.newtuple([space.wrap(s) 
for s in subdtype.shape]))),
+                        "),"]
+                else:
+                    substr = ["'", subdtype.get_str(ignore=''), "',"]
+                formats += substr
+                offsets += [str(offset),  ',']
+                names += ["'", name, "',"]
+                titles += ["'", str(title), "',"]
+                if title is not None:
+                    use_titles = True
+                if total != offset:
+                    show_offsets = True
+                total += subdtype.elsize
+                # make sure offsets_n is sorted
+                i = 0
+                for i in range(len(offsets_n)):
+                    if offset < offsets_n[i]:
+                        break
+                offsets_n.insert(i, offset)
+            total = 0
+            for i in range(len(offsets_n)):
+                if offsets_n[i] != self.alignment * i:
+                    show_offsets = True
+            if use_titles and not show_offsets: 
+                return self.descr_get_descr(space, style='descr')
+            # replace the last , with a ]
+            formats[-1] = formats[-1][:-1] + ']'
+            offsets[-1] = offsets[-1][:-1] + ']'
+            names[-1] = names[-1][:-1] + ']'
+            titles[-1] = titles[-1][:-1] + ']'
+            if self.alignment < 2 or style.endswith('subdtype'):
+                suffix = "}"
+            elif style == 'str':
+                suffix = ", 'aligned':True}"
+            elif style == 'substr':
+                suffix = '}'
+            else:
+                suffix = "}, align=True"
+            s_as_list = ['{'] + names + [', '] + formats + [', '] + offsets + 
[', ']
+            if use_titles:
+                s_as_list += titles + [', ']
+                    
+            s_as_list += ["'itemsize':", str(self.elsize), suffix]
+            return space.wrap(''.join(s_as_list))
         else:
             descr = []
-            for name in self.names:
-                subdtype = self.fields[name][1]
-                subdescr = [space.wrap(name)]
+            total = 0
+            for name, title in self.names:
+                offset, subdtype = self.fields[name]
+                show_offsets = False
+                if total != offset and len(subdtype.shape) < 1:
+                    # whoops, need to use other format
+                    return self.descr_get_descr(space, style=style + 
'_subdtype', force_dict=True)
+                total += subdtype.elsize
+                ignore = '|'
+                if title:
+                    subdescr = [space.newtuple([space.wrap(title), 
space.wrap(name)])]
+                    ignore = ''
+                else:
+                    subdescr = [space.wrap(name)]
                 if subdtype.is_record():
-                    subdescr.append(subdtype.descr_get_descr(space))
+                    subdescr.append(subdtype.descr_get_descr(space, style))
                 elif subdtype.subdtype is not None:
-                    subdescr.append(subdtype.subdtype.descr_get_str(space))
+                    subdescr.append(subdtype.subdtype.descr_get_str(space, 
simple=False))
                 else:
-                    subdescr.append(subdtype.descr_get_str(space))
+                    subdescr.append(subdtype.descr_get_str(space, 
ignore=ignore, simple=False))
                 if subdtype.shape != []:
                     subdescr.append(subdtype.descr_get_shape(space))
                 descr.append(space.newtuple(subdescr[:]))
+            if self.alignment >= 0 and not style.endswith('subdtype'):
+                return 
space.wrap(space.str_w(space.repr(space.newlist(descr))) + ', align=True')      
           
             return space.newlist(descr)
 
     def descr_get_hasobject(self, space):
-        return space.w_False
+        return space.wrap(self.is_object())
 
     def descr_get_isbuiltin(self, space):
         if self.fields is None:
@@ -238,19 +341,28 @@
     def descr_get_shape(self, space):
         return space.newtuple([space.wrap(dim) for dim in self.shape])
 
+    def descr_get_flags(self, space):
+        return space.wrap(self.flags)
+
     def descr_get_fields(self, space):
         if not self.fields:
             return space.w_None
         w_fields = space.newdict()
-        for name, (offset, subdtype) in self.fields.iteritems():
-            space.setitem(w_fields, space.wrap(name),
+        for name, title in self.names:
+            offset, subdtype = self.fields[name]
+            if title is not None:
+                w_nt = space.newtuple([space.wrap(name), space.wrap(title)]) 
+                space.setitem(w_fields, w_nt,
+                          space.newtuple([subdtype, space.wrap(offset)]))
+            else:
+                space.setitem(w_fields, space.wrap(name),
                           space.newtuple([subdtype, space.wrap(offset)]))
         return w_fields
 
     def descr_get_names(self, space):
         if not self.fields:
             return space.w_None
-        return space.newtuple([space.wrap(name) for name in self.names])
+        return space.newtuple([space.wrap(name[0]) for name in self.names])
 
     def descr_set_names(self, space, w_names):
         if not self.fields:
@@ -262,23 +374,43 @@
                         "with a sequence of length %d",
                         len(self.names))
         names = []
-        for w_name in space.fixedview(w_names):
+        names_w = space.fixedview(w_names)
+        for i in range(len(names_w)):
+            w_name = names_w[i]
+            title = self.names[i][1]
             if not space.isinstance_w(w_name, space.w_str):
                 raise oefmt(space.w_ValueError,
                             "item #%d of names is of type %T and not string",
                             len(names), w_name)
-            names.append(space.str_w(w_name))
+            names.append((space.str_w(w_name), title))
         fields = {}
         for i in range(len(self.names)):
-            if names[i] in fields:
+            if names[i][0] in fields:
                 raise oefmt(space.w_ValueError, "Duplicate field names given.")
-            fields[names[i]] = self.fields[self.names[i]]
+            fields[names[i][0]] = self.fields[self.names[i][0]]
+            if self.names[i][1] is not None:
+                fields[self.names[i][1]] = self.fields[self.names[i][0]]
         self.fields = fields
         self.names = names
 
     def descr_del_names(self, space):
-        raise OperationError(space.w_AttributeError, space.wrap(
-            "Cannot delete dtype names attribute"))
+        raise oefmt(space.w_AttributeError, 
+            "Cannot delete dtype names attribute")
+
+    def descr_get_metadata(self, space):
+        if self.metadata is None:
+            return space.w_None
+        return self.metadata
+
+    def descr_set_metadata(self, space, w_metadata):
+        if w_metadata is None:
+            return
+        if not space.isinstance_w(w_metadata, space.w_dict):
+            raise oefmt(space.w_TypeError, "argument 4 must be dict, not str")
+        self.metadata = w_metadata
+
+    def descr_del_metadata(self, space):
+        self.metadata = None
 
     def eq(self, space, w_other):
         w_other = space.call_function(space.gettypefor(W_Dtype), w_other)
@@ -331,7 +463,8 @@
                 y = intmask((1000003 * y) ^ self.alignment)
             return intmask((1000003 * x) ^ y)
         if self.fields:
-            for name, (offset, subdtype) in self.fields.iteritems():
+            for name in self.fields.keys():
+                offset, subdtype = self.fields[name]
                 assert isinstance(subdtype, W_Dtype)
                 y = intmask(1000003 * (0x345678 ^ compute_hash(name)))
                 y = intmask(1000003 * (y ^ compute_hash(offset)))
@@ -349,7 +482,7 @@
 
     def descr_str(self, space):
         if self.fields:
-            return space.str(self.descr_get_descr(space))
+            return space.str(self.descr_get_descr(space, style='str'))
         elif self.subdtype is not None:
             return space.str(space.newtuple([
                 self.subdtype.descr_get_str(space),
@@ -362,7 +495,7 @@
 
     def descr_repr(self, space):
         if self.fields:
-            r = self.descr_get_descr(space)
+            r = self.descr_get_descr(space, style='repr')
         elif self.subdtype is not None:
             r = space.newtuple([self.subdtype.descr_get_str(space),
                                 self.descr_get_shape(space)])
@@ -375,9 +508,11 @@
                 size = self.elsize
                 if self.num == NPY.UNICODE:
                     size >>= 2
-                r = space.wrap(byteorder + self.char + str(size))
+                r = space.wrap("'" + byteorder + self.char + str(size) + "'")
             else:
-                r = self.descr_get_name(space)
+                r = self.descr_get_name(space, quote=True)
+        if space.isinstance_w(r, space.w_str):
+            return space.wrap("dtype(%s)" % space.str_w(r))
         return space.wrap("dtype(%s)" % space.str_w(space.repr(r)))
 
     def descr_getitem(self, space, w_item):
@@ -389,7 +524,7 @@
         elif space.isinstance_w(w_item, space.w_int):
             indx = space.int_w(w_item)
             try:
-                item = self.names[indx]
+                item,title = self.names[indx]
             except IndexError:
                 raise oefmt(space.w_IndexError,
                     "Field index %d out of range.", indx)
@@ -436,14 +571,17 @@
         values = self.descr_get_fields(space)
         if self.is_flexible():
             w_size = space.wrap(self.elsize)
-            alignment = space.wrap(self.alignment)
+            if self.alignment > 2:
+                w_alignment = space.wrap(self.alignment)
+            else:
+                w_alignment = space.wrap(1)
         else:
             w_size = space.wrap(-1)
-            alignment = space.wrap(-1)
-        flags = space.wrap(0)
+            w_alignment = space.wrap(-1)
+        w_flags = space.wrap(self.flags)
 
         data = space.newtuple([version, space.wrap(endian), subdescr,
-                               names, values, w_size, alignment, flags])
+                               names, values, w_size, w_alignment, w_flags])
         return space.newtuple([w_class, builder_args, data])
 
     def descr_setstate(self, space, w_data):
@@ -465,6 +603,9 @@
         w_fields = space.getitem(w_data, space.wrap(4))
         size = space.int_w(space.getitem(w_data, space.wrap(5)))
         alignment = space.int_w(space.getitem(w_data, space.wrap(6)))
+        if alignment < 2:
+            alignment = -1
+        flags = space.int_w(space.getitem(w_data, space.wrap(7)))
 
         if (w_names == space.w_None) != (w_fields == space.w_None):
             raise oefmt(space.w_ValueError, "inconsistent fields and names in 
Numpy dtype unpickling")
@@ -492,20 +633,21 @@
             self.names = []
             self.fields = {}
             for w_name in space.fixedview(w_names):
+                # XXX what happens if there is a title in the pickled dtype?
                 name = space.str_w(w_name)
                 value = space.getitem(w_fields, w_name)
 
                 dtype = space.getitem(value, space.wrap(0))
+                offset = space.int_w(space.getitem(value, space.wrap(1)))
+                self.names.append((name, None))
                 assert isinstance(dtype, W_Dtype)
-                offset = space.int_w(space.getitem(value, space.wrap(1)))
-
-                self.names.append(name)
                 self.fields[name] = offset, dtype
             self.itemtype = types.RecordType(space)
 
         if self.is_flexible():
             self.elsize = size
             self.alignment = alignment
+        self.flags = flags
 
     @unwrap_spec(new_order=str)
     def descr_newbyteorder(self, space, new_order=NPY.SWAP):
@@ -526,17 +668,24 @@
 
 
 @specialize.arg(2)
-def dtype_from_list(space, w_lst, simple, align=False):
+def dtype_from_list(space, w_lst, simple, alignment, offsets=None, itemsize=0):
     lst_w = space.listview(w_lst)
     fields = {}
-    offset = 0
-    names = []
-    maxalign = 0
+    use_supplied_offsets = True
+    if offsets is None:
+        use_supplied_offsets = False
+        offsets = [0] * len(lst_w)
+    maxalign = alignment 
+    fldnames = [''] * len(lst_w)
+    subdtypes = [None] * len(lst_w)
+    titles = [None] * len(lst_w)
+    total = 0
     for i in range(len(lst_w)):
         w_elem = lst_w[i]
         if simple:
-            subdtype = descr__new__(space, space.gettypefor(W_Dtype), w_elem)
-            fldname = 'f%d' % i
+            subdtype = make_new_dtype(space, space.gettypefor(W_Dtype), w_elem,
+                                    maxalign)
+            fldnames[i] = 'f%d' % i
         else:
             w_shape = space.newtuple([])
             if space.len_w(w_elem) == 3:
@@ -545,52 +694,213 @@
                     w_shape = space.newtuple([w_shape])
             else:
                 w_fldname, w_flddesc = space.fixedview(w_elem, 2)
-            subdtype = descr__new__(space, space.gettypefor(W_Dtype), 
w_flddesc, w_shape=w_shape)
-            fldname = space.str_w(w_fldname)
-            if fldname == '':
-                fldname = 'f%d' % i
-            if fldname in fields:
+            subdtype = make_new_dtype(space, space.gettypefor(W_Dtype),
+                                    w_flddesc, maxalign, w_shape=w_shape)
+            if space.isinstance_w(w_fldname, space.w_tuple):
+                fldlist = space.listview(w_fldname)
+                fldnames[i] = space.str_w(fldlist[0])
+                if space.is_w(fldlist[1], space.w_None):
+                    titles[i] = None
+                else:
+                    titles[i] = space.str_w(fldlist[1])
+                if len(fldlist) != 2:
+                    raise oefmt(space.w_TypeError, "data type not understood")
+            elif space.isinstance_w(w_fldname, space.w_str): 
+                fldnames[i] = space.str_w(w_fldname)
+            else:
+                raise oefmt(space.w_TypeError, "data type not understood")
+            if fldnames[i] == '':
+                fldnames[i] = 'f%d' % i
+        assert isinstance(subdtype, W_Dtype)
+        if alignment >= 0:
+            maxalign = max(subdtype.alignment, maxalign)
+            delta = subdtype.alignment
+            # Set offset to the next power-of-two above delta
+            delta = (delta + maxalign -1) & (-maxalign)
+            if not use_supplied_offsets:
+                if delta > offsets[i]:
+                    for j in range(i):
+                        offsets[j+1] = delta + offsets[j]
+                if  i + 1 < len(offsets) and offsets[i + 1] == 0:
+                    offsets[i + 1] = offsets[i] + max(delta, subdtype.elsize)
+        elif not use_supplied_offsets:
+            if  i + 1 < len(offsets) and offsets[i + 1] == 0:
+                offsets[i+1] = offsets[i] + subdtype.elsize
+        subdtypes[i] = subdtype
+        if use_supplied_offsets:
+            sz = subdtype.elsize
+        else:
+            sz = max(maxalign, subdtype.elsize)
+        if offsets[i] + sz > total:
+            total = offsets[i] + sz
+    # padding?
+    if alignment >= 0 and total % maxalign:
+        total = total // maxalign * maxalign + maxalign
+    names = []
+    for i in range(len(subdtypes)):
+        subdtype = subdtypes[i]
+        assert isinstance(subdtype, W_Dtype)
+        if alignment >=0 and subdtype.is_record():
+            subdtype.alignment = maxalign
+        if fldnames[i] in fields:
+            raise oefmt(space.w_ValueError, "two fields with the same name")
+        if maxalign > 1 and offsets[i] % subdtype.alignment:
+            raise oefmt(space.w_ValueError, "offset %d for NumPy dtype with "
+                    "fields is not divisible by the field alignment %d "
+                    "with align=True", offsets[i], maxalign)
+        fields[fldnames[i]] = offsets[i], subdtype
+        if titles[i] is not None:
+            if titles[i] in fields:
                 raise oefmt(space.w_ValueError, "two fields with the same 
name")
-        assert isinstance(subdtype, W_Dtype)
-        fields[fldname] = (offset, subdtype)
-        offset += subdtype.elsize
-        maxalign = max(subdtype.elsize, maxalign)
-        names.append(fldname)
+            fields[titles[i]] = offsets[i], subdtype
+        names.append((fldnames[i], titles[i]))
+    if itemsize > 1:
+        if total > itemsize:
+            raise oefmt(space.w_ValueError,
+                     "NumPy dtype descriptor requires %d bytes, cannot"
+                     " override to smaller itemsize of %d", total, itemsize)
+        if alignment >= 0 and itemsize % maxalign:
+            raise oefmt(space.w_ValueError,
+                    "NumPy dtype descriptor requires alignment of %d bytes, "
+                    "which is not divisible into the specified itemsize %d",
+                    maxalign, itemsize) 
+        total = itemsize
+    retval = W_Dtype(types.RecordType(space), 
space.gettypefor(boxes.W_VoidBox),
+                   names=names, fields=fields, elsize=total)
+    if alignment >=0:
+        retval.alignment = maxalign
+    else:
+        retval.alignment = -1
+    retval.flags |= NPY.NEEDS_PYAPI
+    return retval
+
+def _get_val_or_none(space, w_dict, key):
+    w_key = space.wrap(key)
+    try:
+        w_val = space.getitem(w_dict, w_key)
+    except OperationError as e:
+        if e.match(space, space.w_KeyError):
+            return None
+        else:
+            raise
+    return w_val
+
+def _get_list_or_none(space, w_dict, key):
+    w_val = _get_val_or_none(space, w_dict, key)
+    if w_val is None:
+        return None
+    if space.isinstance_w(w_val, space.w_set):
+        raise oefmt(space.w_TypeError, "'set' object does not support 
indexing")
+    return space.listview(w_val)
+
+def _usefields(space, w_dict, align):
+    # Only for testing, a shortened version of the real _usefields
+    allfields = []
+    for fname in w_dict.iterkeys().iterator:
+        obj = _get_list_or_none(space, w_dict, fname)
+        num = space.int_w(obj[1])
+        if align:
+            alignment = 0
+        else:
+            alignment = -1
+        format = dtype_from_spec(space, obj[0], alignment=alignment)
+        if len(obj) > 2:
+            title = space.wrap(obj[2])
+        else:
+            title = space.w_None
+        allfields.append((space.wrap(fname), format, num, title))
+    allfields.sort(key=lambda x: x[2])
+    names   = [space.newtuple([x[0], x[3]]) for x in allfields]
+    formats = [x[1] for x in allfields]
+    offsets = [x[2] for x in allfields]
+    aslist = []
     if align:
-        # Set offset to the next power-of-two above offset
-        offset = (offset + maxalign -1) & (-maxalign)
-    return W_Dtype(types.RecordType(space), space.gettypefor(boxes.W_VoidBox),
-                   names=names, fields=fields, elsize=offset)
+        alignment = 0
+    else:
+        alignment = -1
+    for i in range(len(names)):
+        aslist.append(space.newtuple([space.wrap(names[i]), 
space.wrap(formats[i])]))
+    return dtype_from_list(space, space.newlist(aslist), False, alignment, 
offsets=offsets)
+    
+def dtype_from_dict(space, w_dict, alignment):
+    from pypy.objspace.std.dictmultiobject import W_DictMultiObject
+    assert isinstance(w_dict, W_DictMultiObject)
+    names_w = _get_list_or_none(space, w_dict, 'names')
+    formats_w = _get_list_or_none(space, w_dict, 'formats') 
+    offsets_w = _get_list_or_none(space, w_dict, 'offsets')
+    titles_w = _get_list_or_none(space, w_dict, 'titles')
+    metadata_w = _get_val_or_none(space, w_dict, 'metadata')
+    aligned_w = _get_val_or_none(space, w_dict, 'align')
+    itemsize_w = _get_val_or_none(space, w_dict, 'itemsize')
+    if names_w is None or formats_w is None:
+        if we_are_translated():
+            return get_appbridge_cache(space).call_method(space,
+                'numpy.core._internal', '_usefields', Arguments(space, 
+                                [w_dict, space.wrap(alignment >= 0)]))
+        else:
+            return _usefields(space, w_dict, alignment >= 0)
+    n = len(names_w)
+    if (n != len(formats_w) or 
+        (offsets_w is not None and n != len(offsets_w)) or
+        (titles_w is not None and n != len(titles_w))):
+        raise oefmt(space.w_ValueError, "'names', 'formats', 'offsets', and "
+            "'titles' dicct entries must have the same length")
+    if aligned_w is not None:
+        if space.isinstance_w(aligned_w, space.w_bool) and 
space.is_true(aligned_w):
+            if alignment < 0:
+                alignment = 0 
+        else:
+            raise oefmt(space.w_ValueError,
+                    "NumPy dtype descriptor includes 'aligned' entry, "
+                    "but its value is neither True nor False");
+    if offsets_w is None:
+        offsets = None
+    else:
+        offsets = [space.int_w(i) for i in offsets_w]
+    if titles_w is not None:
+        _names_w = []
+        for i in range(min(len(names_w), len(titles_w))):
+            _names_w.append(space.newtuple([names_w[i], titles_w[i]]))
+        names_w = _names_w
+    aslist = []
+    if itemsize_w is None:
+        itemsize = 0
+    else:
+        itemsize = space.int_w(itemsize_w)
+    for i in range(min(len(names_w), len(formats_w))):
+        aslist.append(space.newtuple([names_w[i], formats_w[i]]))
+    retval = dtype_from_list(space, space.newlist(aslist), False, alignment,
+                             offsets=offsets, itemsize=itemsize)
+    if metadata_w is not None:
+        retval.descr_set_metadata(space, metadata_w)
+    retval.flags |= NPY.NEEDS_PYAPI
+    return retval 
 
-
-def dtype_from_dict(space, w_dict):
-    raise OperationError(space.w_NotImplementedError, space.wrap(
-        "dtype from dict"))
-
-
-def dtype_from_spec(space, w_spec):
+def dtype_from_spec(space, w_spec, alignment):
 
     if we_are_translated():
         w_lst = get_appbridge_cache(space).call_method(space,
             'numpy.core._internal', '_commastring', Arguments(space, [w_spec]))
     else:
-        # testing, handle manually
-        if space.eq_w(w_spec, space.wrap('u4,u4,u4')):
-            w_lst = space.newlist([space.wrap('u4')]*3)
-        if space.eq_w(w_spec, space.wrap('u4,u4,u4')):
-            w_lst = space.newlist([space.wrap('u4')]*3)
-        else:
-            raise oefmt(space.w_RuntimeError,
-                    "cannot parse w_spec")
+        # handle only simple cases for testing
+        if space.isinstance_w(w_spec, space.w_str):
+            spec = [s.strip() for s in space.str_w(w_spec).split(',')]
+            w_lst = space.newlist([space.wrap(s) for s in spec]) 
+        elif space.isinstance_w(w_spec, space.w_list):
+            w_lst = w_spec
     if not space.isinstance_w(w_lst, space.w_list) or space.len_w(w_lst) < 1:
         raise oefmt(space.w_RuntimeError,
                     "_commastring is not returning a list with len >= 1")
     if space.len_w(w_lst) == 1:
         return descr__new__(space, space.gettypefor(W_Dtype),
-                            space.getitem(w_lst, space.wrap(0)))
+                            space.getitem(w_lst, space.wrap(0)), 
align=alignment>0)
     else:
-        return dtype_from_list(space, w_lst, True)
-
+        try:
+            return dtype_from_list(space, w_lst, True, alignment)
+        except OperationError as e:
+            if e.match(space, space.w_TypeError):
+                return dtype_from_list(space, w_lst, False, alignment)
+            raise
 
 def _check_for_commastring(s):
     if s[0] in string.digits or s[0] in '<>=|' and s[1] in string.digits:
@@ -608,30 +918,83 @@
             sqbracket -= 1
     return False
 
-@unwrap_spec(align=bool)
-def descr__new__(space, w_subtype, w_dtype, align=False, w_copy=None, w_shape=None):
-    # align and w_copy are necessary for pickling
+def _set_metadata_and_copy(space, w_metadata, dtype, copy=False):
     cache = get_dtype_cache(space)
+    assert isinstance(dtype, W_Dtype)
+    if copy or (dtype in cache.builtin_dtypes and w_metadata is not None):
+        dtype = W_Dtype(dtype.itemtype, dtype.w_box_type, dtype.byteorder)
+    if w_metadata is not None:
+        dtype.descr_set_metadata(space, w_metadata)
+    return dtype
 
-    if w_shape is not None and (space.isinstance_w(w_shape, space.w_int) or
-                                space.len_w(w_shape) > 0):
-        subdtype = descr__new__(space, w_subtype, w_dtype, align, w_copy)
+def _get_shape(space, w_shape):
+    if w_shape is None:
+        return None
+    if space.isinstance_w(w_shape, space.w_int):
+        dim = space.int_w(w_shape)
+        if dim == 1:
+            return None
+        return [dim]
+    shape_w = space.fixedview(w_shape)
+    if len(shape_w) < 1:
+        return None
+    elif len(shape_w) == 1 and space.isinstance_w(shape_w[0], space.w_tuple):
+        # (base_dtype, new_dtype) dtype spectification
+        return None
+    shape = []
+    for w_dim in shape_w:
+        try:
+            dim = space.int_w(w_dim)
+        except OperationError as e:
+            if e.match(space, space.w_OverflowError):
+                raise oefmt(space.w_ValueError, "invalid shape in fixed-type tuple.")
+            else:
+                raise
+        if dim > int(0x7fffffff):
+            raise oefmt(space.w_ValueError, "invalid shape in fixed-type tuple: "
+                      "dimension does not fit into a C int.")
+        elif dim < 0:
+            raise oefmt(space.w_ValueError, "invalid shape in fixed-type tuple: "
+                  "dimension smaller than zero.")
+        shape.append(dim)
+    return shape
+
+@unwrap_spec(align=bool, copy=bool)
+def descr__new__(space, w_subtype, w_dtype, align=False, copy=False,
+                 w_shape=None, w_metadata=None):
+    if align:
+        alignment = 0
+    else:
+        alignment = -1
+    return make_new_dtype(space, w_subtype, w_dtype, alignment, copy=copy,
+                          w_shape=w_shape, w_metadata=w_metadata)
+
+def make_new_dtype(space, w_subtype, w_dtype, alignment, copy=False, w_shape=None, w_metadata=None):
+    cache = get_dtype_cache(space)
+    shape = _get_shape(space, w_shape)
+    if shape is not None:
+        subdtype = make_new_dtype(space, w_subtype, w_dtype, alignment, copy, w_metadata=w_metadata)
         assert isinstance(subdtype, W_Dtype)
-        size = 1
-        if space.isinstance_w(w_shape, space.w_int):
-            w_shape = space.newtuple([w_shape])
-        shape = []
-        for w_dim in space.fixedview(w_shape):
-            dim = space.int_w(w_dim)
-            shape.append(dim)
-            size *= dim
-        if size == 1:
-            return subdtype
+        size = support.product(shape)
         size *= subdtype.elsize
-        return W_Dtype(types.VoidType(space),
-                       space.gettypefor(boxes.W_VoidBox),
-                       shape=shape, subdtype=subdtype, elsize=size)
-
+        if size > int(0x7fffffff):
+            raise oefmt(space.w_ValueError, "invalid shape in fixed-type tuple: "
+                  "dtype size in bytes must fit into a C int.")
+        
+        return _set_metadata_and_copy(space, w_metadata,
+               W_Dtype(types.VoidType(space), space.gettypefor(boxes.W_VoidBox),
+                       shape=shape, subdtype=subdtype, elsize=size))
+    elif w_shape is not None and not space.isinstance_w(w_shape, space.w_int):
+        spec = space.listview(w_shape)
+        if len(spec) > 0:
+            # this is (base_dtype, new_dtype) so just make it a union by setting both
+            # parts' offset to 0
+            try:
+                dtype1 = make_new_dtype(space, w_subtype, w_shape, alignment)
+            except:
+                raise
+            raise oefmt(space.w_NotImplementedError, 
+                "(base_dtype, new_dtype) dtype spectification discouraged, not implemented")
     if space.is_none(w_dtype):
         return cache.w_float64dtype
     if space.isinstance_w(w_dtype, w_subtype):
@@ -641,7 +1004,8 @@
     if space.isinstance_w(w_dtype, space.w_str):
         name = space.str_w(w_dtype)
         if _check_for_commastring(name):
-            return dtype_from_spec(space, w_dtype)
+            return _set_metadata_and_copy(space, w_metadata,
+                                dtype_from_spec(space, w_dtype, alignment))
         cname = name[1:] if name[0] == NPY.OPPBYTE else name
         try:
             dtype = cache.dtypes_by_name[cname]
@@ -655,26 +1019,34 @@
             return variable_dtype(space, name)
         raise oefmt(space.w_TypeError, 'data type "%s" not understood', name)
     elif space.isinstance_w(w_dtype, space.w_list):
-        return dtype_from_list(space, w_dtype, False, align=align)
+        return _set_metadata_and_copy( space, w_metadata,
+                        dtype_from_list(space, w_dtype, False, alignment), copy)
     elif space.isinstance_w(w_dtype, space.w_tuple):
         w_dtype0 = space.getitem(w_dtype, space.wrap(0))
         w_dtype1 = space.getitem(w_dtype, space.wrap(1))
-        subdtype = descr__new__(space, w_subtype, w_dtype0, align, w_copy)
+        subdtype = make_new_dtype(space, w_subtype, w_dtype0, alignment, copy)
         assert isinstance(subdtype, W_Dtype)
         if subdtype.elsize == 0:
             name = "%s%d" % (subdtype.kind, space.int_w(w_dtype1))
-            return descr__new__(space, w_subtype, space.wrap(name), align, w_copy)
-        return descr__new__(space, w_subtype, w_dtype0, align, w_copy, w_shape=w_dtype1)
+            retval = make_new_dtype(space, w_subtype, space.wrap(name), alignment, copy)
+        else:
+            retval = make_new_dtype(space, w_subtype, w_dtype0, alignment, copy, w_shape=w_dtype1)
+        return _set_metadata_and_copy(space, w_metadata, retval, copy)
     elif space.isinstance_w(w_dtype, space.w_dict):
-        return dtype_from_dict(space, w_dtype)
+        return _set_metadata_and_copy(space, w_metadata,
+                dtype_from_dict(space, w_dtype, alignment), copy)
     for dtype in cache.builtin_dtypes:
         if dtype.num in cache.alternate_constructors and \
                 w_dtype in cache.alternate_constructors[dtype.num]:
-            return dtype
+            return _set_metadata_and_copy(space, w_metadata, dtype, copy)
         if w_dtype is dtype.w_box_type:
-            return dtype
+            return _set_metadata_and_copy(space, w_metadata, dtype, copy)
+        if space.isinstance_w(w_dtype, space.w_type) and \
+           space.is_true(space.issubtype(w_dtype, dtype.w_box_type)):
+            return _set_metadata_and_copy( space, w_metadata,
+                            W_Dtype(dtype.itemtype, w_dtype, elsize=0), copy)
     if space.isinstance_w(w_dtype, space.w_type):
-        return cache.w_objectdtype
+        return _set_metadata_and_copy(space, w_metadata, cache.w_objectdtype, copy)
     raise oefmt(space.w_TypeError, "data type not understood")
 
 
@@ -702,6 +1074,11 @@
     names = GetSetProperty(W_Dtype.descr_get_names,
                            W_Dtype.descr_set_names,
                            W_Dtype.descr_del_names),
+    metadata = GetSetProperty(W_Dtype.descr_get_metadata,
+                           #W_Dtype.descr_set_metadata,
+                           #W_Dtype.descr_del_metadata,
+                            ),
+    flags = GetSetProperty(W_Dtype.descr_get_flags),
 
     __eq__ = interp2app(W_Dtype.descr_eq),
     __ne__ = interp2app(W_Dtype.descr_ne),
diff --git a/pypy/module/micronumpy/ndarray.py b/pypy/module/micronumpy/ndarray.py
--- a/pypy/module/micronumpy/ndarray.py
+++ b/pypy/module/micronumpy/ndarray.py
@@ -258,7 +258,7 @@
         if field not in dtype.fields:
             raise oefmt(space.w_ValueError, "field named %s not found", field)
         arr = self.implementation
-        ofs, subdtype = arr.dtype.fields[field]
+        ofs, subdtype = arr.dtype.fields[field][:2]
         # ofs only changes start
         # create a view of the original array by extending
         # the shape, strides, backstrides of the array
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -92,6 +92,7 @@
         assert d == np.dtype('i8')
         assert d.shape == ()
         d = np.dtype((np.int64, 1,))
+        assert d.shape == ()
         assert d == np.dtype('i8')
         assert d.shape == ()
         d = np.dtype((np.int64, 4))
@@ -111,6 +112,7 @@
         assert "int8" == dtype("int8")
         raises(TypeError, lambda: dtype("int8") == 3)
         assert dtype(bool) == bool
+        assert dtype('f8') != dtype(('f8', (1,)))
 
     def test_dtype_cmp(self):
         from numpy import dtype
@@ -342,10 +344,10 @@
         raises(TypeError, type, "Foo", (dtype,), {})
 
     def test_can_subclass(self):
-        import numpy
-        class xyz(numpy.void):
+        import numpy as np
+        class xyz(np.void):
             pass
-        assert True
+        assert np.dtype(xyz).name == 'xyz'
 
     def test_index(self):
         import numpy as np
@@ -413,7 +415,7 @@
         assert loads(dumps(a.dtype)) == a.dtype
         assert np.dtype('bool').__reduce__() == (dtype, ('b1', 0, 1), (3, '|', None, None, None, -1, -1, 0))
         assert np.dtype('|V16').__reduce__() == (dtype, ('V16', 0, 1), (3, '|', None, None, None, 16, 1, 0))
-        assert np.dtype(('<f8', 2)).__reduce__() == (dtype, ('V16', 0, 1), (3, '|', (dtype('float64'), (2,)), None, None, 16, 1, 0))
+        assert np.dtype(('<f8', 2)).__reduce__() == (dtype, ('V16', 0, 1), (3, '|', (dtype('float64'), (2,)), None, None, 16, 8, 0))
 
     def test_newbyteorder(self):
         import numpy as np
@@ -480,13 +482,21 @@
         class O(object):
             pass
         for o in [object, O]:
-            print np.dtype(o).byteorder
             if self.ptr_size == 4:
                 assert np.dtype(o).str == '|O4'
             elif self.ptr_size == 8:
                 assert np.dtype(o).str == '|O8'
             else:
                 assert False,'self._ptr_size unknown'
+        # Issue gh-2798
+        if '__pypy__' in sys.builtin_module_names:
+            a = np.array(['a'], dtype="O")
+            raises(NotImplementedError, a.astype, ("O", [("name", "O")]))
+            skip("(base_dtype, new_dtype) dtype specification discouraged")
+        a = np.array(['a'], dtype="O").astype(("O", [("name", "O")]))
+        assert a[0] == 'a'
+        assert a == 'a'
+        assert a['name'].dtype == a.dtype
 
 class AppTestTypes(BaseAppTestDtypes):
     def test_abstract_types(self):
@@ -686,16 +696,8 @@
                                       numpy.integer, numpy.number,
                                       numpy.generic, object]
         import sys
-        if '__pypy__' not in sys.builtin_module_names:
-            # These tests pass "by chance" on numpy, things that are larger than
-            # platform long (i.e. a python int), don't get put in a normal box,
-            # instead they become an object array containing a long, we don't have
-            # yet, so these can't pass.
-            assert numpy.uint64(9223372036854775808) == 9223372036854775808
-            assert numpy.uint64(18446744073709551615) == 18446744073709551615
-        else:
-            raises(OverflowError, numpy.int64, 9223372036854775808)
-            raises(OverflowError, numpy.int64, 18446744073709551615)
+        raises(OverflowError, numpy.int64, 9223372036854775808)
+        raises(OverflowError, numpy.int64, 18446744073709551615)
         raises(OverflowError, numpy.uint64, 18446744073709551616)
         assert numpy.uint64((2<<63) - 1) == (2<<63) - 1
 
@@ -1136,10 +1138,7 @@
         import sys
         d = {'names': ['r','g','b','a'],
              'formats': [np.uint8, np.uint8, np.uint8, np.uint8]}
-        if '__pypy__' not in sys.builtin_module_names:
-            dt = np.dtype(d)
-        else:
-            raises(NotImplementedError, np.dtype, d)
+        dt = np.dtype(d)
 
     def test_create_subarrays(self):
         from numpy import dtype
@@ -1272,7 +1271,7 @@
                      ('x', 'y', 'z', 'value'),
                      {'y': (dtype('int32'), 4), 'x': (dtype('int32'), 0),
                       'z': (dtype('int32'), 8), 'value': (dtype('float64'), 12),
-                      }, 20, 1, 0))
+                      }, 20, 1, 16))
 
         new_d = loads(dumps(d))
 
@@ -1291,6 +1290,182 @@
 
         assert new_d.itemsize == d.itemsize == 76
 
+    def test_shape_invalid(self):
+        import numpy as np
+        # Check that the shape is valid.
+        max_int = 2 ** (8 * 4 - 1)
+        max_intp = 2 ** (8 * np.dtype('intp').itemsize - 1) - 1
+        # Too large values (the datatype is part of this)
+        raises(ValueError, np.dtype, [('a', 'f4', max_int // 4 + 1)])
+        raises(ValueError, np.dtype, [('a', 'f4', max_int + 1)])
+        raises(ValueError, np.dtype, [('a', 'f4', (max_int, 2))])
+        # Takes a different code path (fails earlier:
+        raises(ValueError, np.dtype, [('a', 'f4', max_intp + 1)])
+        # Negative values
+        raises(ValueError, np.dtype, [('a', 'f4', -1)])
+        raises(ValueError, np.dtype, [('a', 'f4', (-1, -1))])
+
+    def test_aligned_size(self):
+        import numpy as np
+        # Check that structured dtypes get padded to an aligned size
+        dt = np.dtype('i4, i1', align=True)
+        assert dt.itemsize == 8
+        dt = np.dtype([('f0', 'i4'), ('f1', 'i1')], align=True)
+        assert dt.itemsize == 8
+        dt = np.dtype({'names':['f0', 'f1'],
+                       'formats':['i4', 'u1'],
+                       'offsets':[0, 4]}, align=True)
+        assert dt.itemsize == 8
+        dt = np.dtype({'f0': ('i4', 0), 'f1':('u1', 4)}, align=True)
+        assert dt.itemsize == 8
+        assert dt.alignment == 4
+        assert str(dt) == "{'names':['f0','f1'], 'formats':['<i4','u1'], 'offsets':[0,4], 'itemsize':8, 'aligned':True}"
+        dt = np.dtype([('f1', 'u1'), ('f0', 'i4')], align=True)
+        assert str(dt) == "{'names':['f1','f0'], 'formats':['u1','<i4'], 'offsets':[0,4], 'itemsize':8, 'aligned':True}"
+        # Nesting should preserve that alignment
+        dt1 = np.dtype([('f0', 'i4'),
+                       ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]),
+                       ('f2', 'i1')], align=True)
+        assert dt1.alignment == 4
+        assert dt1['f1'].itemsize == 12
+        assert dt1.itemsize == 20
+        dt2 = np.dtype({'names':['f0', 'f1', 'f2'],
+                       'formats':['i4',
+                                  [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')],
+                                  'i1'],
+                       'offsets':[0, 4, 16]}, align=True)
+        assert dt2.itemsize == 20
+        dt3 = np.dtype({'f0': ('i4', 0),
+                       'f1': ([('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], 4),
+                       'f2': ('i1', 16)}, align=True)
+        assert dt3.itemsize == 20
+        assert dt1 == dt2
+        answer = "{'names':['f0','f1','f2'], " + \
+                    "'formats':['<i4',{'names':['f1','f2','f3'], " + \
+                                      "'formats':['i1','<i4','i1'], " + \
+                                      "'offsets':[0,4,8], 'itemsize':12}," + \
+                                 "'i1'], " + \
+                    "'offsets':[0,4,16], 'itemsize':20, 'aligned':True}"
+        assert str(dt3) == answer
+        assert dt2 == dt3
+        # Nesting should preserve packing
+        dt1 = np.dtype([('f0', 'i4'),
+                       ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]),
+                       ('f2', 'i1')], align=False)
+        assert dt1.itemsize == 11
+        dt2 = np.dtype({'names':['f0', 'f1', 'f2'],
+                       'formats':['i4',
+                                  [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')],
+                                  'i1'],
+                       'offsets':[0, 4, 10]}, align=False)
+        assert dt2.itemsize == 11
+        dt3 = np.dtype({'f0': ('i4', 0),
+                       'f1': ([('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], 4),
+                       'f2': ('i1', 10)}, align=False)
+        assert dt3.itemsize == 11
+        assert dt1 == dt2
+        assert dt2 == dt3
+
+    def test_bad_param(self):
+        import numpy as np
+        # Can't give a size that's too small
+        raises(ValueError, np.dtype,
+                        {'names':['f0', 'f1'],
+                         'formats':['i4', 'i1'],
+                         'offsets':[0, 4],
+                         'itemsize':4})
+        # If alignment is enabled, the alignment (4) must divide the itemsize
+        raises(ValueError, np.dtype,
+                        {'names':['f0', 'f1'],
+                         'formats':['i4', 'i1'],
+                         'offsets':[0, 4],
+                         'itemsize':9}, align=True)
+        # If alignment is enabled, the individual fields must be aligned
+        raises(ValueError, np.dtype,
+                        {'names':['f0', 'f1'],
+                         'formats':['i1', 'f4'],
+                         'offsets':[0, 2]}, align=True)
+        dt = np.dtype(np.double)
+        attr = ["subdtype", "descr", "str", "name", "base", "shape",
+                "isbuiltin", "isnative", "isalignedstruct", "fields",
+                "metadata", "hasobject"]
+        for s in attr:
+            raises(AttributeError, delattr, dt, s)
+
+        raises(TypeError, np.dtype,
+            dict(names=set(['A', 'B']), formats=['f8', 'i4']))
+        raises(TypeError, np.dtype,
+            dict(names=['A', 'B'], formats=set(['f8', 'i4'])))
+
+    def test_complex_dtype_repr(self):
+        import numpy as np
+        dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)),
+                                ('rtile', '>f4', (64, 36))], (3,)),
+                       ('bottom', [('bleft', ('>f4', (8, 64)), (1,)),
+                                   ('bright', '>f4', (8, 36))])])
+        assert repr(dt) == (
+                     "dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), "
+                     "('rtile', '>f4', (64, 36))], (3,)), "
+                     "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), "
+                     "('bright', '>f4', (8, 36))])])")
+
+        # If the sticky aligned flag is set to True, it makes the
+        # str() function use a dict representation with an 'aligned' flag
+        dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)),
+                                ('rtile', '>f4', (64, 36))],
+                                (3,)),
+                       ('bottom', [('bleft', ('>f4', (8, 64)), (1,)),
+                                   ('bright', '>f4', (8, 36))])],
+                       align=True)
+        assert str(dt) == (
+                    "{'names':['top','bottom'], "
+                     "'formats':[([('tiles', ('>f4', (64, 64)), (1,)), "
+                                  "('rtile', '>f4', (64, 36))], (3,)),"
+                                 "[('bleft', ('>f4', (8, 64)), (1,)), "
+                                  "('bright', '>f4', (8, 36))]], "
+                     "'offsets':[0,76800], "
+                     "'itemsize':80000, "
+                     "'aligned':True}")
+
+        dt = np.dtype({'names': ['r', 'g', 'b'], 'formats': ['u1', 'u1', 'u1'],
+                        'offsets': [0, 1, 2],
+                        'titles': ['Red pixel', 'Green pixel', 'Blue pixel']},
+                        align=True)
+        assert repr(dt) == (
+                    "dtype([(('Red pixel', 'r'), 'u1'), "
+                    "(('Green pixel', 'g'), 'u1'), "
+                    "(('Blue pixel', 'b'), 'u1')], align=True)")
+
+        dt = np.dtype({'names': ['rgba', 'r', 'g', 'b'],
+                       'formats': ['<u4', 'u1', 'u1', 'u1'],
+                       'offsets': [0, 0, 1, 2],
+                       'titles': ['Color', 'Red pixel',
+                                  'Green pixel', 'Blue pixel']}, align=True)
+        assert repr(dt) == (
+                    "dtype({'names':['rgba','r','g','b'],"
+                    " 'formats':['<u4','u1','u1','u1'],"
+                    " 'offsets':[0,0,1,2],"
+                    " 'titles':['Color','Red pixel',"
+                              "'Green pixel','Blue pixel'],"
+                    " 'itemsize':4}, align=True)")
+
+        dt = np.dtype({'names': ['r', 'b'], 'formats': ['u1', 'u1'],
+                        'offsets': [0, 2],
+                        'titles': ['Red pixel', 'Blue pixel'],
+                        'itemsize': 4})
+        assert repr(dt) == (
+                    "dtype({'names':['r','b'], "
+                    "'formats':['u1','u1'], "
+                    "'offsets':[0,2], "
+                    "'titles':['Red pixel','Blue pixel'], "
+                    "'itemsize':4})")
+        if 'datetime64' not in dir(np):
+            skip('datetime dtype not available')
+        dt = np.dtype([('a', '<M8[D]'), ('b', '<m8[us]')])
+        assert repr(dt) == (
+                    "dtype([('a', '<M8[D]'), ('b', '<m8[us]')])")
+
+
 class AppTestNotDirect(BaseNumpyAppTest):
     def setup_class(cls):
         BaseNumpyAppTest.setup_class.im_func(cls)
@@ -1332,4 +1507,45 @@
         assert a[0] == 1
         assert (a + a)[1] == 4
 
+class AppTestMonsterType(BaseNumpyAppTest):
+    """Test deeply nested subtypes."""
+    def test1(self):
+        import numpy as np
+        simple1 = np.dtype({'names': ['r', 'b'], 'formats': ['u1', 'u1'],
+            'titles': ['Red pixel', 'Blue pixel']})
+        a = np.dtype([('yo', int), ('ye', simple1),
+            ('yi', np.dtype((int, (3, 2))))])
+        b = np.dtype([('yo', int), ('ye', simple1),
+            ('yi', np.dtype((int, (3, 2))))])
+        assert a == b
 
+        c = np.dtype([('yo', int), ('ye', simple1),
+            ('yi', np.dtype((a, (3, 2))))])
+        d = np.dtype([('yo', int), ('ye', simple1),
+            ('yi', np.dtype((a, (3, 2))))])
+        assert c == d
+
+
+class AppTestMetadata(BaseNumpyAppTest):
+    def test_no_metadata(self):
+        import numpy as np
+        d = np.dtype(int)
+        assert d.metadata is None
+
+    def test_metadata_takes_dict(self):
+        import numpy as np
+        d = np.dtype(int, metadata={'datum': 1})
+        assert d.metadata == {'datum': 1}
+
+    def test_metadata_rejects_nondict(self):
+        import numpy as np
+        raises(TypeError, np.dtype, int, metadata='datum')
+        raises(TypeError, np.dtype, int, metadata=1)
+        raises(TypeError, np.dtype, int, metadata=None)
+
+    def test_nested_metadata(self):
+        import numpy as np
+        d = np.dtype([('a', np.dtype(int, metadata={'datum': 1}))])
+        assert d['a'].metadata == {'datum': 1}
+
+
diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py
--- a/pypy/module/micronumpy/test/test_ndarray.py
+++ b/pypy/module/micronumpy/test/test_ndarray.py
@@ -764,6 +764,8 @@
         assert (a[1:] == b).all()
         assert (a[1:,newaxis] == d).all()
         assert (a[newaxis,1:] == c).all()
+        assert a.strides == (8,)
+        assert a[:, newaxis].strides == (8, 0)
 
     def test_newaxis_assign(self):
         from numpy import array, newaxis
@@ -2345,6 +2347,7 @@
         assert a[1] == 0xff
         assert len(a.data) == 16
         assert type(a.data) is buffer
+        assert a[1:].data._pypy_raw_address() - a.data._pypy_raw_address() == a.strides[0]
 
     def test_explicit_dtype_conversion(self):
         from numpy import array
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -2396,7 +2396,7 @@
         dt = item.arr.dtype
         ret_unwrapped = []
         for name in dt.names:
-            ofs, dtype = dt.fields[name]
+            ofs, dtype = dt.fields[name[0]]
             # XXX: code duplication with W_VoidBox.descr_getitem()
             if isinstance(dtype.itemtype, VoidType):
                 read_val = dtype.itemtype.readarray(item.arr, ofs, 0, dtype)
@@ -2431,14 +2431,14 @@
                 return w_item
             else:
                 # match up the field names
-                items_w = [None] * len(dtype.fields)
-                for i in range(len(dtype.fields)):
+                items_w = [None] * len(dtype.names)
+                for i in range(len(dtype.names)):
                     name = dtype.names[i]
                     if name in w_item.dtype.names:
-                        items_w[i] = w_item.descr_getitem(space, space.wrap(name))
+                        items_w[i] = w_item.descr_getitem(space, space.wrap(name[0]))
         elif w_item is not None:
             if space.isinstance_w(w_item, space.w_tuple):
-                if len(dtype.fields) != space.len_w(w_item):
+                if len(dtype.names) != space.len_w(w_item):
                     raise OperationError(space.w_ValueError, space.wrap(
                         "size of tuple must match number of fields."))
                 items_w = space.fixedview(w_item)
@@ -2449,12 +2449,12 @@
                             "expected a readable buffer object")
             else:
                 # XXX support initializing from readable buffers
-                items_w = [w_item] * len(dtype.fields)
+                items_w = [w_item] * len(dtype.names)
         else:
             items_w = [None] * len(dtype.fields)
         arr = VoidBoxStorage(dtype.elsize, dtype)
-        for i in range(len(dtype.fields)):
-            ofs, subdtype = dtype.fields[dtype.names[i]]
+        for i in range(len(dtype.names)):
+            ofs, subdtype = dtype.fields[dtype.names[i][0]]
             try:
                 w_box = subdtype.coerce(space, items_w[i])
             except IndexError:
@@ -2492,7 +2492,7 @@
         items = []
         dtype = box.dtype
         for name in dtype.names:
-            ofs, subdtype = dtype.fields[name]
+            ofs, subdtype = dtype.fields[name[0]]
             subbox = subdtype.read(box.arr, box.ofs, ofs)
             items.append(subdtype.itemtype.to_builtin_type(space, subbox))
         return space.newtuple(items)
@@ -2503,7 +2503,7 @@
         pieces = ["("]
         first = True
         for name in box.dtype.names:
-            ofs, subdtype = box.dtype.fields[name]
+            ofs, subdtype = box.dtype.fields[name[0]]
             if first:
                 first = False
             else:
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -72,6 +72,10 @@
     def get_module(self):
         return w_some_obj()
 
+
+    def getname(self, space):
+        return self.name
+
 def w_some_obj():
     if NonConstant(False):
         return W_Root()
diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py
--- a/pypy/objspace/std/test/test_typeobject.py
+++ b/pypy/objspace/std/test/test_typeobject.py
@@ -1031,6 +1031,48 @@
         A.__dict__['x'] = 5
         assert A.x == 5
 
+    def test_we_already_got_one_1(self):
+        # Issue #2079: highly obscure: CPython complains if we say
+        # ``__slots__="__dict__"`` and there is already a __dict__...
+        # but from the "best base" only.  If the __dict__ comes from
+        # another base, it doesn't complain.  Shrug and copy the logic.
+        class A(object):
+            __slots__ = ()
+        class B(object):
+            pass
+        class C(A, B):     # "best base" is A
+            __slots__ = ("__dict__",)
+        class D(A, B):     # "best base" is A
+            __slots__ = ("__weakref__",)
+        try:
+            class E(B, A):   # "best base" is B
+                __slots__ = ("__dict__",)
+        except TypeError, e:
+            assert 'we already got one' in str(e)
+        else:
+            raise AssertionError("TypeError not raised")
+        try:
+            class F(B, A):   # "best base" is B
+                __slots__ = ("__weakref__",)
+        except TypeError, e:
+            assert 'we already got one' in str(e)
+        else:
+            raise AssertionError("TypeError not raised")
+
+    def test_we_already_got_one_2(self):
+        class A(object):
+            __slots__ = ()
+        class B:
+            pass
+        class C(A, B):     # "best base" is A
+            __slots__ = ("__dict__",)
+        class D(A, B):     # "best base" is A
+            __slots__ = ("__weakref__",)
+        class C(B, A):     # "best base" is A
+            __slots__ = ("__dict__",)
+        class D(B, A):     # "best base" is A
+            __slots__ = ("__weakref__",)
+
 
 class AppTestWithMethodCacheCounter:
     spaceconfig = {"objspace.std.withmethodcachecounter": True}
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -1022,7 +1022,7 @@
     w_self.nslots = w_bestbase.nslots
     return hasoldstylebase
 
-def create_all_slots(w_self, hasoldstylebase):
+def create_all_slots(w_self, hasoldstylebase, w_bestbase):
     space = w_self.space
     dict_w = w_self.dict_w
     if '__slots__' not in dict_w:
@@ -1040,12 +1040,12 @@
         for w_slot_name in slot_names_w:
             slot_name = space.str_w(w_slot_name)
             if slot_name == '__dict__':
-                if wantdict or w_self.hasdict:
+                if wantdict or w_bestbase.hasdict:
                     raise oefmt(space.w_TypeError,
                                 "__dict__ slot disallowed: we already got one")
                 wantdict = True
             elif slot_name == '__weakref__':
-                if wantweakref or w_self.weakrefable:
+                if wantweakref or w_bestbase.weakrefable:
                     raise oefmt(space.w_TypeError,
                                 "__weakref__ slot disallowed: we already got one")
                 wantweakref = True
@@ -1106,7 +1106,7 @@
         w_self.flag_abstract |= w_base.flag_abstract
 
     hasoldstylebase = copy_flags_from_bases(w_self, w_bestbase)
-    create_all_slots(w_self, hasoldstylebase)
+    create_all_slots(w_self, hasoldstylebase, w_bestbase)
 
     ensure_common_attributes(w_self)
 
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -8637,5 +8637,27 @@
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_getfield_proven_constant(self):
+        py.test.skip("not working")
+        ops = """
+        [p0]
+        i1 = getfield_gc(p0, descr=valuedescr)
+        guard_value(i1, 13) []
+        escape(i1)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        escape(13)
+        jump(p0)
+        """
+        expected_short = """
+        [p0]
+        i1 = getfield_gc(p0, descr=valuedescr)
+        guard_value(i1, 13) []
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected, expected_short=expected_short)
+
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
     pass
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy indexing: hg merge default

Reply via email to