Hello List,

I'm writing a scientific application that could make good use of
Dimension Scales. Unfortunately, it is not possible to create those
with PyTables. I found some old code in the subversion repository,
but it was never included in the main trunk, I guess simply because
it was not good enough.

I sat down and wrote a new version from scratch, following a completely
different approach. Instead of using the high-level routines of the
HDF5 library, I'm building the necessary attributes myself. Unfortunately,
dimension scales use many advanced features that PyTables does not
support, especially References. So I wrote some code that adds the needed
functionality to tables. I first tried to be very generic, but soon found
out that the necessary features are so specific that, in order to make
it generic, one would have to write an entirely new library.

So I wrote it as generically as I could without adding too much code, while
still making dimension scales possible. I added the capability to create
attributes that contain lists of lists, and a rather unusual kind of table,
which is what is needed. I'm using Python lists, not NumPy arrays, firstly
because NumPy arrays are not very efficient at storing References (which I
implemented as Python objects), and secondly because this way I can avoid
unwanted incompatibilities with existing code.

I wrote a little test program that illustrates the added functionality:

------------ test.py ----------------------
from __future__ import with_statement
import tables
from tables import Reference # that's the class to make references that I added
from numpy import array

# Part 1: create a fresh HDF5 file and write every new kind of attribute.
with tables.openFile("test.h5", mode="w", title="test") as file:
    # One group "/G" holding three small integer arrays A, B and C.
    group = file.createGroup("/", "G", "group")

    dataA = file.createArray(group, "A", array([1,2,3]), "data")
    dataB = file.createArray(group, "B", array([4,5,6]), "data")
    dataC = file.createArray(group, "C", array([7,8,9]), "data")
    # NOTE(review): presumably flushed so that the nodes exist in the file
    # before references to them are created -- confirm.
    file.flush()

    # that's how references work:
    ref = tables.Reference(dataA) # make a reference to dataA...
    dataB.attrs.dataA = ref       # store a single reference as an attribute
    print ref, ref.deref() # ... and get the referenced object back
    # Build a flat list of references [ref-to-A, ref-to-B] and store it as
    # one attribute on C.  From here on `ref` refers to dataB.
    l = [ ref ]
    ref = tables.Reference(dataB)
    l.append(ref)
    dataC.attrs.datas = l

    # two attributes that contain lists of lists:
    # (inner lists have different lengths, including an empty one)
    dataC.attrs.list = [[1, 2, 3], [4, 5], [ ]]
    dataC.attrs.reflist = [l, [ref, ref], [ ] ]

    # and that special kind of tables used for dimension scales:
    # the first tuple holds the field names, every following tuple is one
    # row of (int, float, Reference) values.
    dataC.attrs.compound = [("a", "b", "c"), (1, 2.25, ref), (2, 3.5, l[0])]

    # Assign the same attribute three times with different kinds of value
    # (compound, plain string, compound again).  Presumably this checks that
    # an existing attribute can be overwritten; "co" is not read back below.
    dataC.attrs.co = [("a", "b", "c"), (1, 2.25, ref), (2, 3.5, l[0])]
    dataC.attrs.co = "sdfa"
    dataC.attrs.co = [("a", "b", "c"), (1, 2.25, ref), (2, 3.5, l[0])]

# Part 2: now see that everything can be read again
# (the file is reopened read-only and each attribute written above is
# printed, dereferencing the stored references where applicable)
with tables.openFile("test.h5", mode="r") as file:
    r = file.root
    # single reference stored on B
    ref = r.G.B.attrs.dataA
    print ref, ref.deref()
    # flat list of references stored on C
    print r.G.C.attrs.datas
    l = r.G.C.attrs.datas
    print l[0], l[0].deref()
    print l[1], l[1].deref()
    # lists of lists (plain ints, then references)
    print r.G.C.attrs.list
    print r.G.C.attrs.reflist
    # compound attribute: row 0 is the field names, so [1][2] is the
    # Reference in the third field of the first data row
    print r.G.C.attrs.compound
    print r.G.C.attrs.compound[1][2].deref()
-------------------- end -------------

And last, but certainly not least, the unified diff that contains all the
changes is at the end of this email.

Greetings

Martin

------------------ snip ----------------------
Index: tables/__init__.py
===================================================================
--- tables/__init__.py  (revision 4262)
+++ tables/__init__.py  (working copy)
@@ -64,6 +64,7 @@

 from tables.utilsExtension import (
     isHDF5File, isPyTablesFile, whichLibVersion, lrange )
+from tables.hdf5Extension import Reference

 from tables.misc.enum import Enum
 from tables.atom import *
Index: tables/hdf5Extension.pyx
===================================================================
--- tables/hdf5Extension.pyx    (revision 4262)
+++ tables/hdf5Extension.pyx    (working copy)
@@ -19,6 +19,7 @@
     Group
     Array
     VLArray
+    Reference
     UnImplemented

 Functions:
@@ -66,7 +67,7 @@
      time_t, size_t, uintptr_t, hid_t, herr_t, hsize_t, hvl_t, \
      H5S_seloper_t, H5D_FILL_VALUE_UNDEFINED, \
      H5G_GROUP, H5G_DATASET, H5G_stat_t, \
-     H5T_class_t, H5T_sign_t, H5T_NATIVE_INT, \
+     H5T_class_t, H5T_sign_t, H5T_NATIVE_INT, H5T_NATIVE_DOUBLE,\
      H5F_SCOPE_GLOBAL, H5F_ACC_TRUNC, H5F_ACC_RDONLY, H5F_ACC_RDWR, \
      H5P_DEFAULT, H5T_SGN_NONE, H5T_SGN_2, H5T_DIR_DEFAULT, \
      H5S_SELECT_SET, H5S_SELECT_AND, H5S_SELECT_NOTB, \
@@ -75,11 +76,13 @@
      H5Gcreate, H5Gopen, H5Gclose, H5Glink, H5Gunlink, H5Gmove, \
      H5Gmove2, H5Gget_objinfo, \
      H5Dopen, H5Dclose, H5Dread, H5Dwrite, H5Dget_type, \
-     H5Dget_space, H5Dvlen_reclaim, \
-     H5Tget_native_type, H5Tget_super, H5Tget_class, H5Tcopy, \
-     H5Tclose, H5Tis_variable_str, H5Tget_sign, \
+     H5Dget_space, H5Dvlen_reclaim, H5Tcreate, H5Tvlen_create,\
+     H5Tget_native_type, H5Tget_super, H5Tget_class, H5Tcopy, H5Tinsert, \
+     H5Tclose, H5Tis_variable_str, H5Tget_sign, H5Tget_member_name, \
+     H5Tget_member_offset, H5Tget_member_type, H5Tget_nmembers, H5Tget_size, \
+     H5Acreate, H5Aget_space, H5Aopen_name, \
      H5Adelete, H5Aget_num_attrs, H5Aget_name, H5Aopen_idx, \
-     H5Aread, H5Aclose, H5Pcreate, H5Pclose, \
+     H5Aread, H5Awrite, H5Aclose, H5Pcreate, H5Pclose, \
      H5Pset_cache, H5Pset_sieve_buf_size, H5Pset_fapl_log, \
      H5Sselect_all, H5Sselect_elements, H5Sselect_hyperslab, \
      H5Screate_simple, H5Sget_simple_extent_ndims, \
@@ -90,7 +93,9 @@
      H5ARRAYget_ndims, H5ARRAYget_info, \
      set_cache_size, get_objinfo, Giterate, Aiterate, H5UIget_info, \
      get_len_of_range, get_order, set_order, is_complex, \
-     conv_float64_timeval32, truncate_dset
+     conv_float64_timeval32, truncate_dset, H5Rcreate, \
+     H5R_OBJECT, H5T_STD_REF_OBJ, hobj_ref_t, H5E_auto_t, H5Eget_auto, \
+     H5Eset_auto


 # Include conversion tables
@@ -249,7 +254,74 @@
   return ntype


+cdef class Reference

+def islistoflist(l):
+  try:
+    cls = l[0][0].__class__
+    for e in l:
+      for x in e:
+        if not isinstance(x, cls):
+          return None
+    return cls
+  except (TypeError, IndexError):
+    return None
+
+def islistofcompound(l):
+  try:
+    if len(l[0]) != len(l[1]):
+        return None
+    classes = [ ]
+    for v in l[1]:
+      if not isinstance(v, (int, float, Reference)):
+        return None
+      classes.append(v.__class__)
+    for v in l[1:]:
+      for o, c in zip(v, classes):
+        if not isinstance(o, c):
+          return None
+    return classes
+  except (TypeError, IndexError):
+    return None
+
+def isreference(v):
+  return isinstance(v, Reference)
+
+cdef void storevalue(object v, void *p):
+  cdef Reference ref
+  if isinstance(v, int):
+    (<int *> p)[0] = v
+  elif isinstance(v, float):
+    (<double *> p)[0] = v
+  elif isinstance(v, Reference):
+    ref = v
+    (<hobj_ref_t *> p)[0] = ref.ref
+
+cdef object fetchvalue(H5T_class_t class_id, void *p, int i, object file):
+  cdef Reference retref
+  if class_id == H5T_INTEGER:
+    return (<int *>p)[i]
+  elif class_id == H5T_FLOAT:
+    return (<double *>p)[i]
+  elif class_id == H5T_REFERENCE:
+    retref = Reference(None)
+    retref.ref = (<hobj_ref_t *>p)[i]
+    retref.file = file
+    return retref
+
+cdef void deleteIfExisting(hid_t node, char *name):
+  cdef H5E_auto_t func
+  cdef void *data
+  H5Eget_auto(&func, &data)
+  H5Eset_auto(NULL, NULL)
+  H5Adelete(node, name)
+  H5Eset_auto(func, data)
+
+sizeOfType = {int: sizeof(int), float: sizeof(double),
+        Reference: sizeof(hobj_ref_t)}
+idOfType = {int: H5T_NATIVE_INT, float: H5T_NATIVE_DOUBLE,
+            Reference: H5T_STD_REF_OBJ}
+
 # Type extensions declarations (these are subclassed by PyTables
 # Python classes)

@@ -404,11 +476,15 @@
     type.
     """

-    cdef int ret
-    cdef hid_t dset_id, type_id
+    cdef int ret, i, j
+    cdef hid_t dset_id, type_id, space_id, attr_id
     cdef hsize_t *dims
+    cdef hvl_t *vl_buf
     cdef ndarray ndv
-    cdef object byteorder, rabyteorder, baseatom
+    cdef object byteorder, rabyteorder, baseatom, basetype
+    cdef Reference ref
+    cdef hobj_ref_t *refp
+    cdef char *c_buf

     # The dataset id of the node
     dset_id = node._v_objectID
@@ -417,6 +493,10 @@
     if isinstance(value, numpy.generic):
       value = numpy.array(value)

+    basetype = islistoflist(value)
+    if basetype is None:
+      basetype = islistofcompound(value)
+
     # Check if value is a NumPy ndarray and of a supported type
     if (isinstance(value, numpy.ndarray) and
         value.dtype.kind in ('V', 'S', 'b', 'i', 'u', 'f', 'c')):
@@ -441,6 +521,74 @@
       # Release resources
       free(<void *>dims)
       H5Tclose(type_id)
+    elif isinstance(value, Reference):
+      ref = value
+      H5ATTRset_attribute(dset_id, name, H5T_STD_REF_OBJ, 0, NULL,
+        <char *> &ref.ref)
+    elif isinstance(value, list) and all(map(isreference, value)):
+      dims = <hsize_t *> malloc(sizeof(hsize_t))
+      dims[0] = len(value)
+      refp = <hobj_ref_t *> malloc(len(value) * sizeof(hobj_ref_t))
+      ret = 0
+      for v in value:
+        ref = v
+        refp[ret] = ref.ref
+        ret += 1
+      H5ATTRset_attribute(dset_id, name, H5T_STD_REF_OBJ, 1, dims,
+            <char *> refp)
+      free(refp)
+      free(dims)
+    elif basetype in (int, float, Reference):
+      dims = <hsize_t *> malloc(sizeof(hsize_t))
+      dims[0] = len(value)
+      deleteIfExisting(dset_id, name)
+      space_id = H5Screate_simple(1, dims, NULL)
+      type_id = H5Tvlen_create(idOfType[basetype])
+      attr_id = H5Acreate(dset_id, name, type_id, space_id, H5P_DEFAULT)
+      ret = sizeOfType[basetype]
+      vl_buf = <hvl_t *> malloc(len(value) * sizeof(hvl_t))
+      i = 0
+      for l in value:
+        vl_buf[i].len = len(l)
+        vl_buf[i].p = malloc(ret * len(l))
+        j = 0
+        for o in l:
+          storevalue(o, vl_buf[i].p + j)
+          j = j + ret
+        i = i + 1
+      H5Awrite(attr_id, type_id, vl_buf)
+      H5Dvlen_reclaim(type_id, space_id, H5P_DEFAULT, vl_buf)
+      free(vl_buf)
+      H5Sclose(space_id)
+      free(dims)
+      H5Tclose(type_id)
+      H5Aclose(attr_id)
+    elif isinstance(basetype, list):
+      dims = <hsize_t *> malloc(sizeof(hsize_t))
+      dims[0] = len(value) - 1
+      deleteIfExisting(dset_id, name)
+      space_id = H5Screate_simple(1, dims, NULL)
+      ret = 0
+      for t in basetype:
+        ret = ret + sizeOfType[t]
+      type_id = H5Tcreate(H5T_COMPOUND, ret)
+      i = 0
+      for v, b in zip(value[0], basetype):
+        H5Tinsert(type_id, v, i, idOfType[b])
+        i += sizeOfType[b]
+      attr_id = H5Acreate(dset_id, name, type_id,
+            space_id, H5P_DEFAULT)
+      c_buf = <char *> malloc((len(value) - 1) * ret)
+      i = 0
+      for v in value[1:]:
+        for o in v:
+          storevalue(o, c_buf + i)
+          i = i + sizeOfType[o.__class__]
+      H5Awrite(attr_id, type_id, c_buf)
+      free(c_buf)
+      H5Sclose(space_id)
+      H5Tclose(type_id)
+      H5Aclose(attr_id)
     else:
       # Object cannot be natively represented in HDF5.
       # Unicode attributes has to be pickled until we can definitely switch
@@ -468,12 +616,15 @@
     cdef hsize_t *dims, nelements
     cdef H5T_class_t class_id
     cdef size_t type_size
-    cdef hid_t mem_type, dset_id, type_id, native_type
-    cdef int rank, ret, enumtype
+    cdef hid_t mem_type, dset_id, type_id, native_type, attr_id, space_id
+    cdef int rank, ret, enumtype, i, j
     cdef void *rbuf
+    cdef hobj_ref_t *refbuf
+    cdef hvl_t *vl_buf
     cdef char *str_value
     cdef ndarray ndvalue
     cdef object shape, stype_atom, shape_atom, retvalue
+    cdef Reference retref

     # The dataset id of the node
     dset_id = node._v_objectID
@@ -493,6 +644,28 @@
       if str_value: free(str_value)
       H5Tclose(type_id)
       return retvalue
+    elif (rank == 0 and class_id == H5T_REFERENCE):
+      retref = Reference(None)
+      H5ATTRget_attribute(dset_id, attrname, type_id, &retref.ref)
+      retref.file = node._v_file
+      H5Tclose(type_id)
+      return retref
+    elif (rank == 1 and class_id == H5T_REFERENCE):
+      ret = H5ATTRget_dims(dset_id, attrname, &nelements)
+      if ret < 0:
+        raise HDF5ExtError("Can't get dims info on attribute %s in node %s." %
+                           (attrname, self.name))
+      refbuf = <hobj_ref_t *> malloc(nelements * sizeof(hobj_ref_t))
+      H5ATTRget_attribute(dset_id, attrname, type_id, refbuf)
+      retvalue = [ ]
+      for 0 <= ret < nelements:
+        retref = Reference(None)
+        retref.ref = refbuf[ret]
+        retref.file = node._v_file
+        retvalue.append(retref)
+      free(refbuf)
+      H5Tclose(type_id)
+      return retvalue
     elif (rank == 0 and class_id in (H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT)):
       dtype = get_dtype_scalar(type_id, class_id, type_size)
       if dtype is None:
@@ -502,6 +675,54 @@
         self._v_unimplemented.append(attrname)
         return None
       shape = ()
+    elif (rank == 1 and class_id == H5T_VLEN):
+      attr_id = H5Aopen_name(dset_id, attrname)
+      space_id = H5Aget_space(attr_id)
+      H5Sget_simple_extent_dims(space_id, &nelements, NULL)
+      vl_buf = <hvl_t *> malloc(nelements * sizeof(hvl_t))
+      native_type = H5Tget_super(type_id)
+      class_id = H5Tget_class(native_type)
+      H5Tclose(native_type)
+      H5Aread(attr_id, type_id, vl_buf)
+      H5Aclose(attr_id)
+      retvalue = [ ]
+      for 0 <= j < nelements:
+        l = [ ]
+        for 0 <= i < vl_buf[j].len:
+          l.append(fetchvalue(class_id, vl_buf[j].p, i, node._v_file))
+        retvalue.append(l)
+      H5Dvlen_reclaim(type_id, space_id, H5P_DEFAULT, vl_buf)
+      H5Tclose(type_id)
+      H5Sclose(space_id)
+      return retvalue
+    elif (rank == 1 and class_id == H5T_COMPOUND):
+      attr_id = H5Aopen_name(dset_id, attrname)
+      space_id = H5Aget_space(attr_id)
+      rank = H5Tget_nmembers(type_id)
+      retvalue = [ ]
+      shape = [ ]
+      for 0 <= i < rank:
+        str_value = H5Tget_member_name(type_id, i)
+        retvalue.append(str(str_value))
+        native_type = H5Tget_member_type(type_id, i)
+        shape.append(H5Tget_class(native_type))
+        H5Tclose(native_type)
+      retvalue = [ retvalue ]
+      H5Sget_simple_extent_dims(space_id, &nelements, NULL)
+      type_size = H5Tget_size(type_id)
+      rbuf = malloc(nelements * type_size)
+      H5Aread(attr_id, type_id, rbuf)
+      for 0 <= i < nelements:
+        ll = [ ]
+        for 0 <= j < rank:
+          ll.append(fetchvalue(shape[j],
+            rbuf + i * type_size + H5Tget_member_offset(type_id, j), 0,
+            node._v_file))
+        retvalue.append(ll)
+      H5Tclose(type_id)
+      H5Sclose(space_id)
+      free(rbuf)
+      return retvalue
     else:
       # General case

@@ -1568,7 +1789,36 @@
     return datalist


+cdef class Reference:
+  """ A reference to a node in a HDF5 file """
+  cdef hobj_ref_t ref
+  cdef File file

+  def __init__(self, dest):
+    """ Create a reference to node dest """
+    if dest is None:
+      return
+    self.file = dest._v_file
+    H5Rcreate(&self.ref, self.file.file_id, dest._v_pathname, H5R_OBJECT, -1)
+
+  def deref(self):
+    """ Find and return the referenced node
+
+    Attention! This is a slow function, so don't call it
+    more often than necessary. """
+    cdef hobj_ref_t tmp
+    # The following loop sounds ridiculous, unfortunately, that's
+    # the only way to retrieve the name of a referenced object.
+    # Starting HDF5 1.8.0, one could use H5Rget_name, but it
+    # also performs a tree traversal. It would be faster, though,
+    # since both Python and H5Rcreate have some overhead.
+    for node in self.file:
+      H5Rcreate(&tmp, self.file.file_id, node._v_pathname, H5R_OBJECT, -1)
+      if tmp == self.ref:
+        return node
+    raise HDF5Error("could not find referenced object")
+
+
 cdef class UnImplemented(Leaf):


Index: tables/definitions.pxd
===================================================================
--- tables/definitions.pxd      (revision 4262)
+++ tables/definitions.pxd      (working copy)
@@ -173,6 +173,7 @@
   # such an unsigned long long type.
   ctypedef long long hsize_t
   ctypedef signed long long hssize_t
+  ctypedef long long hobj_ref_t

   ctypedef struct hvl_t:
     size_t len                 # Length of VL data (in base type units)
@@ -203,6 +204,9 @@
     H5G_DATASET,                # Object is a dataset
     H5G_TYPE,                   # Object is a named data type

+  ctypedef enum H5R_type_t:
+    H5R_OBJECT
+
   # Values for fill value status
   cdef enum H5D_fill_value_t:
     H5D_FILL_VALUE_ERROR        = -1,
@@ -219,6 +223,11 @@
     size_t linklen
     #H5O_stat_t ohdr            # Object header information. New in HDF5 1.6

+  ctypedef struct H5E_auto_t_t:
+    pass
+
+  ctypedef H5E_auto_t_t *H5E_auto_t
+
   # HDF5 layouts
   cdef enum H5D_layout_t:
     H5D_LAYOUT_ERROR    = -1,
@@ -315,6 +324,10 @@
     H5T_UNIX_D32BE
     H5T_UNIX_D64BE

+  # reference types
+  cdef enum:
+    H5T_STD_REF_OBJ
+
   # The order to retrieve atomic native datatype
   cdef enum H5T_direction_t:
     H5T_DIR_DEFAULT     = 0,    #default direction is inscendent
@@ -404,6 +417,7 @@
   hid_t  H5Tcreate(H5T_class_t type, size_t size)
   hid_t  H5Tcopy(hid_t type_id)
   herr_t H5Tclose(hid_t type_id)
+  hid_t  H5Tvlen_create(hid_t base_type_id)

   # Operations defined on string data types
   htri_t H5Tis_variable_str(hid_t dtype_id)
@@ -412,6 +426,7 @@
   int    H5Tget_nmembers(hid_t type_id)
   char  *H5Tget_member_name(hid_t type_id, unsigned membno)
   hid_t  H5Tget_member_type(hid_t type_id, unsigned membno)
+  size_t H5Tget_member_offset(hid_t type_id, unsigned membno)
   hid_t  H5Tget_native_type(hid_t type_id, H5T_direction_t direction)
   herr_t H5Tget_member_value(hid_t type_id, int membno, void *value)
   int    H5Tget_offset(hid_t type_id)
@@ -434,7 +449,12 @@
   size_t H5Aget_name(hid_t attr_id, size_t buf_size, char *buf)
   hid_t  H5Aopen_idx(hid_t loc_id, unsigned int idx)
   herr_t H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf)
+  herr_t H5Awrite(hid_t attr_id, hid_t mem_type_id, void *buf)
   herr_t H5Aclose(hid_t attr_id)
+  hid_t  H5Acreate(hid_t loc_id, char *attr_name, hid_t type_id,
+                   hid_t space_id, hid_t acpl_id)
+  herr_t H5Aopen_name(hid_t loc_id, char *name)
+  hid_t  H5Aget_space(hid_t attr_id)

   # Operations with properties
   hid_t  H5Pcreate(hid_t plist_id)
@@ -447,7 +467,14 @@
   H5D_layout_t H5Pget_layout(hid_t plist)
   int H5Pget_chunk(hid_t plist, int max_ndims, hsize_t *dims)

+  # error handling
+  herr_t H5Eget_auto(H5E_auto_t *func, void **client_data)
+  herr_t H5Eset_auto(H5E_auto_t func, void *client_data)

+  # Operations with references
+  herr_t H5Rcreate(void *ref, hid_t loc_id, char *name, H5R_type_t ref_type,
+        hid_t space_id)
+
 # Specific HDF5 functions for PyTables
 cdef extern from "H5ATTR.h":
   herr_t H5ATTRget_attribute(hid_t loc_id, char *attr_name,
------------------ snip ----------------------

------------------------------------------------------------------------------
Come build with us! The BlackBerry(R) Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay 
ahead of the curve. Join us from November 9-12, 2009. Register now!
http://p.sf.net/sfu/devconf
_______________________________________________
Pytables-users mailing list
Pytables-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/pytables-users

Reply via email to