Revision: 6007
http://matplotlib.svn.sourceforge.net/matplotlib/?rev=6007&view=rev
Author: jswhit
Date: 2008-08-08 20:22:50 +0000 (Fri, 08 Aug 2008)
Log Message:
-----------
update pupynere to 1.0.1
Modified Paths:
--------------
trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py
trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py
Added Paths:
-----------
trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py
Modified: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py 2008-08-08 12:19:47 UTC (rev 6006)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py 2008-08-08 20:22:50 UTC (rev 6007)
@@ -36,7 +36,7 @@
import numpy as np
import numpy.ma as ma
from shapelib import ShapeFile
-import _geoslib, pupynere, netcdftime
+import _geoslib, netcdf, netcdftime
# basemap data files now installed in lib/matplotlib/toolkits/basemap/data
basemap_datadir = os.sep.join([os.path.dirname(__file__), 'data'])
@@ -3640,7 +3640,7 @@
else:
return corners
-def NetCDFFile(file, maskandscale=True):
+def NetCDFFile(file, mode='r', maskandscale=True):
"""NetCDF File reader. API is the same as Scientific.IO.NetCDF.
If ``file`` is a URL that starts with `http`, it is assumed
to be a remote OPenDAP dataset, and the python dap client is used
@@ -3656,9 +3656,9 @@
``maskandscale`` keyword to False.
"""
if file.startswith('http'):
- return pupynere._RemoteFile(file,maskandscale)
+ return netcdf._RemoteFile(file,maskandscale=maskandscale)
else:
- return pupynere._LocalFile(file,maskandscale)
+ return netcdf.netcdf_file(file,mode=mode,maskandscale=maskandscale)
def num2date(times,units='days since 0001-01-01 00:00:00',calendar='proleptic_gregorian'):
"""
Added: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py
(rev 0)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py 2008-08-08 20:22:50 UTC (rev 6007)
@@ -0,0 +1,76 @@
+from numpy import ma, squeeze
+from pupynere import netcdf_file, _maskandscale
+from dap.client import open as open_remote
+from dap.dtypes import ArrayType, GridType, typemap
+
+_typecodes = dict([[_v,_k] for _k,_v in typemap.items()])
+
+class _RemoteFile(object):
+ """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+
+ def __init__(self, file, maskandscale=False):
+ self._buffer = open_remote(file)
+ self._maskandscale = maskandscale
+ self._parse()
+
+ def read(self, size=-1):
+ """Alias for reading the file buffer."""
+ return self._buffer.read(size)
+
+ def _parse(self):
+ """Initial parsing of the header."""
+ # Read header info.
+ self._dim_array()
+ self._gatt_array()
+ self._var_array()
+
+ def _dim_array(self):
+ """Read a dict with dimensions names and sizes."""
+ self.dimensions = {}
+ self._dims = []
+ for k,d in self._buffer.iteritems():
+ if (isinstance(d, ArrayType) or isinstance(d, GridType)) and len(d.shape) == 1 and k == d.dimensions[0]:
+ name = k
+ length = len(d)
+ self.dimensions[name] = length
+ self._dims.append(name) # preserve dim order
+
+ def _gatt_array(self):
+ """Read global attributes."""
+ self.__dict__.update(self._buffer.attributes)
+
+ def _var_array(self):
+ """Read all variables."""
+ # Read variables.
+ self.variables = {}
+ for k,d in self._buffer.iteritems():
+ if isinstance(d, GridType) or isinstance(d, ArrayType):
+ name = k
+ self.variables[name] = _RemoteVariable(d,self._maskandscale)
+
+ def close(self):
+ # this is a no-op provided for compatibility
+ pass
+
+class _RemoteVariable(object):
+ def __init__(self, var, maskandscale):
+ self._var = var
+ self._maskandscale = maskandscale
+ self.dtype = var.type
+ self.shape = var.shape
+ self.dimensions = var.dimensions
+ self.__dict__.update(var.attributes)
+
+ def __getitem__(self, index):
+ datout = squeeze(self._var.__getitem__(index))
+ # automatically
+ # - remove singleton dimensions
+ # - create a masked array using missing_value or _FillValue attribute
+ # - apply scale_factor and add_offset to packed integer data
+ if self._maskandscale:
+ return _maskandscale(self,datout)
+ else:
+ return datout
+
+ def typecode(self):
+ return _typecodes[self.dtype]
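
The ``_dim_array`` method above infers dimensions from coordinate
variables: a dataset member counts as a dimension if it is 1-D and named
after its own first dimension. A standalone sketch of that rule, where
``FakeArray`` is a hypothetical stand-in for dap's ``ArrayType``/``GridType``
(the real code also checks ``isinstance``):

    class FakeArray(object):
        def __init__(self, dimensions, data):
            self.dimensions = dimensions
            self.shape = (len(data),)
            self._data = data
        def __len__(self):
            return len(self._data)

    dataset = {'time': FakeArray(('time',), range(10)),  # coordinate variable
               'temp': FakeArray(('time',), range(10))}  # plain data variable

    dimensions = {}
    for k, d in dataset.iteritems():
        # 1-D and named after its own first dimension
        if len(d.shape) == 1 and k == d.dimensions[0]:
            dimensions[k] = len(d)

    print dimensions   # {'time': 10}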
Modified: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py 2008-08-08 12:19:47 UTC (rev 6006)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py 2008-08-08 20:22:50 UTC (rev 6007)
@@ -1,45 +1,89 @@
-"""NetCDF reader.
+"""
+NetCDF reader/writer module.
-Pupynere implements a PUre PYthon NEtcdf REader.
+This module implements the Scientific.IO.NetCDF API to read and create
+NetCDF files. The same API is also used in the PyNIO and pynetcdf
+modules, allowing these modules to be used interchangeably when working
+with NetCDF files. The major advantage of ``scipy.io.netcdf`` over the
+other modules is that, unlike them, it doesn't require the code to be
+linked against the NetCDF libraries.
-Copyright (c) 2003-2006 Roberto De Almeida <[EMAIL PROTECTED]>
+The code is based on the `NetCDF file format specification
+<http://www.unidata.ucar.edu/software/netcdf/guide_15.html>`_. A NetCDF
+file is a self-describing binary format, with a header followed by
+data. The header contains metadata describing dimensions, variables
+and the position of the data in the file, so access can be done in an
+efficient manner without loading unnecessary data into memory. We use
+the ``mmap`` module to create Numpy arrays mapped to the data on disk,
+for the same purpose.
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject
-to the following conditions:
+The structure of a NetCDF file is as follows:
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
+ C D F <VERSION BYTE> <NUMBER OF RECORDS>
+ <DIMENSIONS> <GLOBAL ATTRIBUTES> <VARIABLES METADATA>
+ <NON-RECORD DATA> <RECORD DATA>
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Record data refers to data where the first axis can be expanded at
+will. All record variables share the same dimension as their first
+axis, and they are stored at the end of the file record by record, i.e.
+
+ A[0], B[0], ..., A[1], B[1], ..., etc,
+
+so that new data can be appended to the file without changing its
+original structure. Non-record data are padded to a four-byte boundary.
+Record data are also padded, unless there is exactly one record
+variable in the file, in which case the padding is dropped. All data
+is stored in big-endian byte order.
+
+The Scientific.IO.NetCDF API allows attributes to be added directly to
+instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
+between user-set attributes and instance attributes, user-set attributes
+are automatically stored in the ``_attributes`` attribute by overloading
+``__setattr__``. This is why the code sometimes uses
+``obj.__dict__['key'] = value`` instead of simply ``obj.key = value``;
+otherwise the key would also end up among the user-set attributes and
+be written back to the file.
+
+To create a NetCDF file::
+
+ >>> import time
+ >>> f = netcdf_file('simple.nc', 'w')
+ >>> f.history = 'Created for a test'
+ >>> f.createDimension('time', 10)
+ >>> time = f.createVariable('time', 'i', ('time',))
+ >>> time[:] = range(10)
+ >>> time.units = 'days since 2008-01-01'
+ >>> f.close()
+
+To read the NetCDF file we just created::
+
+ >>> f = netcdf_file('simple.nc', 'r')
+ >>> print f.history
+ Created for a test
+ >>> time = f.variables['time']
+ >>> print time.units
+ days since 2008-01-01
+ >>> print time.shape
+ (10,)
+ >>> print time[-1]
+ 9
+ >>> f.close()
+
+TODO: properly implement ``_FillValue``.
"""
-__author__ = "Roberto De Almeida <[EMAIL PROTECTED]>"
+__all__ = ['netcdf_file', 'netcdf_variable']
-import struct
-import itertools
-import mmap
+from operator import mul
+from mmap import mmap, ACCESS_READ
-from numpy import ndarray, empty, array, ma, squeeze, zeros
-import numpy
+from numpy import fromstring, ndarray, dtype, empty, array, asarray, squeeze, zeros, ma
+from numpy import little_endian as LITTLE_ENDIAN
-from dap.client import open as open_remote
-from dap.dtypes import ArrayType, GridType, typemap
-ABSENT = '\x00' * 8
-ZERO = '\x00' * 4
-NC_BYTE = '\x00\x00\x00\x01'
+ABSENT = '\x00\x00\x00\x00\x00\x00\x00\x00'
+ZERO = '\x00\x00\x00\x00'
+NC_BYTE = '\x00\x00\x00\x01'
NC_CHAR = '\x00\x00\x00\x02'
NC_SHORT = '\x00\x00\x00\x03'
NC_INT = '\x00\x00\x00\x04'
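
The attribute-interception scheme described in the docstring above is easy
to see in isolation. A minimal sketch (``AttrDemo`` is hypothetical, not
part of the module):

    class AttrDemo(object):
        def __init__(self):
            # bypass __setattr__ so _attributes itself is not recorded
            self.__dict__['_attributes'] = {}

        def __setattr__(self, attr, value):
            self._attributes[attr] = value  # remembered for the file header
            self.__dict__[attr] = value     # normal attribute access still works

    d = AttrDemo()
    d.history = 'Created for a test'
    print d._attributes   # {'history': 'Created for a test'}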
@@ -49,360 +93,531 @@
NC_VARIABLE = '\x00\x00\x00\x0b'
NC_ATTRIBUTE = '\x00\x00\x00\x0c'
-_typecodes = dict([[_v,_k] for _k,_v in typemap.items()])
-# default _FillValue for netcdf types (apply also to corresponding
-# DAP types).
-_default_fillvals = {'c':'\0',
- 'S':"",
- 'b':-127,
- 'B':-127,
- 'h':-32767,
- 'H':65535,
- 'i':-2147483647L,
- 'L':4294967295L,
- 'q':-2147483647L,
- 'f':9.9692099683868690e+36,
- 'd':9.9692099683868690e+36}
-def NetCDFFile(file, maskandscale=True):
- """NetCDF File reader. API is the same as Scientific.IO.NetCDF.
- If 'file' is a URL that starts with 'http', it is assumed
- to be a remote OPenDAP dataset, and the python dap client is used
- to retrieve the data. Only the OPenDAP Array and Grid data
- types are recognized. If file does not start with 'http', it
- is assumed to be a local netCDF file. Data will
- automatically be converted to masked arrays if the variable has either
- a 'missing_value' or '_FillValue' attribute, and some data points
- are equal to the value specified by that attribute. In addition,
- variables stored as integers that have the 'scale_factor' and
- 'add_offset' attribute will automatically be rescaled to floats when
- read. To suppress these automatic conversions, set the
- maskandscale keyword to False.
+TYPEMAP = { NC_BYTE: ('b', 1),
+ NC_CHAR: ('c', 1),
+ NC_SHORT: ('h', 2),
+ NC_INT: ('i', 4),
+ NC_FLOAT: ('f', 4),
+ NC_DOUBLE: ('d', 8) }
+
+REVERSE = { 'b': NC_BYTE,
+ 'c': NC_CHAR,
+ 'h': NC_SHORT,
+ 'i': NC_INT,
+ 'f': NC_FLOAT,
+ 'd': NC_DOUBLE,
+
+ # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
+ # used when getting the types from generic attributes.
+ 'l': NC_INT,
+ 'S': NC_CHAR }
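
The extra 'l' and 'S' entries are needed because attribute values set from
plain Python objects arrive with those dtype characters. A quick check (the
'l' result assumes a platform where the default integer maps to a C long):

    from numpy import asarray

    print asarray(1).dtype.char      # 'l'
    print asarray('foo').dtype.char  # 'S'
    print asarray(1.5).dtype.char    # 'd'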
+
+
+class netcdf_file(object):
"""
- if file.startswith('http'):
- return _RemoteFile(file,maskandscale)
- else:
- return _LocalFile(file,maskandscale)
-
-def _maskandscale(var,datout):
- totalmask = zeros(datout.shape,numpy.bool)
- fillval = None
- if hasattr(var, 'missing_value') and (datout == var.missing_value).any():
- fillval = var.missing_value
- totalmask += datout==fillval
- if hasattr(var, '_FillValue') and (datout == var._FillValue).any():
- if fillval is None:
- fillval = var._FillValue
- totalmask += datout==var._FillValue
- elif (datout == _default_fillvals[var.typecode()]).any():
- if fillval is None:
- fillval = _default_fillvals[var.typecode()]
- totalmask += datout==_default_fillvals[var.dtype]
- if fillval is not None:
- datout = ma.masked_array(datout,mask=totalmask,fill_value=fillval)
- try:
- datout = var.scale_factor*datout + var.add_offset
- except:
- pass
- return datout
+ A ``netcdf_file`` object has two standard attributes: ``dimensions`` and
+ ``variables``. The values of both are dictionaries, mapping dimension
+ names to their associated lengths and variable names to variables,
+ respectively. Application programs should never modify these
+ dictionaries.
-class _RemoteFile(object):
- """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+ All other attributes correspond to global attributes defined in the
+ NetCDF file. Global file attributes are created by assigning to an
+ attribute of the ``netcdf_file`` object.
- def __init__(self, file, maskandscale):
- self._buffer = open_remote(file)
+ """
+ def __init__(self, filename, mode='r', maskandscale=False):
+
self._maskandscale = maskandscale
- self._parse()
- def read(self, size=-1):
- """Alias for reading the file buffer."""
- return self._buffer.read(size)
+ self.filename = filename
- def _parse(self):
- """Initial parsing of the header."""
- # Read header info.
- self._dim_array()
- self._gatt_array()
- self._var_array()
+ assert mode in 'rw', "Mode must be either 'r' or 'w'."
+ self.mode = mode
- def _dim_array(self):
- """Read a dict with dimensions names and sizes."""
self.dimensions = {}
+ self.variables = {}
+
self._dims = []
- for k,d in self._buffer.iteritems():
- if (isinstance(d, ArrayType) or isinstance(d, GridType)) and len(d.shape) == 1 and k == d.dimensions[0]:
- name = k
- length = len(d)
- self.dimensions[name] = length
- self._dims.append(name) # preserve dim order
+ self._recs = 0
+ self._recsize = 0
- def _gatt_array(self):
- """Read global attributes."""
- self.__dict__.update(self._buffer.attributes)
+ self.fp = open(self.filename, '%sb' % mode)
- def _var_array(self):
- """Read all variables."""
- # Read variables.
- self.variables = {}
- for k,d in self._buffer.iteritems():
- if isinstance(d, GridType) or isinstance(d, ArrayType):
- name = k
- self.variables[name] = _RemoteVariable(d,self._maskandscale)
+ self._attributes = {}
+ if mode is 'r':
+ self._read()
+
+ def __setattr__(self, attr, value):
+ # Store user defined attributes in a separate dict,
+ # so we can save them to file later.
+ try:
+ self._attributes[attr] = value
+ except AttributeError:
+ pass
+ self.__dict__[attr] = value
+
def close(self):
- # this is a no-op provided for compatibility
- pass
+ if not self.fp.closed:
+ try:
+ self.flush()
+ finally:
+ self.fp.close()
+ __del__ = close
+ def createDimension(self, name, length):
+ self.dimensions[name] = length
+ self._dims.append(name)
-class _RemoteVariable(object):
- def __init__(self, var, maskandscale):
- self._var = var
- self._maskandscale = maskandscale
- self.dtype = var.type
- self.shape = var.shape
- self.dimensions = var.dimensions
- self.__dict__.update(var.attributes)
+ def createVariable(self, name, type, dimensions):
+ shape = tuple([self.dimensions[dim] for dim in dimensions])
+ shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for numpy
- def __getitem__(self, index):
- datout = squeeze(self._var.__getitem__(index))
- # automatically
- # - remove singleton dimensions
- # - create a masked array using missing_value or _FillValue attribute
- # - apply scale_factor and add_offset to packed integer data
- if self._maskandscale:
- return _maskandscale(self,datout)
+ if isinstance(type, basestring): type = dtype(type)
+ typecode, size = type.char, type.itemsize
+ dtype_ = '>%s' % typecode
+ if size > 1: dtype_ += str(size)
+
+ data = empty(shape_, dtype=dtype_)
+ self.variables[name] = netcdf_variable(data, typecode, shape, dimensions)
+ return self.variables[name]
+
+ def flush(self):
+ if self.mode is 'w':
+ self._write()
+ sync = flush
+
+ def _write(self):
+ self.fp.write('CDF')
+
+ self.__dict__['version_byte'] = 1
+ self.fp.write(array(1, '>b').tostring())
+
+ # Write headers and data.
+ self._write_numrecs()
+ self._write_dim_array()
+ self._write_gatt_array()
+ self._write_var_array()
+
+ def _write_numrecs(self):
+ # Get highest record count from all record variables.
+ for var in self.variables.values():
+ if not var.shape[0] and len(var.data) > self._recs:
+ self.__dict__['_recs'] = len(var.data)
+ self._pack_int(self._recs)
+
+ def _write_dim_array(self):
+ if self.dimensions:
+ self.fp.write(NC_DIMENSION)
+ self._pack_int(len(self.dimensions))
+ for name, length in self.dimensions.items():
+ self._pack_string(name)
+ self._pack_int(length or 0) # replace None with 0 for record dimension
else:
- return datout
+ self.fp.write(ABSENT)
- def typecode(self):
- return _typecodes[self.dtype]
+ def _write_gatt_array(self):
+ self._write_att_array(self._attributes)
+ def _write_att_array(self, attributes):
+ if attributes:
+ self.fp.write(NC_ATTRIBUTE)
+ self._pack_int(len(attributes))
+ for name, values in attributes.items():
+ self._pack_string(name)
+ self._write_values(values)
+ else:
+ self.fp.write(ABSENT)
-class _LocalFile(object):
- """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+ def _write_var_array(self):
+ if self.variables:
+ self.fp.write(NC_VARIABLE)
+ self._pack_int(len(self.variables))
- def __init__(self, file, maskandscale):
- self._buffer = open(file, 'rb')
- self._maskandscale = maskandscale
- self._parse()
+ # Sort variables non-recs first, then recs.
+ variables = self.variables.items()
+ variables.sort(key=lambda (k, v): v.shape and v.shape[0] is not None)
+ variables.reverse()
+ variables = [k for (k, v) in variables]
- def read(self, size=-1):
- """Alias for reading the file buffer."""
- return self._buffer.read(size)
+ # Set the metadata for all variables.
+ for name in variables:
+ self._write_var_metadata(name)
+ # Now that we have the metadata, we know the vsize of
+ # each record variable, so we can calculate recsize.
+ self.__dict__['_recsize'] = sum([
+ var._vsize for var in self.variables.values()
+ if var.shape[0] is None])
+ # Set the data for all variables.
+ for name in variables:
+ self._write_var_data(name)
+ else:
+ self.fp.write(ABSENT)
- def _parse(self):
- """Initial parsing of the header."""
- # Check magic bytes.
- assert self.read(3) == 'CDF'
+ def _write_var_metadata(self, name):
+ var = self.variables[name]
- # Read version byte.
- byte = self.read(1)
- self.version_byte = struct.unpack('>b', byte)[0]
+ self._pack_string(name)
+ self._pack_int(len(var.dimensions))
+ for dimname in var.dimensions:
+ dimid = self._dims.index(dimname)
+ self._pack_int(dimid)
- # Read header info.
- self._numrecs()
- self._dim_array()
- self._gatt_array()
- self._var_array()
+ self._write_att_array(var._attributes)
- def _numrecs(self):
- """Read number of records."""
- self._nrecs = self._unpack_int()
+ nc_type = REVERSE[var.typecode()]
+ self.fp.write(nc_type)
- def _dim_array(self):
- """Read a dict with dimensions names and sizes."""
- assert self.read(4) in [ZERO, NC_DIMENSION]
+ if var.shape[0]:
+ vsize = var.data.size * var.data.itemsize
+ vsize += -vsize % 4
+ else: # record variable
+ vsize = var.data[0].size * var.data.itemsize
+ rec_vars = len([var for var in self.variables.values()
+ if var.shape[0] is None])
+ if rec_vars > 1:
+ vsize += -vsize % 4
+ self.variables[name].__dict__['_vsize'] = vsize
+ self._pack_int(vsize)
+
+ # Pack a bogus begin, and set the real value later.
+ self.variables[name].__dict__['_begin'] = self.fp.tell()
+ self._pack_begin(0)
+
+ def _write_var_data(self, name):
+ var = self.variables[name]
+
+ # Set begin in file header.
+ the_beguine = self.fp.tell()
+ self.fp.seek(var._begin)
+ self._pack_begin(the_beguine)
+ self.fp.seek(the_beguine)
+
+ # Write data.
+ if var.shape[0]:
+ self.fp.write(var.data.tostring())
+ count = var.data.size * var.data.itemsize
+ self.fp.write('0' * (var._vsize - count))
+ else: # record variable
+ # Handle rec vars with shape[0] < nrecs.
+ if self._recs > len(var.data):
+ shape = (self._recs,) + var.data.shape[1:]
+ var.data.resize(shape)
+
+ pos0 = pos = self.fp.tell()
+ for rec in var.data:
+ # Apparently scalars cannot be converted to big endian. If we
+ # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
+ # will remain as ``=i4``.
+ if not rec.shape and (rec.dtype.byteorder == '<' or
+ (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
+ rec = rec.byteswap()
+ self.fp.write(rec.tostring())
+ # Padding
+ count = rec.size * rec.itemsize
+ self.fp.write('0' * (var._vsize - count))
+ pos += self._recsize
+ self.fp.seek(pos)
+ self.fp.seek(pos0 + var._vsize)
+
+ def _write_values(self, values):
+ values = asarray(values)
+ values = values.astype(values.dtype.newbyteorder('>'))
+
+ nc_type = REVERSE[values.dtype.char]
+ self.fp.write(nc_type)
+
+ if values.dtype.char == 'S':
+ nelems = values.itemsize
+ else:
+ nelems = values.size
+ self._pack_int(nelems)
+
+ if not values.shape and (values.dtype.byteorder == '<' or
+ (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
+ values = values.byteswap()
+ self.fp.write(values.tostring())
+ count = values.size * values.itemsize
+ self.fp.write('0' * (-count % 4)) # pad
+
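The ``-count % 4`` idiom used for padding computes how many bytes are
needed to reach the next four-byte boundary. A quick sanity check:

    for count in range(1, 9):
        pad = -count % 4
        print count, pad, (count + pad) % 4 == 0   # pad cycles 3, 2, 1, 0
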
+ def _read(self):
+ # Check magic bytes and version
+ assert self.fp.read(3) == 'CDF', "Error: %s is not a valid NetCDF 3 file" % self.filename
+ self.__dict__['version_byte'] = fromstring(self.fp.read(1), '>b')[0]
+
+ # Read file headers and set data.
+ self._read_numrecs()
+ self._read_dim_array()
+ self._read_gatt_array()
+ self._read_var_array()
+
+ def _read_numrecs(self):
+ self.__dict__['_recs'] = self._unpack_int()
+
+ def _read_dim_array(self):
+ assert self.fp.read(4) in [ZERO, NC_DIMENSION]
count = self._unpack_int()
- self.dimensions = {}
- self._dims = []
for dim in range(count):
- name = self._read_string()
- length = self._unpack_int()
- if length == 0: length = None # record dimension
+ name = self._unpack_string()
+ length = self._unpack_int() or None # None for record dimension
self.dimensions[name] = length
- self._dims.append(name) # preserve dim order
+ self._dims.append(name) # preserve order
- def _gatt_array(self):
- """Read global attributes."""
- self.attributes = self._att_array()
+ def _read_gatt_array(self):
+ for k, v in self._read_att_array().items():
+ self.__setattr__(k, v)
- # Update __dict__ for compatibility with S.IO.N
- self.__dict__.update(self.attributes)
-
- def _att_array(self):
- """Read a dict with attributes."""
- assert self.read(4) in [ZERO, NC_ATTRIBUTE]
+ def _read_att_array(self):
+ assert self.fp.read(4) in [ZERO, NC_ATTRIBUTE]
count = self._unpack_int()
- # Read attributes.
attributes = {}
- for attribute in range(count):
- name = self._read_string()
- nc_type = self._unpack_int()
- n = self._unpack_int()
+ for attr in range(count):
+ name = self._unpack_string()
+ attributes[name] = self._read_values()
+ return attributes
- # Read value for attributes.
- attributes[name] = self._read_values(n, nc_type)
+ def _read_var_array(self):
+ assert self.fp.read(4) in [ZERO, NC_VARIABLE]
- return attributes
+ begin = 0
+ dtypes = {'names': [], 'formats': []}
+ rec_vars = []
+ count = self._unpack_int()
+ for var in range(count):
+ name, dimensions, shape, attributes, typecode, size, dtype_, begin_, vsize = self._read_var()
+ if shape and shape[0] is None:
+ rec_vars.append(name)
+ self.__dict__['_recsize'] += vsize
+ if begin == 0: begin = begin_
+ dtypes['names'].append(name)
+ dtypes['formats'].append(str(shape[1:]) + dtype_)
- def _var_array(self):
- """Read all variables."""
- assert self.read(4) in [ZERO, NC_VARIABLE]
+ # Handle padding with a virtual variable.
+ if typecode in 'bch':
+ actual_size = reduce(mul, (1,) + shape[1:]) * size
+ padding = -actual_size % 4
+ if padding:
+ dtypes['names'].append('_padding_%d' % var)
+ dtypes['formats'].append('(%d,)>b' % padding)
- # Read size of each record, in bytes.
- self._read_recsize()
+ # Data will be set later.
+ data = None
+ else:
+ mm = mmap(self.fp.fileno(), begin_+vsize, access=ACCESS_READ)
+ data = ndarray.__new__(ndarray, shape, dtype=dtype_,
+ buffer=mm, offset=begin_, order=0)
- # Read variables.
- self.variables = {}
- count = self._unpack_int()
- for variable in range(count):
- name = self._read_string()
- self.variables[name] = self._read_var()
+ # Add variable.
+ self.variables[name] = netcdf_variable(
+ data, typecode, shape, dimensions, attributes)
- def _read_recsize(self):
- """Read all variables and compute record bytes."""
- pos = self._buffer.tell()
-
- recsize = 0
- count = self._unpack_int()
- for variable in range(count):
- name = self._read_string()
- n = self._unpack_int()
- isrec = False
- for i in range(n):
- dimid = self._unpack_int()
- name = self._dims[dimid]
- dim = self.dimensions[name]
- if dim is None and i == 0:
- isrec = True
- attributes = self._att_array()
- nc_type = self._unpack_int()
- vsize = self._unpack_int()
- begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
+ if rec_vars:
+ # Remove padding when only one record variable.
+ if len(rec_vars) == 1:
+ dtypes['names'] = dtypes['names'][:1]
+ dtypes['formats'] = dtypes['formats'][:1]
- if isrec: recsize += vsize
+ # Build rec array.
+ mm = mmap(self.fp.fileno(), begin+self._recs*self._recsize, access=ACCESS_READ)
+ rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=dtypes,
+ buffer=mm, offset=begin, order=0)
- self._recsize = recsize
- self._buffer.seek(pos)
+ for var in rec_vars:
+ self.variables[var].__dict__['data'] = rec_array[var]
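
The record array built above views all record variables through one
structured dtype, one element per record, so each variable's records can
be pulled back out by name. A minimal sketch with two hypothetical record
variables A and B:

    from numpy import zeros

    dtypes = {'names': ['A', 'B'], 'formats': ['>i4', '(3,)>f4']}
    rec_array = zeros(2, dtype=dtypes)   # two records
    print rec_array['A'].shape           # (2,)
    print rec_array['B'].shape           # (2, 3)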
def _read_var(self):
+ name = self._unpack_string()
dimensions = []
shape = []
- n = self._unpack_int()
- isrec = False
- for i in range(n):
+ dims = self._unpack_int()
+
+ for i in range(dims):
dimid = self._unpack_int()
- name = self._dims[dimid]
- dimensions.append(name)
- dim = self.dimensions[name]
- if dim is None and i == 0:
- dim = self._nrecs
- isrec = True
+ dimname = self._dims[dimid]
+ dimensions.append(dimname)
+ dim = self.dimensions[dimname]
shape.append(dim)
dimensions = tuple(dimensions)
shape = tuple(shape)
- attributes = self._att_array()
- nc_type = self._unpack_int()
+ attributes = self._read_att_array()
+ nc_type = self.fp.read(4)
vsize = self._unpack_int()
-
- # Read offset.
begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
- return _LocalVariable(self._buffer.fileno(), nc_type, vsize, begin, shape, dimensions, attributes, isrec, self._recsize, maskandscale=self._maskandscale)
+ typecode, size = TYPEMAP[nc_type]
+ if typecode is 'c':
+ dtype_ = '>c'
+ else:
+ dtype_ = '>%s' % typecode
+ if size > 1: dtype_ += str(size)
- def _read_values(self, n, nc_type):
- bytes = [1, 1, 2, 4, 4, 8]
- typecodes = ['b', 'c', 'h', 'i', 'f', 'd']
-
- count = n * bytes[nc_type-1]
- values = self.read(count)
- padding = self.read((4 - (count % 4)) % 4)
-
- typecode = typecodes[nc_type-1]
- if nc_type != 2: # not char
- values = struct.unpack('>%s' % (typecode * n), values)
- values = array(values, dtype=typecode)
+ return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
+
+ def _read_values(self):
+ nc_type = self.fp.read(4)
+ n = self._unpack_int()
+
+ typecode, size = TYPEMAP[nc_type]
+
+ count = n*size
+ values = self.fp.read(count)
+ self.fp.read(-count % 4) # read padding
+
+ if typecode is not 'c':
+ values = fromstring(values, dtype='>%s%d' % (typecode, size))
+ if values.shape == (1,): values = values[0]
else:
- # Remove EOL terminator.
- if values.endswith('\x00'): values = values[:-1]
-
+ values = values.rstrip('\x00')
return values
+ def _pack_begin(self, begin):
+ if self.version_byte == 1:
+ self._pack_int(begin)
+ elif self.version_byte == 2:
+ self._pack_int64(begin)
+
+ def _pack_int(self, value):
+ self.fp.write(array(value, '>i').tostring())
+ _pack_int32 = _pack_int
+
def _unpack_int(self):
- return struct.unpack('>i', self.read(4))[0]
+ return fromstring(self.fp.read(4), '>i')[0]
_unpack_int32 = _unpack_int
+ def _pack_int64(self, value):
+ self.fp.write(array(value, '>q').tostring())
+
def _unpack_int64(self):
- return struct.unpack('>q', self.read(8))[0]
+ return fromstring(self.fp.read(8), '>q')[0]
- def _read_string(self):
- count = struct.unpack('>i', self.read(4))[0]
- s = self.read(count)
- # Remove EOL terminator.
- if s.endswith('\x00'): s = s[:-1]
- padding = self.read((4 - (count % 4)) % 4)
+ def _pack_string(self, s):
+ count = len(s)
+ self._pack_int(count)
+ self.fp.write(s)
+ self.fp.write('0' * (-count % 4)) # pad
+
+ def _unpack_string(self):
+ count = self._unpack_int()
+ s = self.fp.read(count).rstrip('\x00')
+ self.fp.read(-count % 4) # read padding
return s
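
The packing helpers above use numpy for the big-endian conversions that
the old code did with ``struct``. A round-trip check using the same calls:

    from numpy import array, fromstring

    packed = array(1234, '>i').tostring()
    print repr(packed)                 # '\x00\x00\x04\xd2'
    print fromstring(packed, '>i')[0]  # 1234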
- def close(self):
- self._buffer.close()
+class netcdf_variable(object):
+ """
+ ``netcdf_variable`` objects are constructed by calling the method
+ ``createVariable`` on the netcdf_file object.
-class _LocalVariable(object):
- def __init__(self, fileno, nc_type, vsize, begin, shape, dimensions, attributes, isrec=False, recsize=0, maskandscale=True):
- self._nc_type = nc_type
- self._vsize = vsize
- self._begin = begin
- self.shape = shape
+ ``netcdf_variable`` objects behave much like array objects defined in
+ Numpy, except that their data resides in a file. Data is read by
+ indexing and written by assigning to an indexed subset; the entire
+ array can be accessed by the index ``[:]`` or using the methods
+ ``getValue`` and ``assignValue``. ``netcdf_variable`` objects also
+ have attribute ``shape`` with the same meaning as for arrays, but
+ the shape cannot be modified. There is another read-only attribute
+ ``dimensions``, whose value is the tuple of dimension names.
+
+ All other attributes correspond to variable attributes defined in
+ the NetCDF file. Variable attributes are created by assigning to an
+ attribute of the ``netcdf_variable`` object.
+
+ """
+ def __init__(self, data, typecode, shape, dimensions, attributes=None, maskandscale=True):
+ self.data = data
+ self._typecode = typecode
+ self._shape = shape
self.dimensions = dimensions
- self.attributes = attributes # for ``dap.plugins.netcdf``
- self.__dict__.update(attributes)
- self._is_record = isrec
self._maskandscale = maskandscale
- # Number of bytes and type.
- self._bytes = [1, 1, 2, 4, 4, 8][self._nc_type-1]
- type_ = ['i', 'S', 'i', 'i', 'f', 'f'][self._nc_type-1]
- dtype = '>%s%d' % (type_, self._bytes)
- bytes = self._begin + self._vsize
+ self._attributes = attributes or {}
+ for k, v in self._attributes.items():
+ self.__dict__[k] = v
- if isrec:
- # Record variables are not stored contiguosly on disk, so we
- # need to create a separate array for each record.
- self.__array_data__ = empty(shape, dtype)
- bytes += (shape[0] - 1) * recsize
- for n in range(shape[0]):
- offset = self._begin + (n * recsize)
- mm = mmap.mmap(fileno, bytes, access=mmap.ACCESS_READ)
- self.__array_data__[n] = ndarray.__new__(ndarray, shape[1:], dtype=dtype, buffer=mm, offset=offset, order=0)
- else:
- # Create buffer and data.
- mm = mmap.mmap(fileno, bytes, access=mmap.ACCESS_READ)
- self.__array_data__ = ndarray.__new__(ndarray, shape, dtype=dtype, buffer=mm, offset=self._begin, order=0)
+ def __setattr__(self, attr, value):
+ # Store user defined attributes in a separate dict,
+ # so we can save them to file later.
+ try:
+ self._attributes[attr] = value
+ except AttributeError:
+ pass
+ self.__dict__[attr] = value
- # N-D array interface
- self.__array_interface__ = {'shape' : shape,
- 'typestr': dtype,
- 'data' : self.__array_data__,
- 'version': 3,
- }
+ @property
+ def shape(self):
+ return self.data.shape
+ def getValue(self):
+ return self.data.item()
+
+ def assignValue(self, value):
+ self.data.itemset(value)
+
+ def typecode(self):
+ return self._typecode
+
def __getitem__(self, index):
- datout = squeeze(self.__array_data__.__getitem__(index))
- # automatically
- # - remove singleton dimensions
- # - create a masked array using missing_value or _FillValue attribute
- # - apply scale_factor and add_offset to packed integer data
+ datout = squeeze(self.data[index])
if self._maskandscale:
return _maskandscale(self,datout)
else:
return datout
- def getValue(self):
- """For scalars."""
- return self.__array_data__.item()
+ def __setitem__(self, index, data):
+ # Expand data for record vars?
+ if not self._shape[0]:
+ if isinstance(index, tuple):
+ rec_index = index[0]
+ else:
+ rec_index = index
+ if isinstance(rec_index, slice):
+ recs = (rec_index.start or 0) + len(data)
+ else:
+ recs = rec_index + 1
+ if recs > len(self.data):
+ shape = (recs,) + self._shape[1:]
+ self.data.resize(shape)
+ self.data[index] = data
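
The growth rule in ``__setitem__`` above: assigning past the current
number of records resizes the data array along the record axis. The same
logic in isolation (run as a script, since in-place resize is picky about
extra references):

    from numpy import zeros

    data = zeros((2, 3), 'i')   # two records of three values each
    rec_index = 5               # write to record 5
    recs = rec_index + 1
    if recs > len(data):
        data.resize((recs,) + data.shape[1:])
    print data.shape            # (6, 3)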
- def typecode(self):
- return ['b', 'c', 'h', 'i', 'f', 'd'][self._nc_type-1]
+
+NetCDFFile = netcdf_file
+NetCDFVariable = netcdf_variable
+
+# default _FillValue for netcdf types (apply also to corresponding
+# DAP types).
+_default_fillvals = {'c':'\0',
+ 'S':"",
+ 'b':-127,
+ 'B':-127,
+ 'h':-32767,
+ 'H':65535,
+ 'i':-2147483647L,
+ 'L':4294967295L,
+ 'q':-2147483647L,
+ 'f':9.9692099683868690e+36,
+ 'd':9.9692099683868690e+36}
+def _maskandscale(var,datout):
+ totalmask = zeros(datout.shape,bool)
+ fillval = None
+ if hasattr(var, 'missing_value') and (datout == var.missing_value).any():
+ fillval = var.missing_value
+ totalmask += datout==fillval
+ if hasattr(var, '_FillValue') and (datout == var._FillValue).any():
+ if fillval is None:
+ fillval = var._FillValue
+ totalmask += datout==var._FillValue
+ elif (datout == _default_fillvals[var.typecode()]).any():
+ if fillval is None:
+ fillval = _default_fillvals[var.typecode()]
+ totalmask += datout==_default_fillvals[var.typecode()]
+ if fillval is not None:
+ datout = ma.masked_array(datout,mask=totalmask,fill_value=fillval)
+ try:
+ datout = var.scale_factor*datout + var.add_offset
+ except:
+ pass
+ return datout
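
For reference, what ``_maskandscale`` amounts to for packed integer data
with a ``missing_value`` attribute: mask the fill values, then apply
``scale_factor`` and ``add_offset``. An illustration with made-up numbers:

    from numpy import array, ma

    packed = array([10, 20, -999], 'h')   # shorts; -999 flags missing data
    missing_value, scale_factor, add_offset = -999, 0.5, 100.0

    masked = ma.masked_array(packed, mask=(packed == missing_value),
                             fill_value=missing_value)
    print scale_factor*masked + add_offset   # [105.0 110.0 --]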