Revision: 6007
http://matplotlib.svn.sourceforge.net/matplotlib/?rev=6007&view=rev
Author: jswhit
Date: 2008-08-08 20:22:50 +0000 (Fri, 08 Aug 2008)
Log Message:
-----------
update pupynere to 1.0.1
Modified Paths:
--------------
trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py
trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py
Added Paths:
-----------
trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py
Modified: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py 2008-08-08 12:19:47 UTC (rev 6006)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py 2008-08-08 20:22:50 UTC (rev 6007)
@@ -36,7 +36,7 @@
import numpy as np
import numpy.ma as ma
from shapelib import ShapeFile
-import _geoslib, pupynere, netcdftime
+import _geoslib, netcdf, netcdftime
# basemap data files now installed in lib/matplotlib/toolkits/basemap/data
basemap_datadir = os.sep.join([os.path.dirname(__file__), 'data'])
@@ -3640,7 +3640,7 @@
else:
return corners
-def NetCDFFile(file, maskandscale=True):
+def NetCDFFile(file, mode='r', maskandscale=True):
"""NetCDF File reader. API is the same as Scientific.IO.NetCDF.
If ``file`` is a URL that starts with `http`, it is assumed
to be a remote OPenDAP dataset, and the python dap client is used
@@ -3656,9 +3656,9 @@
``maskandscale`` keyword to False.
"""
if file.startswith('http'):
- return pupynere._RemoteFile(file,maskandscale)
+ return netcdf._RemoteFile(file,maskandscale=maskandscale)
else:
- return pupynere._LocalFile(file,maskandscale)
+ return netcdf.netcdf_file(file,mode=mode,maskandscale=maskandscale)
def num2date(times,units='days since 0001-01-01 00:00:00',calendar='proleptic_gregorian'):
"""
Added: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py
(rev 0)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py 2008-08-08 20:22:50 UTC (rev 6007)
@@ -0,0 +1,76 @@
+from numpy import ma, squeeze
+from pupynere import netcdf_file, _maskandscale
+from dap.client import open as open_remote
+from dap.dtypes import ArrayType, GridType, typemap
+
+_typecodes = dict([[_v,_k] for _k,_v in typemap.items()])
+
+class _RemoteFile(object):
+ """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+
+ def __init__(self, file, maskandscale=False):
+ self._buffer = open_remote(file)
+ self._maskandscale = maskandscale
+ self._parse()
+
+ def read(self, size=-1):
+ """Alias for reading the file buffer."""
+ return self._buffer.read(size)
+
+ def _parse(self):
+ """Initial parsing of the header."""
+ # Read header info.
+ self._dim_array()
+ self._gatt_array()
+ self._var_array()
+
+ def _dim_array(self):
+ """Read a dict with dimensions names and sizes."""
+ self.dimensions = {}
+ self._dims = []
+ for k,d in self._buffer.iteritems():
+ if (isinstance(d, ArrayType) or isinstance(d, GridType)) and len(d.shape) == 1 and k == d.dimensions[0]:
+ name = k
+ length = len(d)
+ self.dimensions[name] = length
+ self._dims.append(name) # preserve dim order
+
+ def _gatt_array(self):
+ """Read global attributes."""
+ self.__dict__.update(self._buffer.attributes)
+
+ def _var_array(self):
+ """Read all variables."""
+ # Read variables.
+ self.variables = {}
+ for k,d in self._buffer.iteritems():
+ if isinstance(d, GridType) or isinstance(d, ArrayType):
+ name = k
+ self.variables[name] = _RemoteVariable(d,self._maskandscale)
+
+ def close(self):
+ # this is a no-op provided for compatibility
+ pass
+
+class _RemoteVariable(object):
+ def __init__(self, var, maskandscale):
+ self._var = var
+ self._maskandscale = maskandscale
+ self.dtype = var.type
+ self.shape = var.shape
+ self.dimensions = var.dimensions
+ self.__dict__.update(var.attributes)
+
+ def __getitem__(self, index):
+ datout = squeeze(self._var.__getitem__(index))
+ # automatically
+ # - remove singleton dimensions
+ # - create a masked array using missing_value or _FillValue attribute
+ # - apply scale_factor and add_offset to packed integer data
+ if self._maskandscale:
+ return _maskandscale(self,datout)
+ else:
+ return datout
+
+ def typecode(self):
+ return _typecodes[self.dtype]
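
The ``_dim_array`` method above infers dimensions from coordinate
variables: a dataset member counts as a dimension if it is 1-D and named
after its own first dimension. A standalone sketch of that rule, where
``FakeArray`` is a hypothetical stand-in for dap's ``ArrayType``/``GridType``
(the real code also checks ``isinstance``):

    class FakeArray(object):
        def __init__(self, dimensions, data):
            self.dimensions = dimensions
            self.shape = (len(data),)
            self._data = data
        def __len__(self):
            return len(self._data)

    dataset = {'time': FakeArray(('time',), range(10)),  # coordinate variable
               'temp': FakeArray(('time',), range(10))}  # plain data variable

    dimensions = {}
    for k, d in dataset.iteritems():
        # 1-D and named after its own first dimension
        if len(d.shape) == 1 and k == d.dimensions[0]:
            dimensions[k] = len(d)

    print dimensions   # {'time': 10}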
Modified: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py 2008-08-08 12:19:47 UTC (rev 6006)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py 2008-08-08 20:22:50 UTC (rev 6007)
@@ -1,45 +1,89 @@
-"""NetCDF reader.
+"""
+NetCDF reader/writer module.
-Pupynere implements a PUre PYthon NEtcdf REader.
+This module implements the Scientific.IO.NetCDF API to read and create
+NetCDF files. The same API is also used in the PyNIO and pynetcdf
+modules, allowing these modules to be used interchangeably when working
+with NetCDF files. The major advantage of ``scipy.io.netcdf`` over the
+other modules is that, unlike them, it doesn't require the code to be
+linked against the NetCDF libraries.
-Copyright (c) 2003-2006 Roberto De Almeida <[EMAIL PROTECTED]>
+The code is based on the `NetCDF file format specification
+<http://www.unidata.ucar.edu/software/netcdf/guide_15.html>`_. A NetCDF
+file is a self-describing binary format, with a header followed by
+data. The header contains metadata describing dimensions, variables
+and the position of the data in the file, so access can be done in an
+efficient manner without loading unnecessary data into memory. We use
+the ``mmap`` module to create Numpy arrays mapped to the data on disk,
+for the same purpose.
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject
-to the following conditions:
+The structure of a NetCDF file is as follows:
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
+ C D F <VERSION BYTE> <NUMBER OF RECORDS>
+ <DIMENSIONS> <GLOBAL ATTRIBUTES> <VARIABLES METADATA>
+ <NON-RECORD DATA> <RECORD DATA>
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Record data refers to data where the first axis can be expanded at
+will. All record variables share the same dimension as their first
+axis, and they are stored at the end of the file record by record, i.e.
+
+ A[0], B[0], ..., A[1], B[1], ..., etc,
+
+so that new data can be appended to the file without changing its
+original structure. Non-record data are padded to a four-byte boundary.
+Record data are also padded, unless there is exactly one record
+variable in the file, in which case the padding is dropped. All data
+is stored in big-endian byte order.
+
+The Scientific.IO.NetCDF API allows attributes to be added directly to
+instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
+between user-set attributes and instance attributes, user-set attributes
+are automatically stored in the ``_attributes`` attribute by overloading
+``__setattr__``. This is why the code sometimes uses
+``obj.__dict__['key'] = value`` instead of simply ``obj.key = value``;
+otherwise the key would also end up among the user-set attributes and
+be written back to the file.
+
+To create a NetCDF file::
+
+ >>> import time
+ >>> f = netcdf_file('simple.nc', 'w')
+ >>> f.history = 'Created for a test'
+ >>> f.createDimension('time', 10)
+ >>> time = f.createVariable('time', 'i', ('time',))
+ >>> time[:] = range(10)
+ >>> time.units = 'days since 2008-01-01'
+ >>> f.close()
+
+To read the NetCDF file we just created::
+
+ >>> f = netcdf_file('simple.nc', 'r')
+ >>> print f.history
+ Created for a test
+ >>> time = f.variables['time']
+ >>> print time.units
+ days since 2008-01-01
+ >>> print time.shape
+ (10,)
+ >>> print time[-1]
+ 9
+ >>> f.close()
+
+TODO: properly implement ``_FillValue``.
"""
-__author__ = "Roberto De Almeida <[EMAIL PROTECTED]>"
+__all__ = ['netcdf_file', 'netcdf_variable']
-import struct
-import itertools
-import mmap
+from operator import mul
+from mmap import mmap, ACCESS_READ
-from numpy import ndarray, empty, array, ma, squeeze, zeros
-import numpy
+from numpy import fromstring, ndarray, dtype, empty, array, asarray, squeeze, zeros, ma
+from numpy import little_endian as LITTLE_ENDIAN
-from dap.client import open as open_remote
-from dap.dtypes import ArrayType, GridType, typemap
-ABSENT = '\x00' * 8
-ZERO = '\x00' * 4
-NC_BYTE = '\x00\x00\x00\x01'
+ABSENT = '\x00\x00\x00\x00\x00\x00\x00\x00'
+ZERO = '\x00\x00\x00\x00'
+NC_BYTE = '\x00\x00\x00\x01'
NC_CHAR = '\x00\x00\x00\x02'
NC_SHORT = '\x00\x00\x00\x03'
NC_INT = '\x00\x00\x00\x04'
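
The attribute-interception scheme described in the docstring above is easy
to see in isolation. A minimal sketch (``AttrDemo`` is hypothetical, not
part of the module):

    class AttrDemo(object):
        def __init__(self):
            # bypass __setattr__ so _attributes itself is not recorded
            self.__dict__['_attributes'] = {}

        def __setattr__(self, attr, value):
            self._attributes[attr] = value  # remembered for the file header
            self.__dict__[attr] = value     # normal attribute access still works

    d = AttrDemo()
    d.history = 'Created for a test'
    print d._attributes   # {'history': 'Created for a test'}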
@@ -49,360 +93,531 @@
NC_VARIABLE = '\x00\x00\x00\x0b'
NC_ATTRIBUTE = '\x00\x00\x00\x0c'
-_typecodes = dict([[_v,_k] for _k,_v in typemap.items()])
-# default _FillValue for netcdf types (apply also to corresponding
-# DAP types).
-_default_fillvals = {'c':'\0',
- 'S':"",
- 'b':-127,
- 'B':-127,
- 'h':-32767,
- 'H':65535,
- 'i':-2147483647L,
- 'L':4294967295L,
- 'q':-2147483647L,
- 'f':9.9692099683868690e+36,
- 'd':9.9692099683868690e+36}
-def NetCDFFile(file, maskandscale=True):
- """NetCDF File reader. API is the same as Scientific.IO.NetCDF.
- If 'file' is a URL that starts with 'http', it is assumed
- to be a remote OPenDAP dataset, and the python dap client is used
- to retrieve the data. Only the OPenDAP Array and Grid data
- types are recognized. If file does not start with 'http', it
- is assumed to be a local netCDF file. Data will
- automatically be converted to masked arrays if the variable has either
- a 'missing_value' or '_FillValue' attribute, and some data points
- are equal to the value specified by that attribute. In addition,
- variables stored as integers that have the 'scale_factor' and
- 'add_offset' attribute will automatically be rescaled to floats when
- read. To suppress these automatic conversions, set the
- maskandscale keyword to False.
+TYPEMAP = { NC_BYTE: ('b', 1),
+ NC_CHAR: ('c', 1),
+ NC_SHORT: ('h', 2),
+ NC_INT: ('i', 4),
+ NC_FLOAT: ('f', 4),
+ NC_DOUBLE: ('d', 8) }
+
+REVERSE = { 'b': NC_BYTE,
+ 'c': NC_CHAR,
+ 'h': NC_SHORT,
+ 'i': NC_INT,
+ 'f': NC_FLOAT,
+ 'd': NC_DOUBLE,
+
+ # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
+ # used when getting the types from generic attributes.
+ 'l': NC_INT,
+ 'S': NC_CHAR }
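
The extra 'l' and 'S' entries are needed because attribute values set from
plain Python objects arrive with those dtype characters. A quick check (the
'l' result assumes a platform where the default integer maps to a C long):

    from numpy import asarray

    print asarray(1).dtype.char      # 'l'
    print asarray('foo').dtype.char  # 'S'
    print asarray(1.5).dtype.char    # 'd'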
+
+
+class netcdf_file(object):
"""
- if file.startswith('http'):
- return _RemoteFile(file,maskandscale)
- else:
- return _LocalFile(file,maskandscale)
-
-def _maskandscale(var,datout):
- totalmask = zeros(datout.shape,numpy.bool)
- fillval = None
- if hasattr(var, 'missing_value') and (datout == var.missing_value).any():
- fillval = var.missing_value
- totalmask += datout==fillval
- if hasattr(var, '_FillValue') and (datout == var._FillValue).any():
- if fillval is None:
- fillval = var._FillValue
- totalmask += datout==var._FillValue
- elif (datout == _default_fillvals[var.typecode()]).any():
- if fillval is None:
- fillval = _default_fillvals[var.typecode()]
- totalmask += datout==_default_fillvals[var.dtype]
- if fillval is not None:
- datout = ma.masked_array(datout,mask=totalmask,fill_value=fillval)
- try:
- datout = var.scale_factor*datout + var.add_offset
- except:
- pass
- return datout
+ A ``netcdf_file`` object has two standard attributes: ``dimensions`` and
+ ``variables``. The values of both are dictionaries, mapping dimension
+ names to their associated lengths and variable names to variables,
+ respectively. Application programs should never modify these
+ dictionaries.
-class _RemoteFile(object):
- """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+ All other attributes correspond to global attributes defined in the
+ NetCDF file. Global file attributes are created by assigning to an
+ attribute of the ``netcdf_file`` object.
- def __init__(self, file, maskandscale):
- self._buffer = open_remote(file)
+ """
+ def __init__(self, filename, mode='r', maskandscale=False):
+
self._maskandscale = maskandscale
- self._parse()
- def read(self, size=-1):
- """Alias for reading the file buffer."""
- return self._buffer.read(size)
+ self.filename = filename
- def _parse(self):
- """Initial parsing of the header."""
- # Read header info.
- self._dim_array()
- self._gatt_array()
- self._var_array()
+ assert mode in 'rw', "Mode must be either 'r' or 'w'."
+ self.mode = mode
- def _dim_array(self):
- """Read a dict with dimensions names and sizes."""
self.dimensions = {}
+ self.variables = {}
+
self._dims = []
- for k,d in self._buffer.iteritems():
- if (isinstance(d, ArrayType) or isinstance(d, GridType)) and len(d.shape) == 1 and k == d.dimensions[0]:
- name = k
- length = len(d)
- self.dimensions[name] = length
- self._dims.append(name) # preserve dim order
+ self._recs = 0
+ self._recsize = 0
- def _gatt_array(self):
- """Read global attributes."""
- self.__dict__.update(self._buffer.attributes)
+ self.fp = open(self.filename, '%sb' % mode)
- def _var_array(self):
- """Read all variables."""
- # Read variables.
- self.variables = {}
- for k,d in self._buffer.iteritems():
- if isinstance(d, GridType) or isinstance(d, ArrayType):
- name = k
- self.variables[name] = _RemoteVariable(d,self._maskandscale)
+ self._attributes = {}
+ if mode is 'r':
+ self._read()
+
+ def __setattr__(self, attr, value):
+ # Store user defined attributes in a separate dict,
+ # so we can save them to file later.
+ try:
+ self._attributes[attr] = value
+ except AttributeError:
+ pass
+ self.__dict__[attr] = value
+
def close(self):
- # this is a no-op provided for compatibility
- pass
+ if not self.fp.closed:
+ try:
+ self.flush()
+ finally:
+ self.fp.close()
+ __del__ = close
+ def createDimension(self, name, length):
+ self.dimensions[name] = length
+ self._dims.append(name)
-class _RemoteVariable(object):
- def __init__(self, var, maskandscale):
- self._var = var
- self._maskandscale = maskandscale
- self.dtype = var.type
- self.shape = var.shape
- self.dimensions = var.dimensions
- self.__dict__.update(var.attributes)
+ def createVariable(self, name, type, dimensions):
+ shape = tuple([self.dimensions[dim] for dim in dimensions])
+ shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for numpy
- def __getitem__(self, index):
- datout = squeeze(self._var.__getitem__(index))
- # automatically
- # - remove singleton dimensions
- # - create a masked array using missing_value or _FillValue attribute
- # - apply scale_factor and add_offset to packed integer data
- if self._maskandscale:
- return _maskandscale(self,datout)
+ if isinstance(type, basestring): type = dtype(type)
+ typecode, size = type.char, type.itemsize
+ dtype_ = '>%s' % typecode
+ if size > 1: dtype_ += str(size)
+
+ data = empty(shape_, dtype=dtype_)
+ self.variables[name] = netcdf_variable(data, typecode, shape, dimensions)
+ return self.variables[name]
+
+ def flush(self):
+ if self.mode is 'w':
+ self._write()
+ sync = flush
+
+ def _write(self):
+ self.fp.write('CDF')
+
+ self.__dict__['version_byte'] = 1
+ self.fp.write(array(1, '>b').tostring())
+
+ # Write headers and data.
+ self._write_numrecs()
+ self._write_dim_array()
+ self._write_gatt_array()
+ self._write_var_array()
+
+ def _write_numrecs(self):
+ # Get highest record count from all record variables.
+ for var in self.variables.values():
+ if not var.shape[0] and len(var.data) > self._recs:
+ self.__dict__['_recs'] = len(var.data)
+ self._pack_int(self._recs)
+
+ def _write_dim_array(self):
+ if self.dimensions:
+ self.fp.write(NC_DIMENSION)
+ self._pack_int(len(self.dimensions))
+ for name, length in self.dimensions.items():
+ self._pack_string(name)
+ self._pack_int(length or 0) # replace None with 0 for record dimension
else:
- return datout
+ self.fp.write(ABSENT)
- def typecode(self):
- return _typecodes[self.dtype]
+ def _write_gatt_array(self):
+ self._write_att_array(self._attributes)
+ def _write_att_array(self, attributes):
+ if attributes:
+ self.fp.write(NC_ATTRIBUTE)
+ self._pack_int(len(attributes))
+ for name, values in attributes.items():
+ self._pack_string(name)
+ self._write_values(values)
+ else:
+ self.fp.write(ABSENT)
-class _LocalFile(object):
- """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+ def _write_var_array(self):
+ if self.variables:
+ self.fp.write(NC_VARIABLE)
+ self._pack_int(len(self.variables))
- def __init__(self, file, maskandscale):
- self._buffer = open(file, 'rb')
- self._maskandscale = maskandscale
- self._parse()
+ # Sort variables non-recs first, then recs.
+ variables = self.variables.items()
+ variables.sort(key=lambda (k, v): v.shape and v.shape[0] is not None)
+ variables.reverse()
+ variables = [k for (k, v) in variables]
- def read(self, size=-1):
- """Alias for reading the file buffer."""
- return self._buffer.read(size)
+ # Set the metadata for all variables.
+ for name in variables:
+ self._write_var_metadata(name)
+ # Now that we have the metadata, we know the vsize of
+ # each record variable, so we can calculate recsize.
+ self.__dict__['_recsize'] = sum([
+ var._vsize for var in self.variables.values()
+ if var.shape[0] is None])
+ # Set the data for all variables.
+ for name in variables:
+ self._write_var_data(name)
+ else:
+ self.fp.write(ABSENT)
- def _parse(self):
- """Initial parsing of the header."""
- # Check magic bytes.
- assert self.read(3) == 'CDF'
+ def _write_var_metadata(self, name):
+ var = self.variables[name]
- # Read version byte.
- byte = self.read(1)
- self.version_byte = struct.unpack('>b', byte)[0]
+ self._pack_string(name)
+ self._pack_int(len(var.dimensions))
+ for dimname in var.dimensions:
+ dimid = self._dims.index(dimname)
+ self._pack_int(dimid)
- # Read header info.
- self._numrecs()
- self._dim_array()
- self._gatt_array()
- self._var_array()
+ self._write_att_array(var._attributes)
- def _numrecs(self):
- """Read number of records."""
- self._nrecs = self._unpack_int()
+ nc_type = REVERSE[var.typecode()]
+ self.fp.write(nc_type)
- def _dim_array(self):
- """Read a dict with dimensions names and sizes."""
- assert self.read(4) in [ZERO, NC_DIMENSION]
+ if var.shape[0]:
+ vsize = var.data.size * var.data.itemsize
+ vsize += -vsize % 4
+ else: # record variable
+ vsize = var.data[0].size * var.data.itemsize
+ rec_vars = len([var for var in self.variables.values()
+ if var.shape[0] is None])
+ if rec_vars > 1:
+ vsize += -vsize % 4
+ self.variables[name].__dict__['_vsize'] = vsize
+ self._pack_int(vsize)
+
+ # Pack a bogus begin, and set the real value later.
+ self.variables[name].__dict__['_begin'] = self.fp.tell()
+ self._pack_begin(0)
+
+ def _write_var_data(self, name):
+ var = self.variables[name]
+
+ # Set begin in file header.
+ the_beguine = self.fp.tell()
+ self.fp.seek(var._begin)
+ self._pack_begin(the_beguine)
+ self.fp.seek(the_beguine)
+
+ # Write data.
+ if var.shape[0]:
+ self.fp.write(var.data.tostring())
+ count = var.data.size * var.data.itemsize
+ self.fp.write('0' * (var._vsize - count))
+ else: # record variable
+ # Handle rec vars with shape[0] < nrecs.
+ if self._recs > len(var.data):
+ shape = (self._recs,) + var.data.shape[1:]
+ var.data.resize(shape)
+
+ pos0 = pos = self.fp.tell()
+ for rec in var.data:
+ # Apparently scalars cannot be converted to big endian. If we
+ # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
+ # will remain as ``=i4``.
+ if not rec.shape and (rec.dtype.byteorder == '<' or
+ (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
+ rec = rec.byteswap()
+ self.fp.write(rec.tostring())
+ # Padding
+ count = rec.size * rec.itemsize
+ self.fp.write('0' * (var._vsize - count))
+ pos += self._recsize
+ self.fp.seek(pos)
+ self.fp.seek(pos0 + var._vsize)
+
+ def _write_values(self, values):
+ values = asarray(values)
+ values = values.astype(values.dtype.newbyteorder('>'))
+
+ nc_type = REVERSE[values.dtype.char]
+ self.fp.write(nc_type)
+
+ if values.dtype.char == 'S':
+ nelems = values.itemsize
+ else:
+ nelems = values.size
+ self._pack_int(nelems)
+
+ if not values.shape and (values.dtype.byteorder == '<' or
+ (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
+ values = values.byteswap()
+ self.fp.write(values.tostring())
+ count = values.size * values.itemsize
+ self.fp.write('0' * (-count % 4)) # pad
+
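The ``-count % 4`` idiom used for padding computes how many bytes are
needed to reach the next four-byte boundary. A quick sanity check:

    for count in range(1, 9):
        pad = -count % 4
        print count, pad, (count + pad) % 4 == 0   # pad cycles 3, 2, 1, 0
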
+ def _read(self):
+ # Check magic bytes and version
+ assert self.fp.read(3) == 'CDF', "Error: %s is not a valid NetCDF 3 file" % self.filename
+ self.__dict__['version_byte'] = fromstring(self.fp.read(1), '>b')[0]
+
+ # Read file headers and set data.
+ self._read_numrecs()
+ self._read_dim_array()
+ self._read_gatt_array()
+ self._read_var_array()
+
+ def _read_numrecs(self):
+ self.__dict__['_recs'] = self._unpack_int()
+
+ def _read_dim_array(self):
+ assert self.fp.read(4) in [ZERO, NC_DIMENSION]
count = self._unpack_int()
- self.dimensions = {}
- self._dims = []
for dim in range(count):
- name = self._read_string()
- length = self._unpack_int()
- if length == 0: length = None # record dimension
+ name = self._unpack_string()
+ length = self._unpack_int() or None # None for record dimension
self.dimensions[name] = length
- self._dims.append(name) # preserve dim order
+ self._dims.append(name) # preserve order
- def _gatt_array(self):
- """Read global attributes."""
- self.attributes = self._att_array()
+ def _read_gatt_array(self):
+ for k, v in self._read_att_array().items():
+ self.__setattr__(k, v)
- # Update __dict__ for compatibility with S.IO.N
- self.__dict__.update(self.attributes)
-
- def _att_array(self):
- """Read a dict with attributes."""
- assert self.read(4) in [ZERO, NC_ATTRIBUTE]
+ def _read_att_array(self):
+ assert self.fp.read(4) in [ZERO, NC_ATTRIBUTE]
count = self._unpack_int()
- # Read attributes.
attributes = {}
- for attribute in range(count):
- name = self._read_string()
- nc_type = self._unpack_int()
- n = self._unpack_int()
+ for attr in range(count):
+ name = self._unpack_string()
+ attributes[name] = self._read_values()
+ return attributes
- # Read value for attributes.
- attributes[name] = self._read_values(n, nc_type)
+ def _read_var_array(self):
+ assert self.fp.read(4) in [ZERO, NC_VARIABLE]
- return attributes
+ begin = 0
+ dtypes = {'names': [], 'formats': []}
+ rec_vars = []
+ count = self._unpack_int()
+ for var in range(count):
+ name, dimensions, shape, attributes, typecode, size, dtype_, begin_, vsize = self._read_var()
+ if shape and shape[0] is None:
+ rec_vars.append(name)
+ self.__dict__['_recsize'] += vsize
+ if begin == 0: begin = begin_
+ dtypes['names'].append(name)
+ dtypes['formats'].append(str(shape[1:]) + dtype_)
- def _var_array(self):
- """Read all variables."""
- assert self.read(4) in [ZERO, NC_VARIABLE]
+ # Handle padding with a virtual variable.
+ if typecode in 'bch':
+ actual_size = reduce(mul, (1,) + shape[1:]) * size
+ padding = -actual_size % 4
+ if padding:
+ dtypes['names'].append('_padding_%d' % var)
+ dtypes['formats'].append('(%d,)>b' % padding)
- # Read size of each record, in bytes.
- self._read_recsize()
+ # Data will be set later.
+ data = None
+ else:
+ mm = mmap(self.fp.fileno(), begin_+vsize, access=ACCESS_READ)
+ data = ndarray.__new__(ndarray, shape, dtype=dtype_,
+ buffer=mm, offset=begin_, order=0)
- # Read variables.
- self.variables = {}
- count = self._unpack_int()
- for variable in range(count):
- name = self._read_string()
- self.variables[name] = self._read_var()
+ # Add variable.
+ self.variables[name] = netcdf_variable(
+ data, typecode, shape, dimensions, attributes)
- def _read_recsize(self):
- """Read all variables and compute record bytes."""
- pos = self._buffer.tell()
-
- recsize = 0
- count = self._unpack_int()
- for variable in range(count):
- name = self._read_string()
- n = self._unpack_int()
- isrec = False
- for i in range(n):
- dimid = self._unpack_int()
- name = self._dims[dimid]
- dim = self.dimensions[name]
- if dim is None and i == 0:
- isrec = True
- attributes = self._att_array()
- nc_type = self._unpack_int()
- vsize = self._unpack_int()
- begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
+ if rec_vars:
+ # Remove padding when only one record variable.
+ if len(rec_vars) == 1:
+ dtypes['names'] = dtypes['names'][:1]
+ dtypes['formats'] = dtypes['formats'][:1]
- if isrec: recsize += vsize
+ # Build rec array.
+ mm = mmap(self.fp.fileno(), begin+self._recs*self._recsize, access=ACCESS_READ)
+ rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=dtypes,
+ buffer=mm, offset=begin, order=0)
- self._recsize = recsize
- self._buffer.seek(pos)
+ for var in rec_vars:
+ self.variables[var].__dict__['data'] = rec_array[var]
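
The record array built above views all record variables through one
structured dtype, one element per record, so each variable's records can
be pulled back out by name. A minimal sketch with two hypothetical record
variables A and B:

    from numpy import zeros

    dtypes = {'names': ['A', 'B'], 'formats': ['>i4', '(3,)>f4']}
    rec_array = zeros(2, dtype=dtypes)   # two records
    print rec_array['A'].shape           # (2,)
    print rec_array['B'].shape           # (2, 3)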
def _read_var(self):
+ name = self._unpack_string()
dimensions = []
shape = []
- n = self._unpack_int()
- isrec = False
- for i in range(n):
+ dims = self._unpack_int()
+
+ for i in range(dims):
dimid = self._unpack_int()
- name = self._dims[dimid]
- dimensions.append(name)
- dim = self.dimensions[name]
- if dim is None and i == 0:
- dim = self._nrecs
- isrec = True
+ dimname = self._dims[dimid]
+ dimensions.append(dimname)
+ dim = self.dimensions[dimname]
shape.append(dim)
dimensions = tuple(dimensions)
shape = tuple(shape)
- attributes = self._att_array()
- nc_type = self._unpack_int()
+ attributes = self._read_att_array()
+ nc_type = self.fp.read(4)
vsize = self._unpack_int()
-
- # Read offset.
begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
- return _LocalVariable(self._buffer.fileno(), nc_type, vsize, begin, shape, dimensions, attributes, isrec, self._recsize, maskandscale=self._maskandscale)
+ typecode, size = TYPEMAP[nc_type]
+ if typecode is 'c':
+ dtype_ = '>c'
+ else:
+ dtype_ = '>%s' % typecode
+ if size > 1: dtype_ += str(size)
- def _read_values(self, n, nc_type):
- bytes = [1, 1, 2, 4, 4, 8]
- typecodes = ['b', 'c', 'h', 'i', 'f', 'd']
-
- count = n * bytes[nc_type-1]
- values = self.read(count)
- padding = self.read((4 - (count % 4)) % 4)
-
- typecode = typecodes[nc_type-1]
- if nc_type != 2: # not char
- values = struct.unpack('>%s' % (typecode * n), values)
- values = array(values, dtype=typecode)
+ return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
+
+ def _read_values(self):
+ nc_type = self.fp.read(4)
+ n = self._unpack_int()
+
+ typecode, size = TYPEMAP[nc_type]
+
+ count = n*size
+ values = self.fp.read(count)
+ self.fp.read(-count % 4) # read padding
+
+ if typecode is not 'c':
+ values = fromstring(values, dtype='>%s%d' % (typecode, size))
+ if values.shape == (1,): values = values[0]
else:
- # Remove EOL terminator.
- if values.endswith('\x00'): values = values[:-1]
-
+ values = values.rstrip('\x00')
return values
+ def _pack_begin(self, begin):
+ if self.version_byte == 1:
+ self._pack_int(begin)
+ elif self.version_byte == 2:
+ self._pack_int64(begin)
+
+ def _pack_int(self, value):
+ self.fp.write(array(value, '>i').tostring())
+ _pack_int32 = _pack_int
+
def _unpack_int(self):
- return struct.unpack('>i', self.read(4))[0]
+ return fromstring(self.fp.read(4), '>i')[0]
_unpack_int32 = _unpack_int
+ def _pack_int64(self, value):
+ self.fp.write(array(value, '>q').tostring())
+
def _unpack_int64(self):
- return struct.unpack('>q', self.read(8))[0]
+ return fromstring(self.fp.read(8), '>q')[0]
- def _read_string(self):
- count = struct.unpack('>i', self.read(4))[0]
- s = self.read(count)
- # Remove EOL terminator.
- if s.endswith('\x00'): s = s[:-1]
- padding = self.read((4 - (count % 4)) % 4)
+ def _pack_string(self, s):
+ count = len(s)
+ self._pack_int(count)
+ self.fp.write(s)
+ self.fp.write('0' * (-count % 4)) # pad
+
+ def _unpack_string(self):
+ count = self._unpack_int()
+ s = self.fp.read(count).rstrip('\x00')
+ self.fp.read(-count % 4) # read padding
return s
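
The packing helpers above use numpy for the big-endian conversions that
the old code did with ``struct``. A round-trip check using the same calls:

    from numpy import array, fromstring

    packed = array(1234, '>i').tostring()
    print repr(packed)                 # '\x00\x00\x04\xd2'
    print fromstring(packed, '>i')[0]  # 1234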
- def close(self):
- self._buffer.close()
+class netcdf_variable(object):
+ """
+ ``netcdf_variable`` objects are constructed by calling the method
+ ``createVariable`` on the netcdf_file object.
-class _LocalVariable(object):
- def __init__(self, fileno, nc_type, vsize, begin, shape, dimensions, attributes, isrec=False, recsize=0, maskandscale=True):
- self._nc_type = nc_type
- self._vsize = vsize
- self._begin = begin
- self.shape = shape
+ ``netcdf_variable`` objects behave much like array objects defined in
+ Numpy, except that their data resides in a file. Data is read by
+ indexing and written by assigning to an indexed subset; the entire
+ array can be accessed by the index ``[:]`` or using the methods
+ ``getValue`` and ``assignValue``. ``netcdf_variable`` objects also
+ have attribute ``shape`` with the same meaning as for arrays, but
+ the shape cannot be modified. There is another read-only attribute
+ ``dimensions``, whose value is the tuple of dimension names.
+
+ All other attributes correspond to variable attributes defined in
+ the NetCDF file. Variable attributes are created by assigning to an
+ attribute of the ``netcdf_variable`` object.
+
+ """
+ def __init__(self, data, typecode, shape, dimensions, attributes=None, maskandscale=True):
+ self.data = data
+ self._typecode = typecode
+ self._shape = shape
self.dimensions = dimensions
- self.attributes = attributes # for ``dap.plugins.netcdf``
- self.__dict__.update(attributes)
- self._is_record = isrec
self._maskandscale = maskandscale
- # Number of bytes and type.
- self._bytes = [1, 1, 2, 4, 4, 8][self._nc_type-1]
- type_ = ['i', 'S', 'i', 'i', 'f', 'f'][self._nc_type-1]
- dtype = '>%s%d' % (type_, self._bytes)
- bytes = self._begin + self._vsize
+ self._attributes = attributes or {}
+ for k, v in self._attributes.items():
+ self.__dict__[k] = v
- if isrec:
- # Record variables are not stored contiguosly on disk, so we
- # need to create a separate array for each record.
- self.__array_data__ = empty(shape, dtype)
- bytes += (shape[0] - 1) * recsize
- for n in range(shape[0]):
- offset = self._begin + (n * recsize)
- mm = mmap.mmap(fileno, bytes, access=mmap.ACCESS_READ)
- self.__array_data__[n] = ndarray.__new__(ndarray, shape[1:], dtype=dtype, buffer=mm, offset=offset, order=0)
- else:
- # Create buffer and data.
- mm = mmap.mmap(fileno, bytes, access=mmap.ACCESS_READ)
- self.__array_data__ = ndarray.__new__(ndarray, shape, dtype=dtype, buffer=mm, offset=self._begin, order=0)
+ def __setattr__(self, attr, value):
+ # Store user defined attributes in a separate dict,
+ # so we can save them to file later.
+ try:
+ self._attributes[attr] = value
+ except AttributeError:
+ pass
+ self.__dict__[attr] = value
- # N-D array interface
- self.__array_interface__ = {'shape' : shape,
- 'typestr': dtype,
- 'data' : self.__array_data__,
- 'version': 3,
- }
+ @property
+ def shape(self):
+ return self.data.shape
+ def getValue(self):
+ return self.data.item()
+
+ def assignValue(self, value):
+ self.data.itemset(value)
+
+ def typecode(self):
+ return self._typecode
+
def __getitem__(self, index):
- datout = squeeze(self.__array_data__.__getitem__(index))
- # automatically
- # - remove singleton dimensions
- # - create a masked array using missing_value or _FillValue attribute
- # - apply scale_factor and add_offset to packed integer data
+ datout = squeeze(self.data[index])
if self._maskandscale:
return _maskandscale(self,datout)
else:
return datout
- def getValue(self):
- """For scalars."""
- return self.__array_data__.item()
+ def __setitem__(self, index, data):
+ # Expand data for record vars?
+ if not self._shape[0]:
+ if isinstance(index, tuple):
+ rec_index = index[0]
+ else:
+ rec_index = index
+ if isinstance(rec_index, slice):
+ recs = (rec_index.start or 0) + len(data)
+ else:
+ recs = rec_index + 1
+ if recs > len(self.data):
+ shape = (recs,) + self._shape[1:]
+ self.data.resize(shape)
+ self.data[index] = data
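
The growth rule in ``__setitem__`` above: assigning past the current
number of records resizes the data array along the record axis. The same
logic in isolation (run as a script, since in-place resize is picky about
extra references):

    from numpy import zeros

    data = zeros((2, 3), 'i')   # two records of three values each
    rec_index = 5               # write to record 5
    recs = rec_index + 1
    if recs > len(data):
        data.resize((recs,) + data.shape[1:])
    print data.shape            # (6, 3)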
- def typecode(self):
- return ['b', 'c', 'h', 'i', 'f', 'd'][self._nc_type-1]
+
+NetCDFFile = netcdf_file
+NetCDFVariable = netcdf_variable
+
+# default _FillValue for netcdf types (apply also to corresponding
+# DAP types).
+_default_fillvals = {'c':'\0',
+ 'S':"",
+ 'b':-127,
+ 'B':-127,
+ 'h':-32767,
+ 'H':65535,
+ 'i':-2147483647L,
+ 'L':4294967295L,
+ 'q':-2147483647L,
+ 'f':9.9692099683868690e+36,
+ 'd':9.9692099683868690e+36}
+def _maskandscale(var,datout):
+ totalmask = zeros(datout.shape,bool)
+ fillval = None
+ if hasattr(var, 'missing_value') and (datout == var.missing_value).any():
+ fillval = var.missing_value
+ totalmask += datout==fillval
+ if hasattr(var, '_FillValue') and (datout == var._FillValue).any():
+ if fillval is None:
+ fillval = var._FillValue
+ totalmask += datout==var._FillValue
+ elif (datout == _default_fillvals[var.typecode()]).any():
+ if fillval is None:
+ fillval = _default_fillvals[var.typecode()]
+ totalmask += datout==_default_fillvals[var.typecode()]
+ if fillval is not None:
+ datout = ma.masked_array(datout,mask=totalmask,fill_value=fillval)
+ try:
+ datout = var.scale_factor*datout + var.add_offset
+ except:
+ pass
+ return datout
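
For reference, what ``_maskandscale`` amounts to for packed integer data
with a ``missing_value`` attribute: mask the fill values, then apply
``scale_factor`` and ``add_offset``. An illustration with made-up numbers:

    from numpy import array, ma

    packed = array([10, 20, -999], 'h')   # shorts; -999 flags missing data
    missing_value, scale_factor, add_offset = -999, 0.5, 100.0

    masked = ma.masked_array(packed, mask=(packed == missing_value),
                             fill_value=missing_value)
    print scale_factor*masked + add_offset   # [105.0 110.0 --]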