Revision: 6007
          http://matplotlib.svn.sourceforge.net/matplotlib/?rev=6007&view=rev
Author:   jswhit
Date:     2008-08-08 20:22:50 +0000 (Fri, 08 Aug 2008)
Log Message:
-----------
update pupynere to 1.0.1

Modified Paths:
--------------
    trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py
    trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py

Added Paths:
-----------
    trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py

Modified: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py	2008-08-08 12:19:47 UTC (rev 6006)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/__init__.py	2008-08-08 20:22:50 UTC (rev 6007)
@@ -36,7 +36,7 @@
 import numpy as np
 import numpy.ma as ma
 from shapelib import ShapeFile
-import _geoslib, pupynere, netcdftime
+import _geoslib, netcdf, netcdftime
 
 # basemap data files now installed in lib/matplotlib/toolkits/basemap/data
 basemap_datadir = os.sep.join([os.path.dirname(__file__), 'data'])
@@ -3640,7 +3640,7 @@
     else:
         return corners
 
-def NetCDFFile(file, maskandscale=True):
+def NetCDFFile(file, mode='r', maskandscale=True):
     """NetCDF File reader. API is the same as Scientific.IO.NetCDF.
     If ``file`` is a URL that starts with `http`, it is assumed
     to be a remote OPenDAP dataset, and the python dap client is used
@@ -3656,9 +3656,9 @@
     ``maskandscale`` keyword to False.
     """
     if file.startswith('http'):
-        return pupynere._RemoteFile(file,maskandscale)
+        return netcdf._RemoteFile(file,maskandscale=maskandscale)
     else:
-        return pupynere._LocalFile(file,maskandscale)
+        return netcdf.netcdf_file(file,mode=mode,maskandscale=maskandscale)
 
 def num2date(times,units='days since 0001-01-01 00:00:00',calendar='proleptic_gregorian'):
     """
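[Editorial note] For context, a minimal usage sketch of the updated NetCDFFile
entry point. The file names are hypothetical; the signatures follow the hunk
above, which adds a ``mode`` keyword so local files can now be opened for
writing as well as reading:

    from mpl_toolkits.basemap import NetCDFFile

    # read-only, with automatic masking/scaling (the defaults)
    nc = NetCDFFile('example.nc')            # same as mode='r'
    # a new local file opened for writing via pupynere's netcdf_file
    out = NetCDFFile('new.nc', mode='w', maskandscale=False)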
Added: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py	                        (rev 0)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/netcdf.py	2008-08-08 20:22:50 UTC (rev 6007)
@@ -0,0 +1,76 @@
+from numpy import ma, squeeze
+from pupynere import netcdf_file, _maskandscale
+from dap.client import open as open_remote
+from dap.dtypes import ArrayType, GridType, typemap
+
+_typecodes = dict([[_v,_k] for _k,_v in typemap.items()])
+
+class _RemoteFile(object):
+    """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+
+    def __init__(self, file, maskandscale=False):
+        self._buffer = open_remote(file)
+        self._maskandscale = maskandscale
+        self._parse()
+
+    def read(self, size=-1):
+        """Alias for reading the file buffer."""
+        return self._buffer.read(size)
+
+    def _parse(self):
+        """Initial parsing of the header."""
+        # Read header info.
+        self._dim_array()
+        self._gatt_array()
+        self._var_array()
+
+    def _dim_array(self):
+        """Read a dict with dimension names and sizes."""
+        self.dimensions = {}
+        self._dims = []
+        for k,d in self._buffer.iteritems():
+            if (isinstance(d, ArrayType) or isinstance(d, GridType)) and len(d.shape) == 1 and k == d.dimensions[0]:
+                name = k
+                length = len(d)
+                self.dimensions[name] = length
+                self._dims.append(name) # preserve dim order
+
+    def _gatt_array(self):
+        """Read global attributes."""
+        self.__dict__.update(self._buffer.attributes)
+
+    def _var_array(self):
+        """Read all variables."""
+        self.variables = {}
+        for k,d in self._buffer.iteritems():
+            if isinstance(d, GridType) or isinstance(d, ArrayType):
+                name = k
+                self.variables[name] = _RemoteVariable(d,self._maskandscale)
+
+    def close(self):
+        # this is a no-op provided for compatibility
+        pass
+
+class _RemoteVariable(object):
+    def __init__(self, var, maskandscale):
+        self._var = var
+        self._maskandscale = maskandscale
+        self.dtype = var.type
+        self.shape = var.shape
+        self.dimensions = var.dimensions
+        self.__dict__.update(var.attributes)
+
+    def __getitem__(self, index):
+        datout = squeeze(self._var.__getitem__(index))
+        # automatically
+        # - remove singleton dimensions
+        # - create a masked array using missing_value or _FillValue attribute
+        # - apply scale_factor and add_offset to packed integer data
+        if self._maskandscale:
+            return _maskandscale(self,datout)
+        else:
+            return datout
+
+    def typecode(self):
+        return _typecodes[self.dtype]
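[Editorial note] Because _RemoteFile and _RemoteVariable mirror the local
reader's API, the dispatch in NetCDFFile above is transparent to callers.
A hedged sketch of the remote path follows; the URL and variable name are
made up, and only OPeNDAP Array and Grid types are handled, as the code
above shows:

    from mpl_toolkits.basemap import NetCDFFile

    # a name starting with 'http' is routed to netcdf._RemoteFile,
    # which pulls metadata and data through the dap client
    nc = NetCDFFile('http://server.example/dods/dataset')  # hypothetical URL
    print nc.dimensions              # dict mapping dimension name -> length
    v = nc.variables['temp']         # hypothetical variable name
    data = v[:]                      # masked and scaled, since maskandscale=True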
Modified: trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py
===================================================================
--- trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py	2008-08-08 12:19:47 UTC (rev 6006)
+++ trunk/toolkits/basemap/lib/mpl_toolkits/basemap/pupynere.py	2008-08-08 20:22:50 UTC (rev 6007)
@@ -1,45 +1,89 @@
-"""NetCDF reader.
+"""
+NetCDF reader/writer module.
 
-Pupynere implements a PUre PYthon NEtcdf REader.
+This module implements the Scientific.IO.NetCDF API to read and create
+NetCDF files. The same API is also used in the PyNIO and pynetcdf
+modules, allowing these modules to be used interchangeably when working
+with NetCDF files. The major advantage of ``scipy.io.netcdf`` over other
+modules is that it doesn't require the code to be linked to the NetCDF
+libraries as the other modules do.
 
-Copyright (c) 2003-2006 Roberto De Almeida <[EMAIL PROTECTED]>
+The code is based on the `NetCDF file format specification
+<http://www.unidata.ucar.edu/software/netcdf/guide_15.html>`_. A NetCDF
+file is a self-describing binary format, with a header followed by
+data. The header contains metadata describing dimensions, variables
+and the position of the data in the file, so access can be done in an
+efficient manner without loading unnecessary data into memory. We use
+the ``mmap`` module to create Numpy arrays mapped to the data on disk,
+for the same purpose.
 
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject
-to the following conditions:
+The structure of a NetCDF file is as follows:
 
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
+    C D F <VERSION BYTE> <NUMBER OF RECORDS>
+    <DIMENSIONS> <GLOBAL ATTRIBUTES> <VARIABLES METADATA>
+    <NON-RECORD DATA> <RECORD DATA>
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Record data refers to data where the first axis can be expanded at
+will. All record variables share the same dimension at the first axis,
+and they are stored at the end of the file per record, i.e.,
+
+    A[0], B[0], ..., A[1], B[1], ..., etc,
+
+so that new data can be appended to the file without changing its original
+structure. Non-record data are padded to a 4n bytes boundary. Record data
+are also padded, unless there is exactly one record variable in the file,
+in which case the padding is dropped. All data is stored in big endian
+byte order.
+
+The Scientific.IO.NetCDF API allows attributes to be added directly to
+instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
+between user-set attributes and instance attributes, user-set attributes
+are automatically stored in the ``_attributes`` attribute by overloading
+``__setattr__``. This is the reason why the code sometimes uses
+``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
+otherwise the key would be inserted into userspace attributes.
+
+To create a NetCDF file::
+
+    >>> import time
+    >>> f = netcdf_file('simple.nc', 'w')
+    >>> f.history = 'Created for a test'
+    >>> f.createDimension('time', 10)
+    >>> time = f.createVariable('time', 'i', ('time',))
+    >>> time[:] = range(10)
+    >>> time.units = 'days since 2008-01-01'
+    >>> f.close()
+
+To read the NetCDF file we just created::
+
+    >>> f = netcdf_file('simple.nc', 'r')
+    >>> print f.history
+    Created for a test
+    >>> time = f.variables['time']
+    >>> print time.units
+    days since 2008-01-01
+    >>> print time.shape
+    (10,)
+    >>> print time[-1]
+    9
+    >>> f.close()
+
+TODO: properly implement ``_FillValue``.
 """
 
-__author__ = "Roberto De Almeida <[EMAIL PROTECTED]>"
+__all__ = ['netcdf_file', 'netcdf_variable']
 
-import struct
-import itertools
-import mmap
+from operator import mul
+from mmap import mmap, ACCESS_READ
 
-from numpy import ndarray, empty, array, ma, squeeze, zeros
-import numpy
+from numpy import fromstring, ndarray, dtype, empty, array, asarray, squeeze, zeros, ma
+from numpy import little_endian as LITTLE_ENDIAN
 
-from dap.client import open as open_remote
-from dap.dtypes import ArrayType, GridType, typemap
 
-ABSENT = '\x00' * 8
-ZERO = '\x00' * 4
-NC_BYTE = '\x00\x00\x00\x01'
+ABSENT = '\x00\x00\x00\x00\x00\x00\x00\x00'
+ZERO = '\x00\x00\x00\x00'
+NC_BYTE = '\x00\x00\x00\x01'
 NC_CHAR = '\x00\x00\x00\x02'
 NC_SHORT = '\x00\x00\x00\x03'
 NC_INT = '\x00\x00\x00\x04'
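[Editorial note] The header layout summarized in the new docstring ('CDF',
a version byte, then the record count) can be verified by hand. A small
standalone sketch, not part of the commit, mirroring the reads that
netcdf_file._read() performs in the next hunk; 'simple.nc' is the file
created in the doctest above:

    from numpy import fromstring

    fp = open('simple.nc', 'rb')
    assert fp.read(3) == 'CDF'                  # magic bytes
    version = fromstring(fp.read(1), '>b')[0]   # 1 = classic format
    nrecs = fromstring(fp.read(4), '>i')[0]     # number of records
    print version, nrecs
    fp.close()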
@@ -49,360 +93,531 @@
 NC_VARIABLE = '\x00\x00\x00\x0b'
 NC_ATTRIBUTE = '\x00\x00\x00\x0c'
 
-_typecodes = dict([[_v,_k] for _k,_v in typemap.items()])
-
-# default _FillValue for netcdf types (apply also to corresponding
-# DAP types).
-_default_fillvals = {'c':'\0',
-                     'S':"",
-                     'b':-127,
-                     'B':-127,
-                     'h':-32767,
-                     'H':65535,
-                     'i':-2147483647L,
-                     'L':4294967295L,
-                     'q':-2147483647L,
-                     'f':9.9692099683868690e+36,
-                     'd':9.9692099683868690e+36}
-
-def NetCDFFile(file, maskandscale=True):
-    """NetCDF File reader. API is the same as Scientific.IO.NetCDF.
-    If 'file' is a URL that starts with 'http', it is assumed
-    to be a remote OPenDAP dataset, and the python dap client is used
-    to retrieve the data. Only the OPenDAP Array and Grid data
-    types are recognized. If file does not start with 'http', it
-    is assumed to be a local netCDF file. Data will
-    automatically be converted to masked arrays if the variable has either
-    a 'missing_value' or '_FillValue' attribute, and some data points
-    are equal to the value specified by that attribute. In addition,
-    variables stored as integers that have the 'scale_factor' and
-    'add_offset' attribute will automatically be rescaled to floats when
-    read. To suppress these automatic conversions, set the
-    maskandscale keyword to False.
+TYPEMAP = { NC_BYTE:   ('b', 1),
+            NC_CHAR:   ('c', 1),
+            NC_SHORT:  ('h', 2),
+            NC_INT:    ('i', 4),
+            NC_FLOAT:  ('f', 4),
+            NC_DOUBLE: ('d', 8) }
+
+REVERSE = { 'b': NC_BYTE,
+            'c': NC_CHAR,
+            'h': NC_SHORT,
+            'i': NC_INT,
+            'f': NC_FLOAT,
+            'd': NC_DOUBLE,
+
+            # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
+            # used when getting the types from generic attributes.
+            'l': NC_INT,
+            'S': NC_CHAR }
+
+
+class netcdf_file(object):
     """
-    if file.startswith('http'):
-        return _RemoteFile(file,maskandscale)
-    else:
-        return _LocalFile(file,maskandscale)
-
-def _maskandscale(var,datout):
-    totalmask = zeros(datout.shape,numpy.bool)
-    fillval = None
-    if hasattr(var, 'missing_value') and (datout == var.missing_value).any():
-        fillval = var.missing_value
-        totalmask += datout==fillval
-    if hasattr(var, '_FillValue') and (datout == var._FillValue).any():
-        if fillval is None:
-            fillval = var._FillValue
-        totalmask += datout==var._FillValue
-    elif (datout == _default_fillvals[var.typecode()]).any():
-        if fillval is None:
-            fillval = _default_fillvals[var.typecode()]
-        totalmask += datout==_default_fillvals[var.dtype]
-    if fillval is not None:
-        datout = ma.masked_array(datout,mask=totalmask,fill_value=fillval)
-    try:
-        datout = var.scale_factor*datout + var.add_offset
-    except:
-        pass
-    return datout
+    A ``netcdf_file`` object has two standard attributes: ``dimensions`` and
+    ``variables``. The values of both are dictionaries, mapping dimension
+    names to their associated lengths and variable names to variables,
+    respectively. Application programs should never modify these
+    dictionaries.
 
-class _RemoteFile(object):
-    """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+    All other attributes correspond to global attributes defined in the
+    NetCDF file. Global file attributes are created by assigning to an
+    attribute of the ``netcdf_file`` object.
 
-    def __init__(self, file, maskandscale):
-        self._buffer = open_remote(file)
+    """
+    def __init__(self, filename, mode='r', maskandscale=False):
         self._maskandscale = maskandscale
-        self._parse()
 
-    def read(self, size=-1):
-        """Alias for reading the file buffer."""
-        return self._buffer.read(size)
+        self.filename = filename
 
-    def _parse(self):
-        """Initial parsing of the header."""
-        # Read header info.
-        self._dim_array()
-        self._gatt_array()
-        self._var_array()
+        assert mode in 'rw', "Mode must be either 'r' or 'w'."
+        self.mode = mode
 
-    def _dim_array(self):
-        """Read a dict with dimensions names and sizes."""
         self.dimensions = {}
+        self.variables = {}
         self._dims = []
-        for k,d in self._buffer.iteritems():
-            if (isinstance(d, ArrayType) or isinstance(d, GridType)) and len(d.shape) == 1 and k == d.dimensions[0]:
-                name = k
-                length = len(d)
-                self.dimensions[name] = length
-                self._dims.append(name) # preserve dim order
+        self._recs = 0
+        self._recsize = 0
 
-    def _gatt_array(self):
-        """Read global attributes."""
-        self.__dict__.update(self._buffer.attributes)
+        self.fp = open(self.filename, '%sb' % mode)
 
-    def _var_array(self):
-        """Read all variables."""
-        # Read variables.
-        self.variables = {}
-        for k,d in self._buffer.iteritems():
-            if isinstance(d, GridType) or isinstance(d, ArrayType):
-                name = k
-                self.variables[name] = _RemoteVariable(d,self._maskandscale)
+        self._attributes = {}
+        if mode == 'r':
+            self._read()
+
+    def __setattr__(self, attr, value):
+        # Store user defined attributes in a separate dict,
+        # so we can save them to file later.
+        try:
+            self._attributes[attr] = value
+        except AttributeError:
+            pass
+        self.__dict__[attr] = value
 
     def close(self):
-        # this is a no-op provided for compatibility
-        pass
+        if not self.fp.closed:
+            try:
+                self.flush()
+            finally:
+                self.fp.close()
+    __del__ = close
 
+    def createDimension(self, name, length):
+        self.dimensions[name] = length
+        self._dims.append(name)
 
-class _RemoteVariable(object):
-    def __init__(self, var, maskandscale):
-        self._var = var
-        self._maskandscale = maskandscale
-        self.dtype = var.type
-        self.shape = var.shape
-        self.dimensions = var.dimensions
-        self.__dict__.update(var.attributes)
+    def createVariable(self, name, type, dimensions):
+        shape = tuple([self.dimensions[dim] for dim in dimensions])
+        shape_ = tuple([dim or 0 for dim in shape])  # replace None with 0 for numpy
 
-    def __getitem__(self, index):
-        datout = squeeze(self._var.__getitem__(index))
-        # automatically
-        # - remove singleton dimensions
-        # - create a masked array using missing_value or _FillValue attribute
-        # - apply scale_factor and add_offset to packed integer data
-        if self._maskandscale:
-            return _maskandscale(self,datout)
+        if isinstance(type, basestring): type = dtype(type)
+        typecode, size = type.char, type.itemsize
+        dtype_ = '>%s' % typecode
+        if size > 1: dtype_ += str(size)
+
+        data = empty(shape_, dtype=dtype_)
+        self.variables[name] = netcdf_variable(data, typecode, shape, dimensions)
+        return self.variables[name]
+
+    def flush(self):
+        if self.mode == 'w':
+            self._write()
+    sync = flush
+
+    def _write(self):
+        self.fp.write('CDF')
+
+        self.__dict__['version_byte'] = 1
+        self.fp.write(array(1, '>b').tostring())
+
+        # Write headers and data.
+        self._write_numrecs()
+        self._write_dim_array()
+        self._write_gatt_array()
+        self._write_var_array()
+
+    def _write_numrecs(self):
+        # Get highest record count from all record variables.
+        for var in self.variables.values():
+            if not var.shape[0] and len(var.data) > self._recs:
+                self.__dict__['_recs'] = len(var.data)
+        self._pack_int(self._recs)
+
+    def _write_dim_array(self):
+        if self.dimensions:
+            self.fp.write(NC_DIMENSION)
+            self._pack_int(len(self.dimensions))
+            for name, length in self.dimensions.items():
+                self._pack_string(name)
+                self._pack_int(length or 0)  # replace None with 0 for record dimension
         else:
-            return datout
+            self.fp.write(ABSENT)
 
-    def typecode(self):
-        return _typecodes[self.dtype]
+    def _write_gatt_array(self):
+        self._write_att_array(self._attributes)
 
+    def _write_att_array(self, attributes):
+        if attributes:
+            self.fp.write(NC_ATTRIBUTE)
+            self._pack_int(len(attributes))
+            for name, values in attributes.items():
+                self._pack_string(name)
+                self._write_values(values)
+        else:
+            self.fp.write(ABSENT)
 
-class _LocalFile(object):
-    """A NetCDF file reader. API is the same as Scientific.IO.NetCDF."""
+    def _write_var_array(self):
+        if self.variables:
+            self.fp.write(NC_VARIABLE)
+            self._pack_int(len(self.variables))
 
-    def __init__(self, file, maskandscale):
-        self._buffer = open(file, 'rb')
-        self._maskandscale = maskandscale
-        self._parse()
+            # Sort variables non-recs first, then recs.
+            variables = self.variables.items()
+            variables.sort(key=lambda (k, v): v.shape and v.shape[0] is not None)
+            variables.reverse()
+            variables = [k for (k, v) in variables]
 
-    def read(self, size=-1):
-        """Alias for reading the file buffer."""
-        return self._buffer.read(size)
+            # Set the metadata for all variables.
+            for name in variables:
+                self._write_var_metadata(name)
+            # Now that we have the metadata, we know the vsize of
+            # each record variable, so we can calculate recsize.
+            self.__dict__['_recsize'] = sum([
+                    var._vsize for var in self.variables.values()
+                    if var.shape[0] is None])
+            # Set the data for all variables.
+            for name in variables:
+                self._write_var_data(name)
+        else:
+            self.fp.write(ABSENT)
 
-    def _parse(self):
-        """Initial parsing of the header."""
-        # Check magic bytes.
-        assert self.read(3) == 'CDF'
+    def _write_var_metadata(self, name):
+        var = self.variables[name]
 
-        # Read version byte.
-        byte = self.read(1)
-        self.version_byte = struct.unpack('>b', byte)[0]
+        self._pack_string(name)
+        self._pack_int(len(var.dimensions))
+        for dimname in var.dimensions:
+            dimid = self._dims.index(dimname)
+            self._pack_int(dimid)
 
-        # Read header info.
-        self._numrecs()
-        self._dim_array()
-        self._gatt_array()
-        self._var_array()
+        self._write_att_array(var._attributes)
 
-    def _numrecs(self):
-        """Read number of records."""
-        self._nrecs = self._unpack_int()
+        nc_type = REVERSE[var.typecode()]
+        self.fp.write(nc_type)
 
-    def _dim_array(self):
-        """Read a dict with dimensions names and sizes."""
-        assert self.read(4) in [ZERO, NC_DIMENSION]
+        if var.shape[0]:
+            vsize = var.data.size * var.data.itemsize
+            vsize += -vsize % 4
+        else:  # record variable
+            vsize = var.data[0].size * var.data.itemsize
+            rec_vars = len([var for var in self.variables.values()
+                            if var.shape[0] is None])
+            if rec_vars > 1:
+                vsize += -vsize % 4
+        self.variables[name].__dict__['_vsize'] = vsize
+        self._pack_int(vsize)
+
+        # Pack a bogus begin, and set the real value later.
+        self.variables[name].__dict__['_begin'] = self.fp.tell()
+        self._pack_begin(0)
+
+    def _write_var_data(self, name):
+        var = self.variables[name]
+
+        # Set begin in file header.
+        the_beguine = self.fp.tell()
+        self.fp.seek(var._begin)
+        self._pack_begin(the_beguine)
+        self.fp.seek(the_beguine)
+
+        # Write data.
+        if var.shape[0]:
+            self.fp.write(var.data.tostring())
+            count = var.data.size * var.data.itemsize
+            self.fp.write('0' * (var._vsize - count))
+        else:  # record variable
+            # Handle rec vars with shape[0] < nrecs.
+            if self._recs > len(var.data):
+                shape = (self._recs,) + var.data.shape[1:]
+                var.data.resize(shape)
+
+            pos0 = pos = self.fp.tell()
+            for rec in var.data:
+                # Apparently scalars cannot be converted to big endian. If we
+                # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
+                # will remain as ``=i4``.
+                if not rec.shape and (rec.dtype.byteorder == '<' or
+                        (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
+                    rec = rec.byteswap()
+                self.fp.write(rec.tostring())
+                # Padding
+                count = rec.size * rec.itemsize
+                self.fp.write('0' * (var._vsize - count))
+                pos += self._recsize
+                self.fp.seek(pos)
+            self.fp.seek(pos0 + var._vsize)
+
+    def _write_values(self, values):
+        values = asarray(values)
+        values = values.astype(values.dtype.newbyteorder('>'))
+
+        nc_type = REVERSE[values.dtype.char]
+        self.fp.write(nc_type)
+
+        if values.dtype.char == 'S':
+            nelems = values.itemsize
+        else:
+            nelems = values.size
+        self._pack_int(nelems)
+
+        if not values.shape and (values.dtype.byteorder == '<' or
+                (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
+            values = values.byteswap()
+        self.fp.write(values.tostring())
+        count = values.size * values.itemsize
+        self.fp.write('0' * (-count % 4))  # pad
+
+    def _read(self):
+        # Check magic bytes and version
+        assert self.fp.read(3) == 'CDF', "Error: %s is not a valid NetCDF 3 file" % self.filename
+        self.__dict__['version_byte'] = fromstring(self.fp.read(1), '>b')[0]
+
+        # Read file headers and set data.
+        self._read_numrecs()
+        self._read_dim_array()
+        self._read_gatt_array()
+        self._read_var_array()
+
+    def _read_numrecs(self):
+        self.__dict__['_recs'] = self._unpack_int()
+
+    def _read_dim_array(self):
+        assert self.fp.read(4) in [ZERO, NC_DIMENSION]
         count = self._unpack_int()
-        self.dimensions = {}
-        self._dims = []
         for dim in range(count):
-            name = self._read_string()
-            length = self._unpack_int()
-            if length == 0: length = None # record dimension
+            name = self._unpack_string()
+            length = self._unpack_int() or None  # None for record dimension
             self.dimensions[name] = length
-            self._dims.append(name) # preserve dim order
+            self._dims.append(name)  # preserve order
 
-    def _gatt_array(self):
-        """Read global attributes."""
-        self.attributes = self._att_array()
+    def _read_gatt_array(self):
+        for k, v in self._read_att_array().items():
+            self.__setattr__(k, v)
 
-        # Update __dict__ for compatibility with S.IO.N
-        self.__dict__.update(self.attributes)
-
-    def _att_array(self):
-        """Read a dict with attributes."""
-        assert self.read(4) in [ZERO, NC_ATTRIBUTE]
+    def _read_att_array(self):
+        assert self.fp.read(4) in [ZERO, NC_ATTRIBUTE]
         count = self._unpack_int()
 
-        # Read attributes.
         attributes = {}
-        for attribute in range(count):
-            name = self._read_string()
-            nc_type = self._unpack_int()
-            n = self._unpack_int()
+        for attr in range(count):
+            name = self._unpack_string()
+            attributes[name] = self._read_values()
+        return attributes
 
-            # Read value for attributes.
-            attributes[name] = self._read_values(n, nc_type)
 
-        return attributes
+    def _read_var_array(self):
+        assert self.fp.read(4) in [ZERO, NC_VARIABLE]
 
-    def _var_array(self):
-        """Read all variables."""
-        assert self.read(4) in [ZERO, NC_VARIABLE]
+        begin = 0
+        dtypes = {'names': [], 'formats': []}
+        rec_vars = []
+        count = self._unpack_int()
+        for var in range(count):
+            name, dimensions, shape, attributes, typecode, size, dtype_, begin_, vsize = self._read_var()
+            if shape and shape[0] is None:
+                rec_vars.append(name)
+                self.__dict__['_recsize'] += vsize
+                if begin == 0: begin = begin_
+                dtypes['names'].append(name)
+                dtypes['formats'].append(str(shape[1:]) + dtype_)
 
-        # Read size of each record, in bytes.
-        self._read_recsize()
+                # Handle padding with a virtual variable.
+                if typecode in 'bch':
+                    actual_size = reduce(mul, (1,) + shape[1:]) * size
+                    padding = -actual_size % 4
+                    if padding:
+                        dtypes['names'].append('_padding_%d' % var)
+                        dtypes['formats'].append('(%d,)>b' % padding)
 
-        # Read variables.
-        self.variables = {}
-        count = self._unpack_int()
-        for variable in range(count):
-            name = self._read_string()
-            self.variables[name] = self._read_var()
+                # Data will be set later.
+                data = None
+            else:
+                mm = mmap(self.fp.fileno(), begin_+vsize, access=ACCESS_READ)
+                data = ndarray.__new__(ndarray, shape, dtype=dtype_,
+                        buffer=mm, offset=begin_, order=0)
 
-    def _read_recsize(self):
-        """Read all variables and compute record bytes."""
-        pos = self._buffer.tell()
-
-        recsize = 0
-        count = self._unpack_int()
-        for variable in range(count):
-            name = self._read_string()
-            n = self._unpack_int()
-            isrec = False
-            for i in range(n):
-                dimid = self._unpack_int()
-                name = self._dims[dimid]
-                dim = self.dimensions[name]
-                if dim is None and i == 0:
-                    isrec = True
-            attributes = self._att_array()
-            nc_type = self._unpack_int()
-            vsize = self._unpack_int()
-            begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
+            # Add variable.
+            self.variables[name] = netcdf_variable(
+                    data, typecode, shape, dimensions, attributes)
 
-            if isrec: recsize += vsize
+        if rec_vars:
+            # Remove padding when only one record variable.
+            if len(rec_vars) == 1:
+                dtypes['names'] = dtypes['names'][:1]
+                dtypes['formats'] = dtypes['formats'][:1]
 
-        self._recsize = recsize
-        self._buffer.seek(pos)
+            # Build rec array.
+            mm = mmap(self.fp.fileno(), begin+self._recs*self._recsize, access=ACCESS_READ)
+            rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=dtypes,
+                    buffer=mm, offset=begin, order=0)
 
+            for var in rec_vars:
+                self.variables[var].__dict__['data'] = rec_array[var]
+
     def _read_var(self):
+        name = self._unpack_string()
         dimensions = []
         shape = []
-        n = self._unpack_int()
-        isrec = False
-        for i in range(n):
+        dims = self._unpack_int()
+
+        for i in range(dims):
             dimid = self._unpack_int()
-            name = self._dims[dimid]
-            dimensions.append(name)
-            dim = self.dimensions[name]
-            if dim is None and i == 0:
-                dim = self._nrecs
-                isrec = True
+            dimname = self._dims[dimid]
+            dimensions.append(dimname)
+            dim = self.dimensions[dimname]
             shape.append(dim)
         dimensions = tuple(dimensions)
         shape = tuple(shape)
 
-        attributes = self._att_array()
-        nc_type = self._unpack_int()
+        attributes = self._read_att_array()
+        nc_type = self.fp.read(4)
         vsize = self._unpack_int()
-
-        # Read offset.
         begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
 
-        return _LocalVariable(self._buffer.fileno(), nc_type, vsize, begin, shape, dimensions, attributes, isrec, self._recsize, maskandscale=self._maskandscale)
+        typecode, size = TYPEMAP[nc_type]
+        if typecode == 'c':
+            dtype_ = '>c'
+        else:
+            dtype_ = '>%s' % typecode
+            if size > 1: dtype_ += str(size)
 
-    def _read_values(self, n, nc_type):
-        bytes = [1, 1, 2, 4, 4, 8]
-        typecodes = ['b', 'c', 'h', 'i', 'f', 'd']
-
-        count = n * bytes[nc_type-1]
-        values = self.read(count)
-        padding = self.read((4 - (count % 4)) % 4)
-
-        typecode = typecodes[nc_type-1]
-        if nc_type != 2: # not char
-            values = struct.unpack('>%s' % (typecode * n), values)
-            values = array(values, dtype=typecode)
+        return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
+
+    def _read_values(self):
+        nc_type = self.fp.read(4)
+        n = self._unpack_int()
+
+        typecode, size = TYPEMAP[nc_type]
+
+        count = n*size
+        values = self.fp.read(count)
+        self.fp.read(-count % 4)  # read padding
+
+        if typecode != 'c':
+            values = fromstring(values, dtype='>%s%d' % (typecode, size))
+            if values.shape == (1,): values = values[0]
         else:
-            # Remove EOL terminator.
-            if values.endswith('\x00'): values = values[:-1]
-
+            values = values.rstrip('\x00')
         return values
 
+    def _pack_begin(self, begin):
+        if self.version_byte == 1:
+            self._pack_int(begin)
+        elif self.version_byte == 2:
+            self._pack_int64(begin)
+
+    def _pack_int(self, value):
+        self.fp.write(array(value, '>i').tostring())
+    _pack_int32 = _pack_int
+
     def _unpack_int(self):
-        return struct.unpack('>i', self.read(4))[0]
+        return fromstring(self.fp.read(4), '>i')[0]
     _unpack_int32 = _unpack_int
 
+    def _pack_int64(self, value):
+        self.fp.write(array(value, '>q').tostring())
+
     def _unpack_int64(self):
-        return struct.unpack('>q', self.read(8))[0]
+        return fromstring(self.fp.read(8), '>q')[0]
 
-    def _read_string(self):
-        count = struct.unpack('>i', self.read(4))[0]
-        s = self.read(count)
-        # Remove EOL terminator.
-        if s.endswith('\x00'): s = s[:-1]
-        padding = self.read((4 - (count % 4)) % 4)
+    def _pack_string(self, s):
+        count = len(s)
+        self._pack_int(count)
+        self.fp.write(s)
+        self.fp.write('0' * (-count % 4))  # pad
+
+    def _unpack_string(self):
+        count = self._unpack_int()
+        s = self.fp.read(count).rstrip('\x00')
+        self.fp.read(-count % 4)  # read padding
         return s
 
-    def close(self):
-        self._buffer.close()
 
+class netcdf_variable(object):
+    """
+    ``netcdf_variable`` objects are constructed by calling the method
+    ``createVariable`` on the netcdf_file object.
 
-class _LocalVariable(object):
-    def __init__(self, fileno, nc_type, vsize, begin, shape, dimensions, attributes, isrec=False, recsize=0, maskandscale=True):
-        self._nc_type = nc_type
-        self._vsize = vsize
-        self._begin = begin
-        self.shape = shape
+    ``netcdf_variable`` objects behave much like array objects defined in
+    Numpy, except that their data resides in a file. Data is read by
+    indexing and written by assigning to an indexed subset; the entire
+    array can be accessed by the index ``[:]`` or using the methods
+    ``getValue`` and ``assignValue``. ``netcdf_variable`` objects also
+    have attribute ``shape`` with the same meaning as for arrays, but
+    the shape cannot be modified. There is another read-only attribute
+    ``dimensions``, whose value is the tuple of dimension names.
+
+    All other attributes correspond to variable attributes defined in
+    the NetCDF file. Variable attributes are created by assigning to an
+    attribute of the ``netcdf_variable`` object.
+
+    """
+    def __init__(self, data, typecode, shape, dimensions, attributes=None, maskandscale=True):
+        self.data = data
+        self._typecode = typecode
+        self._shape = shape
         self.dimensions = dimensions
-        self.attributes = attributes # for ``dap.plugins.netcdf``
-        self.__dict__.update(attributes)
-        self._is_record = isrec
         self._maskandscale = maskandscale
 
-        # Number of bytes and type.
-        self._bytes = [1, 1, 2, 4, 4, 8][self._nc_type-1]
-        type_ = ['i', 'S', 'i', 'i', 'f', 'f'][self._nc_type-1]
-        dtype = '>%s%d' % (type_, self._bytes)
-        bytes = self._begin + self._vsize
+        self._attributes = attributes or {}
+        for k, v in self._attributes.items():
+            self.__dict__[k] = v
 
-        if isrec:
-            # Record variables are not stored contiguously on disk, so we
-            # need to create a separate array for each record.
-            self.__array_data__ = empty(shape, dtype)
-            bytes += (shape[0] - 1) * recsize
-            for n in range(shape[0]):
-                offset = self._begin + (n * recsize)
-                mm = mmap.mmap(fileno, bytes, access=mmap.ACCESS_READ)
-                self.__array_data__[n] = ndarray.__new__(ndarray, shape[1:], dtype=dtype, buffer=mm, offset=offset, order=0)
-        else:
-            # Create buffer and data.
-            mm = mmap.mmap(fileno, bytes, access=mmap.ACCESS_READ)
-            self.__array_data__ = ndarray.__new__(ndarray, shape, dtype=dtype, buffer=mm, offset=self._begin, order=0)
+    def __setattr__(self, attr, value):
+        # Store user defined attributes in a separate dict,
+        # so we can save them to file later.
+        try:
+            self._attributes[attr] = value
+        except AttributeError:
+            pass
+        self.__dict__[attr] = value
 
-        # N-D array interface
-        self.__array_interface__ = {'shape' : shape,
-                                    'typestr': dtype,
-                                    'data' : self.__array_data__,
-                                    'version': 3,
-                                    }
+    @property
+    def shape(self):
+        return self.data.shape
 
+    def getValue(self):
+        return self.data.item()
+
+    def assignValue(self, value):
+        self.data.itemset(value)
+
+    def typecode(self):
+        return self._typecode
+
     def __getitem__(self, index):
-        datout = squeeze(self.__array_data__.__getitem__(index))
-        # automatically
-        # - remove singleton dimensions
-        # - create a masked array using missing_value or _FillValue attribute
-        # - apply scale_factor and add_offset to packed integer data
+        datout = squeeze(self.data[index])
         if self._maskandscale:
             return _maskandscale(self,datout)
         else:
             return datout
 
-    def getValue(self):
-        """For scalars."""
-        return self.__array_data__.item()
+    def __setitem__(self, index, data):
+        # Expand data for record vars?
+        if not self._shape[0]:
+            if isinstance(index, tuple):
+                rec_index = index[0]
+            else:
+                rec_index = index
+            if isinstance(rec_index, slice):
+                recs = (rec_index.start or 0) + len(data)
+            else:
+                recs = rec_index + 1
+            if recs > len(self.data):
+                shape = (recs,) + self._shape[1:]
+                self.data.resize(shape)
+        self.data[index] = data
 
-    def typecode(self):
-        return ['b', 'c', 'h', 'i', 'f', 'd'][self._nc_type-1]
+
+NetCDFFile = netcdf_file
+NetCDFVariable = netcdf_variable
+
+# default _FillValue for netcdf types (apply also to corresponding
+# DAP types).
+_default_fillvals = {'c':'\0',
+                     'S':"",
+                     'b':-127,
+                     'B':-127,
+                     'h':-32767,
+                     'H':65535,
+                     'i':-2147483647L,
+                     'L':4294967295L,
+                     'q':-2147483647L,
+                     'f':9.9692099683868690e+36,
+                     'd':9.9692099683868690e+36}
+
+def _maskandscale(var,datout):
+    totalmask = zeros(datout.shape,bool)
+    fillval = None
+    if hasattr(var, 'missing_value') and (datout == var.missing_value).any():
+        fillval = var.missing_value
+        totalmask += datout==fillval
+    if hasattr(var, '_FillValue') and (datout == var._FillValue).any():
+        if fillval is None:
+            fillval = var._FillValue
+        totalmask += datout==var._FillValue
+    elif (datout == _default_fillvals[var.typecode()]).any():
+        if fillval is None:
+            fillval = _default_fillvals[var.typecode()]
+        totalmask += datout==_default_fillvals[var.typecode()]
+    if fillval is not None:
+        datout = ma.masked_array(datout,mask=totalmask,fill_value=fillval)
+    try:
+        datout = var.scale_factor*datout + var.add_offset
+    except AttributeError:
+        pass
+    return datout
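[Editorial note] Two usage notes on the rewritten module. First, the
__setattr__ overloading described in the new module docstring can be
observed directly: user-set attributes are captured in _attributes and
written out as NetCDF attributes by _write_gatt_array() on flush. The
file name here is hypothetical:

    >>> from mpl_toolkits.basemap.pupynere import netcdf_file
    >>> f = netcdf_file('attrs.nc', 'w')
    >>> f.history = 'an example attribute'
    >>> f._attributes['history']     # captured for writing on flush/close
    'an example attribute'
    >>> f.close()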
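Second, a hedged sketch of what the _maskandscale helper above amounts to
when basemap's NetCDFFile wrapper passes maskandscale=True. The variable
name and its attributes are hypothetical, and the real helper also
consults _FillValue and the per-type defaults in _default_fillvals:

    from numpy import ma

    v = f.variables['packed_temp']   # hypothetical packed integer variable
    raw = v.data[:]
    # roughly equivalent to v[:] with maskandscale=True:
    masked = ma.masked_array(raw, mask=(raw == v.missing_value),
                             fill_value=v.missing_value)
    unpacked = v.scale_factor*masked + v.add_offset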