Repository: climate
Updated Branches:
  refs/heads/master d49c5677d -> 4b37b125c


CLIMATE-683 - A new loader to read multiple netCDF files with a file list and 
spatial mask

- A new loader, ocw.data_source.local.load_dataset_from_multiple_netcdf_files, 
is added


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/76b0914a
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/76b0914a
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/76b0914a

Branch: refs/heads/master
Commit: 76b0914a14fde97c1e4bd95185c415e2e69e4a88
Parents: ece260d
Author: huikyole <[email protected]>
Authored: Mon Oct 12 17:26:48 2015 -0700
Committer: huikyole <[email protected]>
Committed: Mon Oct 12 17:26:48 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py | 74 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/76b0914a/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 60fcb50..c6114c0 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -330,3 +330,77 @@ def load_multiple_files(file_path,
                         lat_name=lat_name, lon_name=lon_name, 
time_name=time_name))
     
     return datasets, data_name
+
+def load_dataset_from_multiple_netcdf_files(file_list, variable_name,
+                                            lat_name=None, lon_name=None, 
time_name=None,
+                                            name='', file_path=None, 
filename_pattern=None,
+                                            mask_file=None, 
mask_variable=None, mask_value=0):
+    ''' Load multiple netCDF files from the same source (an observation or a 
model) into a Dataset.
+    The dataset can be spatially subset with a mask file.
+    :param file_list: A text file listing the NetCDF filenames, one per line. If empty, file_path and filename_pattern are used instead.
+    :type file_list: :mod:`string`
+    :param variable_name: The variable name to load from the NetCDF file.
+    :type variable_name: :mod:`string`
+    :param lat_name: (Optional) The latitude variable name to extract from the
+        dataset.
+    :type lat_name: :mod:`string`
+
+    :param lon_name: (Optional) The longitude variable name to extract from the
+        dataset.
+    :type lon_name: :mod:`string`
+
+    :param time_name: (Optional) The time variable name to extract from the
+        dataset.
+    :type time_name: :mod:`string`
+    :param name: (Optional) A name for the loaded dataset.
+    :type name: :mod:`string`
+    :param file_path: Directory prefix prepended to each filename pattern (used only when file_list is empty).
+    :type file_path: :mod:`string`
+    :param filename_pattern: Glob patterns for the NetCDF files to load (used only when file_list is empty).
+    :type filename_pattern: :class:`list` of :mod:`string`
+    :param mask_file: A netcdf file with two-dimensional mask indices
+    :type mask_file: :mod:`string`
+    :param mask_variable: The variable name to load from the mask_file.
+    :type mask_variable: :mod:`string`
+    :param mask_value: The value of the mask variable marking the grid points to keep.
+    :type mask_value: :class:`int`
+    :returns: An OCW Dataset object with the requested variable's data from
+        the NetCDF files, concatenated in sorted filename order.
+    :rtype: :class:`dataset.Dataset`
+    :raises ValueError:
+    '''
+    nc_files = []
+    if not file_list:
+        for pattern in filename_pattern:
+            nc_files.extend(glob(file_path + pattern))
+    else:
+        nc_files = [line.rstrip('\n') for line in open(file_list)]
+
+    nc_files.sort()
+
+    dataset0 = load_file(nc_files[0], variable_name=variable_name, 
lat_name=lat_name, lon_name=lon_name, time_name=time_name)
+    if dataset0.lons.ndim == 1 and dataset0.lats.ndim ==1:
+        lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats)
+    elif dataset0.lons.ndim == 2 and dataset0.lats.ndim ==2:
+        lons = dataset0.lons
+        lats = dataset0.lats
+
+    if mask_file: 
+        mask_dataset = load_file(mask_file, mask_variable)
+        y_index, x_index = numpy.where(mask_dataset.values == mask_value)
+
+    times = []
+    nfile = len(nc_files)
+    for ifile, file in enumerate(nc_files):
+        print 'NC file '+str(ifile+1)+'/'+str(nfile), file
+        file_object0= load_file(file, variable_name)
+        values0= file_object0.values
+        times.extend(file_object0.times)
+        if mask_file:
+            values0 = values0[:,y_index, x_index]
+        if ifile == 0:
+            data_values = values0
+        else:
+            data_values= numpy.concatenate((data_values, values0))
+    times = numpy.array(times)
+    return Dataset(lats, lons, times, data_values, variable_name, name=name)

Reply via email to