CLIMATE-592 - update dataset_processor.py to accommodate units in Dataset object


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/d4cefc54
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/d4cefc54
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/d4cefc54

Branch: refs/heads/master
Commit: d4cefc545178b44417f39942d84148815fc11172
Parents: c43235b
Author: Kim Whitehall <[email protected]>
Authored: Wed Feb 25 17:54:00 2015 -0800
Committer: Michael Joyce <[email protected]>
Committed: Thu Mar 12 09:04:46 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py |  9 +++++++--
 ocw/data_source/rcmed.py |  5 +++--
 ocw/dataset.py           |  2 +-
 ocw/dataset_processor.py | 38 ++++++++++++++++++++++++++++++++++++++
 ocw/tests/test_local.py  |  1 +
 5 files changed, 50 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 2b56d69..f2f0388 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -111,6 +111,7 @@ def _get_netcdf_variable_name(valid_var_names, netcdf, 
netcdf_var):
 
 def load_file(file_path,
               variable_name,
+              variable_unit = None,
               elevation_index=0,
               name='',
               lat_name=None,
@@ -124,6 +125,9 @@ def load_file(file_path,
     :param variable_name: The variable name to load from the NetCDF file.
     :type variable_name: :mod:`string`
 
+    :param variable_unit: (Optional) The variable unit to load from the NetCDF 
file.
+    :type variable_unit: :mod:`string`
+
     :param elevation_index: (Optional) The elevation index for which data 
should
         be returned. Climate data is often times 4 dimensional data. Some
         datasets will have readins at different height/elevation levels. OCW
@@ -182,6 +186,8 @@ def load_file(file_path,
     times = utils.decode_time_values(netcdf, time_name)
     times = numpy.array(times)
     values = ma.array(netcdf.variables[variable_name][:])
+    if variable_unit is None:
+        variable_unit = netcdf.variables[variable_name].units
 
     # If the values are 4D then we need to strip out the elevation index
     if len(values.shape) == 4:
@@ -214,5 +219,5 @@ def load_file(file_path,
     }
     if elevation_index != 0: origin['elevation_index'] = elevation_index
 
-    return Dataset(lats, lons, times, values, variable_name,
-                   name=name, origin=origin)
+    return Dataset(lats, lons, times, values, variable=variable_name,
+                   units=variable_unit, name=name, origin=origin)

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/data_source/rcmed.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/rcmed.py b/ocw/data_source/rcmed.py
index 4733a45..ef0dc78 100644
--- a/ocw/data_source/rcmed.py
+++ b/ocw/data_source/rcmed.py
@@ -347,7 +347,7 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, 
max_lat, min_lon, max_l
     '''
     
     parameters_metadata = get_parameters_metadata()
-    parameter_name, time_step, _, _, _, _, _= 
_get_parameter_info(parameters_metadata, parameter_id)
+    parameter_name, time_step, _, _, _, _, parameter_units = 
_get_parameter_info(parameters_metadata, parameter_id)
     url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, 
min_lon, max_lon, start_time, end_time, time_step)
     lats, lons, times, values = _get_data(url)
 
@@ -365,6 +365,7 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, 
max_lat, min_lon, max_l
                    unique_lats_lons_times[1],
                    unique_times,
                    values,
-                   parameter_name,
+                   variable=parameter_name,
+                   units=parameter_units,
                    name=name,
                    origin=origin)

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/dataset.py
----------------------------------------------------------------------
diff --git a/ocw/dataset.py b/ocw/dataset.py
index ce604e3..ee86532 100644
--- a/ocw/dataset.py
+++ b/ocw/dataset.py
@@ -207,7 +207,7 @@ Expected shape (%s, %s, %s) but received (%s, %s, %s)""" % 
(time_count,
             "lat-range: {}, "
             "lon-range: {}, "
             "time_range: {}, "
-            "var: {}>"
+            "var: {}, "
             "units: {}>"
         )
 

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 37296f2..1f9edf0 100644
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -62,6 +62,7 @@ def temporal_rebin(target_dataset, temporal_resolution):
                              binned_dates, 
                              binned_values,
                              target_dataset.variable,
+                             target_dataset.units,
                              target_dataset.name)
     
     return new_dataset
@@ -117,12 +118,16 @@ def spatial_regrid(target_dataset, new_latitudes, 
new_longitudes):
                                    target_dataset.times, 
                                    new_values,
                                    target_dataset.variable,
+                                   target_dataset.units,
                                    target_dataset.name)
     return regridded_dataset
 
 def ensemble(datasets):
     """
     Generate a single dataset which is the mean of the input datasets
+
+    An ensemble dataset combines the input datasets, assuming they all
+    have similar shapes, dimensions, and units.
     
     :param datasets: Datasets to be used to compose the ensemble dataset from.
         All Datasets must be the same shape.
@@ -140,6 +145,7 @@ def ensemble(datasets):
                                   datasets[0].lons, 
                                   datasets[0].times,
                                   ensemble_values,
+                                  datasets[0].units,
                                   name="Dataset Ensemble")
     
     return ensemble_dataset
@@ -182,6 +188,7 @@ def subset(subregion, target_dataset):
             dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
             dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1],
         target_dataset.variable,
+        target_dataset.units,
         target_dataset.name
     )
 
@@ -248,6 +255,7 @@ def normalize_dataset_datetimes(dataset, timestep):
         np.array(new_times),
         dataset.values,
         dataset.variable,
+        dataset.units,
         dataset.name
     )
 
@@ -295,9 +303,39 @@ def write_netcdf(dataset, path, compress=True):
     lons[:] = dataset.lons
     times[:] = netCDF4.date2num(dataset.times, times.units)
     values[:] = dataset.values
+    values.units = dataset.units
 
     out_file.close()
 
+def water_flux_unit_conversion(dataset):
+    ''' Convert water flux variables' units as necessary
+
+    Convert water flux variables from full SI units to more commonly used units.
+
+    :param dataset: The dataset to convert.
+    :type dataset: :class:`dataset.Dataset`
+
+    :returns: A Dataset with values converted to new units.
+    :rtype: :class:`dataset.Dataset`
+    '''
+    waterFluxVariables = ['pr', 'evspsbl', 'mrro', 'swe']
+    variable = dataset.variable.lower()
+
+    if any(subString in variable for subString in waterFluxVariables):
+        dataset_units = dataset.units.lower()
+        if 'swe' in variable:
+            if any(unit in dataset_units for unit in ['m', 'meter']):
+                dataset.values = 1.e3 * dataset.values
+                dataset.units = 'mm'
+        else:
+            if any(unit in dataset_units 
+                for unit in ['kg m-2 s-1', 'mm s-1', 'mm/sec']):
+                dataset.values = 86400. * dataset.values
+                dataset.units = 'mm/day'
+
+    return dataset
+
+
 def _rcmes_normalize_datetimes(datetimes, timestep):
     """ Normalize Dataset datetime values.
 

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/tests/test_local.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py
index 6d67896..254c7f7 100644
--- a/ocw/tests/test_local.py
+++ b/ocw/tests/test_local.py
@@ -152,6 +152,7 @@ def create_netcdf_object():
         values[:] = values
         #Assign time info to time variable
         netCDF_file.variables['time'].units = 'months since 2001-01-01 
00:00:00' 
+        netCDF_file.variables['value'].units = 'foo_units'
         netCDF_file.close()
         return file_path
 

Reply via email to