CLIMATE-592 - update dataset_processor.py to accommodate units in Dataset object
Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/d4cefc54 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/d4cefc54 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/d4cefc54 Branch: refs/heads/master Commit: d4cefc545178b44417f39942d84148815fc11172 Parents: c43235b Author: Kim Whitehall <[email protected]> Authored: Wed Feb 25 17:54:00 2015 -0800 Committer: Michael Joyce <[email protected]> Committed: Thu Mar 12 09:04:46 2015 -0700 ---------------------------------------------------------------------- ocw/data_source/local.py | 9 +++++++-- ocw/data_source/rcmed.py | 5 +++-- ocw/dataset.py | 2 +- ocw/dataset_processor.py | 38 ++++++++++++++++++++++++++++++++++++++ ocw/tests/test_local.py | 1 + 5 files changed, 50 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/data_source/local.py ---------------------------------------------------------------------- diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index 2b56d69..f2f0388 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -111,6 +111,7 @@ def _get_netcdf_variable_name(valid_var_names, netcdf, netcdf_var): def load_file(file_path, variable_name, + variable_unit = None, elevation_index=0, name='', lat_name=None, @@ -124,6 +125,9 @@ def load_file(file_path, :param variable_name: The variable name to load from the NetCDF file. :type variable_name: :mod:`string` + :param variable_unit: (Optional) The variable unit to load from the NetCDF file. + :type variable_unit: :mod:`string` + :param elevation_index: (Optional) The elevation index for which data should be returned. Climate data is often times 4 dimensional data. Some datasets will have readings at different height/elevation levels. 
OCW @@ -182,6 +186,7 @@ def load_file(file_path, times = utils.decode_time_values(netcdf, time_name) times = numpy.array(times) values = ma.array(netcdf.variables[variable_name][:]) + variable_unit = netcdf.variables[variable_name].units # If the values are 4D then we need to strip out the elevation index if len(values.shape) == 4: @@ -214,5 +219,5 @@ def load_file(file_path, } if elevation_index != 0: origin['elevation_index'] = elevation_index - return Dataset(lats, lons, times, values, variable_name, - name=name, origin=origin) + return Dataset(lats, lons, times, values, variable=variable_name, + units=variable_unit, name=name, origin=origin) http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/data_source/rcmed.py ---------------------------------------------------------------------- diff --git a/ocw/data_source/rcmed.py b/ocw/data_source/rcmed.py index 4733a45..ef0dc78 100644 --- a/ocw/data_source/rcmed.py +++ b/ocw/data_source/rcmed.py @@ -347,7 +347,7 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l ''' parameters_metadata = get_parameters_metadata() - parameter_name, time_step, _, _, _, _, _= _get_parameter_info(parameters_metadata, parameter_id) + parameter_name, time_step, _, _, _, _, parameter_units = _get_parameter_info(parameters_metadata, parameter_id) url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step) lats, lons, times, values = _get_data(url) @@ -365,6 +365,7 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l unique_lats_lons_times[1], unique_times, values, - parameter_name, + variable=parameter_name, + units=parameter_units, name=name, origin=origin) http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/dataset.py ---------------------------------------------------------------------- diff --git a/ocw/dataset.py b/ocw/dataset.py index ce604e3..ee86532 100644 --- a/ocw/dataset.py +++ 
b/ocw/dataset.py @@ -207,7 +207,7 @@ Expected shape (%s, %s, %s) but received (%s, %s, %s)""" % (time_count, "lat-range: {}, " "lon-range: {}, " "time_range: {}, " - "var: {}>" + "var: {}, " "units: {}>" ) http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/dataset_processor.py ---------------------------------------------------------------------- diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 37296f2..1f9edf0 100644 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -62,6 +62,7 @@ def temporal_rebin(target_dataset, temporal_resolution): binned_dates, binned_values, target_dataset.variable, + target_dataset.units, target_dataset.name) return new_dataset @@ -117,12 +118,16 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes): target_dataset.times, new_values, target_dataset.variable, + target_dataset.units, target_dataset.name) return regridded_dataset def ensemble(datasets): """ Generate a single dataset which is the mean of the input datasets + + An ensemble dataset combines input datasets assuming they all have + similar shape, dimensions, and units. :param datasets: Datasets to be used to compose the ensemble dataset from. All Datasets must be the same shape. 
@@ -140,6 +145,7 @@ def ensemble(datasets): datasets[0].lons, datasets[0].times, ensemble_values, + datasets[0].units, name="Dataset Ensemble") return ensemble_dataset @@ -182,6 +188,7 @@ def subset(subregion, target_dataset): dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1, dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1], target_dataset.variable, + target_dataset.units, target_dataset.name ) @@ -248,6 +255,7 @@ def normalize_dataset_datetimes(dataset, timestep): np.array(new_times), dataset.values, dataset.variable, + dataset.units, dataset.name ) @@ -295,9 +303,39 @@ def write_netcdf(dataset, path, compress=True): lons[:] = dataset.lons times[:] = netCDF4.date2num(dataset.times, times.units) values[:] = dataset.values + values.units = dataset.units out_file.close() +def water_flux_unit_conversion(dataset): + ''' Convert water flux variables units as necessary + + Convert full SI units water flux units to more common units. + + :param dataset: The dataset to convert. + :type dataset: :class:`dataset.Dataset` + + :returns: A Dataset with values converted to new units. + :rtype: :class:`dataset.Dataset` + ''' + waterFluxVariables = ['pr', 'evspsbl', 'mrro', 'swe'] + variable = dataset.variable.lower() + + if any(subString in variable for subString in waterFluxVariables): + dataset_units = dataset.units.lower() + if variable in 'swe': + if any(unit in dataset_units for unit in ['m', 'meter']): + dataset.values = 1.e3 * dataset.values + dataset.units = 'km' + else: + if any(unit in dataset_units + for unit in ['kg m-2 s-1', 'mm s-1', 'mm/sec']): + dataset.values = 86400. * dataset.values + dataset.units = 'mm/day' + + return dataset + + def _rcmes_normalize_datetimes(datetimes, timestep): """ Normalize Dataset datetime values. 
http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/tests/test_local.py ---------------------------------------------------------------------- diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py index 6d67896..254c7f7 100644 --- a/ocw/tests/test_local.py +++ b/ocw/tests/test_local.py @@ -152,6 +152,7 @@ def create_netcdf_object(): values[:] = values #Assign time info to time variable netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' + netCDF_file.variables['value'].units = 'foo_units' netCDF_file.close() return file_path
