Repository: climate
Updated Branches:
  refs/heads/master d9e3c7e73 -> 9c28fe6a9
CLIMATE-723 - Update subset module for regional climate model output

- ocw.dataset_processor.subset can properly handle datasets on a curvilinear grid by masking grids outside the subdomain.

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/a8024393
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/a8024393
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/a8024393

Branch: refs/heads/master
Commit: a80243935e40d27c22753a22ab62b43eddc5593c
Parents: d9e3c7e
Author: huikyole <[email protected]>
Authored: Fri Jan 22 20:56:19 2016 -0800
Committer: huikyole <[email protected]>
Committed: Fri Jan 22 20:56:19 2016 -0800

----------------------------------------------------------------------
 ocw/dataset_processor.py | 88 ++++++++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 39 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/a8024393/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 09edbaa..07f2011 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -299,54 +299,64 @@ def subset(subregion, target_dataset, subregion_name=None):
 # Ensure that the subregion information is well formed
 _are_bounds_contained_by_dataset(subregion, target_dataset)
- # Get subregion indices into subregion data
- dataset_slices = _get_subregion_slice_indices(subregion, target_dataset)
-
 if not subregion_name:
 subregion_name = target_dataset.name
- # Slice the values array with our calculated slice indices
- if target_dataset.values.ndim == 2:
- subset_values = ma.zeros([len(target_dataset.values[
- dataset_slices["lat_start"]:dataset_slices["lat_end"]]),
- len(target_dataset.values[
- dataset_slices["lon_start"]:dataset_slices["lon_end"]])])
+ if 
target_dataset.lats.ndim ==2 and target_dataset.lons.ndim ==2:
+ target_dataset = temporal_slice(subregion.start, subregion.end, target_dataset)
+ nt, ny, nx = target_dataset.values.shape
+ y_index, x_index = np.where((target_dataset.lats >= subregion.lat_max) | (target_dataset.lats <= subregion.lat_min) |
+ (target_dataset.lons >= subregion.lon_max) | (target_dataset.lons <= subregion.lon_min))
+ for it in np.arange(nt):
+ target_dataset.values[it,y_index, x_index] = 1.e+20
+ target_dataset.values = ma.masked_equal(target_dataset.values, 1.e+20)
+ return target_dataset
+
+ elif target_dataset.lats.ndim ==1 and target_dataset.lons.ndim ==1:
+ # Get subregion indices into subregion data
+ dataset_slices = _get_subregion_slice_indices(subregion, target_dataset)
+ # Slice the values array with our calculated slice indices
+ if target_dataset.values.ndim == 2:
+ subset_values = ma.zeros([len(target_dataset.values[
+ dataset_slices["lat_start"]:dataset_slices["lat_end"]]),
+ len(target_dataset.values[
+ dataset_slices["lon_start"]:dataset_slices["lon_end"]])])
- subset_values = target_dataset.values[
- dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
- dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
+ subset_values = target_dataset.values[
+ dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
+ dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
- elif target_dataset.values.ndim == 3:
- subset_values = ma.zeros([len(target_dataset.values[
- dataset_slices["time_start"]:dataset_slices["time_end"]]),
- len(target_dataset.values[
+ elif target_dataset.values.ndim == 3:
+ subset_values = ma.zeros([len(target_dataset.values[
+ dataset_slices["time_start"]:dataset_slices["time_end"]]),
+ len(target_dataset.values[
 dataset_slices["lat_start"]:dataset_slices["lat_end"]]),
- len(target_dataset.values[
+ len(target_dataset.values[
 dataset_slices["lon_start"]:dataset_slices["lon_end"]])])
- subset_values = target_dataset.values[
- 
dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
- dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
- dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
+ subset_values = target_dataset.values[
+ dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
+ dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
+ dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
- # Build new dataset with subset information
- return ds.Dataset(
- # Slice the lats array with our calculated slice indices
- target_dataset.lats[dataset_slices["lat_start"]:
- dataset_slices["lat_end"] + 1],
- # Slice the lons array with our calculated slice indices
- target_dataset.lons[dataset_slices["lon_start"]:
- dataset_slices["lon_end"] + 1],
- # Slice the times array with our calculated slice indices
- target_dataset.times[dataset_slices["time_start"]:
- dataset_slices["time_end"]+ 1],
- # Slice the values array with our calculated slice indices
- subset_values,
- variable=target_dataset.variable,
- units=target_dataset.units,
- name=subregion_name,
- origin=target_dataset.origin
- )
+ # Build new dataset with subset information
+ return ds.Dataset(
+ # Slice the lats array with our calculated slice indices
+ target_dataset.lats[dataset_slices["lat_start"]:
+ dataset_slices["lat_end"] + 1],
+ # Slice the lons array with our calculated slice indices
+ target_dataset.lons[dataset_slices["lon_start"]:
+ dataset_slices["lon_end"] + 1],
+ # Slice the times array with our calculated slice indices
+ target_dataset.times[dataset_slices["time_start"]:
+ dataset_slices["time_end"]+ 1],
+ # Slice the values array with our calculated slice indices
+ subset_values,
+ variable=target_dataset.variable,
+ units=target_dataset.units,
+ name=subregion_name,
+ origin=target_dataset.origin
+ )
 def temporal_slice(start_time_index, end_time_index, target_dataset):
 '''Temporally slice given dataset(s) with subregion information. This does not
