CLIMATE-841 - ocw.dataset_processor.subset: add support for two new boundary types, 'us_states' and 'countries'. examples/subset_TRMM_data_for_NCA_regions.py has been added to exercise the new spatial subsetting functionality.
Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/243a8953 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/243a8953 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/243a8953 Branch: refs/heads/master Commit: 243a8953fc265229a0cf47b581e7a8f0aa78a9cd Parents: 036e5fd Author: huikyole <huiky...@argo.jpl.nasa.gov> Authored: Mon Aug 8 21:41:08 2016 -0700 Committer: huikyole <huiky...@argo.jpl.nasa.gov> Committed: Mon Aug 8 21:41:08 2016 -0700 ---------------------------------------------------------------------- examples/subregions.py | 53 ------------- examples/subregions_rectangular_boundaries.py | 53 +++++++++++++ examples/subset_TRMM_data_for_NCA_regions.py | 56 +++++++++++++ ocw/dataset.py | 65 +-------------- ocw/dataset_processor.py | 51 +++++++----- ocw/utils.py | 92 +++++++++++++++++++++- 6 files changed, 235 insertions(+), 135 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/examples/subregions.py ---------------------------------------------------------------------- diff --git a/examples/subregions.py b/examples/subregions.py deleted file mode 100644 index 20aaee9..0000000 --- a/examples/subregions.py +++ /dev/null @@ -1,53 +0,0 @@ -#Apache OCW lib immports -from ocw.dataset import Dataset, Bounds -import ocw.data_source.local as local -import ocw.data_source.rcmed as rcmed -import ocw.dataset_processor as dsp -import ocw.evaluation as evaluation -import ocw.metrics as metrics -import ocw.plotter as plotter -import ocw.utils as utils - -import datetime -import numpy as np -import numpy.ma as ma - -OUTPUT_PLOT = "subregions" - -# Spatial and temporal configurations -LAT_MIN = -45.0 -LAT_MAX = 42.24 -LON_MIN = -24.0 -LON_MAX = 60.0 -START_SUB = datetime.datetime(2000, 01, 1) -END_SUB = datetime.datetime(2007, 12, 31) - -#regridding parameters -gridLonStep=0.5 -gridLatStep=0.5 
- -#Regrid -print("... regrid") -new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) -new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) - -list_of_regions = [ - Bounds(-10.0, 0.0, 29.0, 36.5, START_SUB, END_SUB), - Bounds(0.0, 10.0, 29.0, 37.5, START_SUB, END_SUB), - Bounds(10.0, 20.0, 25.0, 32.5, START_SUB, END_SUB), - Bounds(20.0, 33.0, 25.0, 32.5, START_SUB, END_SUB), - Bounds(-19.3,-10.2,12.0, 20.0, START_SUB, END_SUB), - Bounds( 15.0, 30.0, 15.0, 25.0,START_SUB, END_SUB), - Bounds(-10.0, 10.0, 7.3, 15.0, START_SUB, END_SUB), - Bounds(-10.9, 10.0, 5.0, 7.3, START_SUB, END_SUB), - Bounds(33.9, 40.0, 6.9, 15.0, START_SUB, END_SUB), - Bounds(10.0, 25.0, 0.0, 10.0, START_SUB, END_SUB), - Bounds(10.0, 25.0,-10.0, 0.0, START_SUB, END_SUB), - Bounds(30.0, 40.0,-15.0, 0.0, START_SUB, END_SUB), - Bounds(33.0, 40.0, 25.0, 35.0, START_SUB, END_SUB)] - -#for plotting the subregions -plotter.draw_subregions(list_of_regions, new_lats, new_lons, OUTPUT_PLOT, fmt='png') - - - http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/examples/subregions_rectangular_boundaries.py ---------------------------------------------------------------------- diff --git a/examples/subregions_rectangular_boundaries.py b/examples/subregions_rectangular_boundaries.py new file mode 100644 index 0000000..20aaee9 --- /dev/null +++ b/examples/subregions_rectangular_boundaries.py @@ -0,0 +1,53 @@ +#Apache OCW lib immports +from ocw.dataset import Dataset, Bounds +import ocw.data_source.local as local +import ocw.data_source.rcmed as rcmed +import ocw.dataset_processor as dsp +import ocw.evaluation as evaluation +import ocw.metrics as metrics +import ocw.plotter as plotter +import ocw.utils as utils + +import datetime +import numpy as np +import numpy.ma as ma + +OUTPUT_PLOT = "subregions" + +# Spatial and temporal configurations +LAT_MIN = -45.0 +LAT_MAX = 42.24 +LON_MIN = -24.0 +LON_MAX = 60.0 +START_SUB = datetime.datetime(2000, 01, 1) +END_SUB = datetime.datetime(2007, 12, 31) + 
+#regridding parameters +gridLonStep=0.5 +gridLatStep=0.5 + +#Regrid +print("... regrid") +new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) +new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) + +list_of_regions = [ + Bounds(-10.0, 0.0, 29.0, 36.5, START_SUB, END_SUB), + Bounds(0.0, 10.0, 29.0, 37.5, START_SUB, END_SUB), + Bounds(10.0, 20.0, 25.0, 32.5, START_SUB, END_SUB), + Bounds(20.0, 33.0, 25.0, 32.5, START_SUB, END_SUB), + Bounds(-19.3,-10.2,12.0, 20.0, START_SUB, END_SUB), + Bounds( 15.0, 30.0, 15.0, 25.0,START_SUB, END_SUB), + Bounds(-10.0, 10.0, 7.3, 15.0, START_SUB, END_SUB), + Bounds(-10.9, 10.0, 5.0, 7.3, START_SUB, END_SUB), + Bounds(33.9, 40.0, 6.9, 15.0, START_SUB, END_SUB), + Bounds(10.0, 25.0, 0.0, 10.0, START_SUB, END_SUB), + Bounds(10.0, 25.0,-10.0, 0.0, START_SUB, END_SUB), + Bounds(30.0, 40.0,-15.0, 0.0, START_SUB, END_SUB), + Bounds(33.0, 40.0, 25.0, 35.0, START_SUB, END_SUB)] + +#for plotting the subregions +plotter.draw_subregions(list_of_regions, new_lats, new_lons, OUTPUT_PLOT, fmt='png') + + + http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/examples/subset_TRMM_data_for_NCA_regions.py ---------------------------------------------------------------------- diff --git a/examples/subset_TRMM_data_for_NCA_regions.py b/examples/subset_TRMM_data_for_NCA_regions.py new file mode 100644 index 0000000..bc946a2 --- /dev/null +++ b/examples/subset_TRMM_data_for_NCA_regions.py @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#Apache OCW lib immports +import ocw.dataset_processor as dsp +import ocw.utils as utils +from ocw.dataset import Bounds +import ocw.data_source.rcmed as rcmed +import ocw.plotter as plotter + +from datetime import datetime +import numpy.ma as ma + +import ssl + +if hasattr(ssl, '_create_unverified_context'): + ssl._create_default_https_context = ssl._create_unverified_context + +# rectangular boundary +min_lat = 15.75 +max_lat = 55.75 +min_lon = -125.75 +max_lon = -66.75 + +start_time = datetime(1998,1,1) +end_time = datetime(1998,12,31) + +TRMM_dataset = rcmed.parameter_dataset(3, 36, min_lat, max_lat, min_lon, max_lon, + start_time, end_time) + +Cuba_and_Bahamas_bounds = Bounds(boundary_type='countries', countries=['Cuba','Bahamas']) +TRMM_dataset2 = dsp.subset(TRMM_dataset, Cuba_and_Bahamas_bounds, extract=False) # to mask out the data over Mexico and Canada + +plotter.draw_contour_map(ma.mean(TRMM_dataset2.values, axis=0), TRMM_dataset2.lats, TRMM_dataset2.lons, fname='TRMM_without_Cuba_and_Bahamas') + +NCA_SW_bounds = Bounds(boundary_type='us_states', us_states=['CA','NV','UT','AZ','NM','CO']) +TRMM_dataset3 = dsp.subset(TRMM_dataset2, NCA_SW_bounds, extract=True) # to mask out the data over Mexico and Canada + +plotter.draw_contour_map(ma.mean(TRMM_dataset3.values, axis=0), TRMM_dataset3.lats, TRMM_dataset3.lons, fname='TRMM_NCA_SW') + + + http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/ocw/dataset.py ---------------------------------------------------------------------- diff --git a/ocw/dataset.py b/ocw/dataset.py index 
562c014..3dfc835 100644 --- a/ocw/dataset.py +++ b/ocw/dataset.py @@ -298,9 +298,9 @@ class Bounds(object): self._end = None if boundary_type == 'us_states': - self.masked_regions = shapefile_boundary(boundary_type, us_states) + self.masked_regions = utils.shapefile_boundary(boundary_type, us_states) if boundary_type == 'countries': - self.masked_regions = shapefile_boundary(boundary_type, countries) + self.masked_regions = utils.shapefile_boundary(boundary_type, countries) if boundary_type == 'user': file_object = netCDF4.Dataset(user_mask_file) self.mask_variable = file_object.variables[mask_variable_name][:] @@ -334,7 +334,7 @@ class Bounds(object): self.lon_min = float(lon_min) self.lon_max = float(lon_max) if boundary_type[:6].upper() == 'CORDEX': - self.lat_min, self.lat_max, self.lon_min, self.lon_max = CORDEX_boundary(boundary_type[6:].replace(" ","").lower()) + self.lat_min, self.lat_max, self.lon_min, self.lon_max = utils.CORDEX_boundary(boundary_type[6:].replace(" ","").lower()) @property def start(self): @@ -364,62 +364,3 @@ class Bounds(object): self._end = value -def shapefile_boundary(boundary_type, region_names): - ''' - :param boundary_type: The type of spatial subset boundary - :type boundary_type: :mod:'string' - - :param region_names: An array of regions for spatial subset - :type region_names: :mod:'list' - ''' - # Read the shapefile - map_read = Basemap() - regions = [] - shapefile_dir = os.sep.join([os.path.dirname(__file__), 'shape']) - map_read.readshapefile(os.path.join(shapefile_dir, boundary_type), - boundary_type) - if boundary_type == 'us_states': - for region_name in region_names: - for iregion, region_info in enumerate(map_read.us_states_info): - if region_info['st'] == region_name: - regions.append(numpy.array(map_read.us_states[iregion])) - elif boundary_type == 'countries': - for region_name in region_names: - for iregion, region_info in enumerate(map_read.countries_info): - if region_info['COUNTRY'] == region_name: - 
regions.append(numpy.array(map_read.countries[iregion])) - return regions - -def CORDEX_boundary(domain_name): - ''' - :param domain_name: CORDEX domain name (http://www.cordex.org/) - :type domain_name: :mod:'string' - ''' - if domain_name =='southamerica': - return -57.61, 18.50, 254.28-360., 343.02-360. - if domain_name =='centralamerica': - return -19.46, 34.83, 235.74-360., 337.78-360. - if domain_name =='northamerica': - return 12.55, 75.88, 189.26-360., 336.74-360. - if domain_name =='europe': - return 22.20, 71.84, 338.23-360., 64.4 - if domain_name =='africa': - return -45.76, 42.24, 335.36-360., 60.28 - if domain_name =='southasia': - return -15.23, 45.07, 19.88, 115.55 - if domain_name =='eastasia': - return -0.10, 61.90, 51.59, 179.99 - if domain_name =='centralasia': - return 18.34, 69.37, 11.05, 139.13 - if domain_name =='australasia': - return -52.36, 12.21, 89.25, 179.99 - if domain_name =='antartica': - return -89.48,-56.00, -179.00, 179.00 - if domain_name =='artic': - return 46.06, 89.50, -179.00, 179.00 - if domain_name =='mediterranean': - return 25.63, 56.66, 339.79-360.00, 50.85 - if domain_name =='middleeastnorthafrica': - return -7.00, 45.00, 333.00-360.00, 76.00 - if domain_name =='southeastasia': - return -15.14, 27.26, 89.26, 146.96 http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/ocw/dataset_processor.py ---------------------------------------------------------------------- diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 8b817a2..56980c3 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -16,6 +16,7 @@ # from ocw import dataset as ds +import ocw.utils as utils import datetime import numpy as np @@ -362,7 +363,7 @@ def ensemble(datasets): return ensemble_dataset -def subset(target_dataset, subregion, subregion_name=None): +def subset(target_dataset, subregion, subregion_name=None, extract=True): '''Subset given dataset(s) with subregion information :param subregion: The Bounds with 
which to subset the target Dataset. @@ -374,6 +375,9 @@ def subset(target_dataset, subregion, subregion_name=None): :param subregion_name: The subset-ed Dataset name :type subregion_name: :mod:`string` + :param extract: If False, the dataset inside regions will be masked. + :type extract: :mod:`boolean` + :returns: The subset-ed Dataset object :rtype: :class:`dataset.Dataset` @@ -387,7 +391,7 @@ def subset(target_dataset, subregion, subregion_name=None): if not subregion_name: subregion_name = target_dataset.name - if subregion.lat_min: + if hasattr(subregion, 'lat_min'): # If boundary_type is 'rectangular' or 'CORDEX ***', ensure that the subregion information is well formed _are_bounds_contained_by_dataset(target_dataset, subregion) @@ -435,27 +439,36 @@ def subset(target_dataset, subregion, subregion_name=None): dataset_slices["time_start"]:dataset_slices["time_end"] + 1, dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1, dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1] - - # Build new dataset with subset information - return ds.Dataset( + # Build new dataset with subset information + return ds.Dataset( # Slice the lats array with our calculated slice indices - target_dataset.lats[dataset_slices["lat_start"]: - dataset_slices["lat_end"] + 1], + target_dataset.lats[dataset_slices["lat_start"]: + dataset_slices["lat_end"] + 1], # Slice the lons array with our calculated slice indices - target_dataset.lons[dataset_slices["lon_start"]: - dataset_slices["lon_end"] + 1], + target_dataset.lons[dataset_slices["lon_start"]: + dataset_slices["lon_end"] + 1], # Slice the times array with our calculated slice indices - target_dataset.times[dataset_slices["time_start"]: - dataset_slices["time_end"] + 1], + target_dataset.times[dataset_slices["time_start"]: + dataset_slices["time_end"] + 1], # Slice the values array with our calculated slice indices - subset_values, - variable=target_dataset.variable, - units=target_dataset.units, - name=subregion_name, - 
origin=target_dataset.origin - ) - - + subset_values, + variable=target_dataset.variable, + units=target_dataset.units, + name=subregion_name, + origin=target_dataset.origin + ) + + if subregion.boundary_type == 'us_states' or subregion.boundary_type == 'countries': + start_time_index = np.where( + target_dataset.times == subregion.start)[0][0] + end_time_index = np.where(target_dataset.times == subregion.end)[0][0] + target_dataset = temporal_slice( + target_dataset, start_time_index, end_time_index) + nt, ny, nx = target_dataset.values.shape + spatial_mask = utils.mask_using_shapefile_info(target_dataset.lons, target_dataset.lats, subregion.masked_regions, extract = extract) + target_dataset.values = utils.propagate_spatial_mask_over_time(target_dataset.values, mask=spatial_mask) + return target_dataset + def temporal_slice(target_dataset, start_time_index, end_time_index): '''Temporally slice given dataset(s) with subregion information. This does not spatially subset the target_Dataset http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/ocw/utils.py ---------------------------------------------------------------------- diff --git a/ocw/utils.py b/ocw/utils.py index 2fab66f..0a9ba22 100755 --- a/ocw/utils.py +++ b/ocw/utils.py @@ -18,11 +18,13 @@ '''''' import sys +import os import datetime as dt import numpy as np import numpy.ma as ma -from mpl_toolkits.basemap import shiftgrid +from mpl_toolkits.basemap import shiftgrid, Basemap, maskoceans +from matplotlib.path import Path from dateutil.relativedelta import relativedelta from netCDF4 import num2date @@ -445,3 +447,91 @@ def calc_area_weighted_spatial_average(dataset, area_weight=False): spatial_average[it] = ma.average(dataset.values[it, :]) return spatial_average + +def shapefile_boundary(boundary_type, region_names): + ''' + :param boundary_type: The type of spatial subset boundary + :type boundary_type: :mod:'string' + + :param region_names: An array of regions for spatial subset + :type 
region_names: :mod:'list' + ''' + # Read the shapefile + map_read = Basemap() + regions = [] + shapefile_dir = os.sep.join([os.path.dirname(__file__), 'shape']) + map_read.readshapefile(os.path.join(shapefile_dir, boundary_type), + boundary_type) + if boundary_type == 'us_states': + for region_name in region_names: + for iregion, region_info in enumerate(map_read.us_states_info): + if region_info['st'] == region_name: + regions.append(np.array(map_read.us_states[iregion])) + elif boundary_type == 'countries': + for region_name in region_names: + for iregion, region_info in enumerate(map_read.countries_info): + if region_info['COUNTRY'].replace(" ","").lower() == region_name.replace(" ","").lower(): + regions.append(np.array(map_read.countries[iregion])) + return regions + +def CORDEX_boundary(domain_name): + ''' + :param domain_name: CORDEX domain name (http://www.cordex.org/) + :type domain_name: :mod:'string' + ''' + if domain_name =='southamerica': + return -57.61, 18.50, 254.28-360., 343.02-360. + if domain_name =='centralamerica': + return -19.46, 34.83, 235.74-360., 337.78-360. + if domain_name =='northamerica': + return 12.55, 75.88, 189.26-360., 336.74-360. 
+ if domain_name =='europe': + return 22.20, 71.84, 338.23-360., 64.4 + if domain_name =='africa': + return -45.76, 42.24, 335.36-360., 60.28 + if domain_name =='southasia': + return -15.23, 45.07, 19.88, 115.55 + if domain_name =='eastasia': + return -0.10, 61.90, 51.59, 179.99 + if domain_name =='centralasia': + return 18.34, 69.37, 11.05, 139.13 + if domain_name =='australasia': + return -52.36, 12.21, 89.25, 179.99 + if domain_name =='antartica': + return -89.48,-56.00, -179.00, 179.00 + if domain_name =='artic': + return 46.06, 89.50, -179.00, 179.00 + if domain_name =='mediterranean': + return 25.63, 56.66, 339.79-360.00, 50.85 + if domain_name =='middleeastnorthafrica': + return -7.00, 45.00, 333.00-360.00, 76.00 + if domain_name =='southeastasia': + return -15.14, 27.26, 89.26, 146.96 + +def mask_using_shapefile_info(lons, lats, masked_regions, extract = True): + if lons.ndim == 2 and lats.ndim == 2: + lons_2d = lons + lats_2d = lats + else: + lons_2d, lats_2d = np.meshgrid(lons, lats) + + points = np.array((lons_2d.flatten(), lats_2d.flatten())).T + for iregion, region in enumerate(masked_regions): + mpath = Path(region) + mask0 = mpath.contains_points(points).reshape(lons_2d.shape) + if iregion == 0: + mask = mask0 + else: + mask = mask|mask0 + if extract: + mask = np.invert(mask) + return mask + +def propagate_spatial_mask_over_time(data_array, mask): + if data_array.ndim == 3 and mask.ndim == 2: + nt = data_array.shape[0] + for it in np.arange(nt): + mask_original = data_array[it,:].mask + data_array[it,:] = ma.array(data_array[it,:], mask=mask|mask_original) + return data_array +