Repository: climate
Updated Branches:
  refs/heads/master 34bb0aaec -> 6598b9f1a
CLIMATE-903 - Adding a module to calculate the least-squares trend and standard error

- Debugging in dataset_processor
- A new function (utils.calculate_temporal_trends) to calculate the
  least-squares trend and standard error of the input time series
- A new function (plotter.fill_US_states_with_color) to visualize US NCA
  regions

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/77674bde
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/77674bde
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/77674bde

Branch: refs/heads/master
Commit: 77674bde27814c63a2d8da32019c92d928be1e12
Parents: 34bb0aa
Author: huikyole <huiky...@argo.jpl.nasa.gov>
Authored: Thu Mar 23 10:18:27 2017 -0700
Committer: huikyole <huiky...@argo.jpl.nasa.gov>
Committed: Thu Mar 23 10:18:27 2017 -0700

----------------------------------------------------------------------
 examples/temperature_trends_over_CONUS.py | 102 +++++++++++++++++++++++++
 ocw/dataset_processor.py                  | 102 ++++++++++++++++---------
 ocw/plotter.py                            |  71 +++++++++++++++++
 ocw/utils.py                              |  95 ++++++++++++++++++++---
 4 files changed, 325 insertions(+), 45 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/examples/temperature_trends_over_CONUS.py
----------------------------------------------------------------------
diff --git a/examples/temperature_trends_over_CONUS.py b/examples/temperature_trends_over_CONUS.py
new file mode 100644
index 0000000..0467291
--- /dev/null
+++ b/examples/temperature_trends_over_CONUS.py
@@ -0,0 +1,102 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+import datetime
+
+# import Apache OCW dependencies
+import ocw.data_source.local as local
+from ocw.dataset import Bounds as Bounds
+import ocw.dataset_processor as dsp
+import ocw.metrics as metrics
+import ocw.plotter as plotter
+import ocw.utils as utils
+import ssl
+if hasattr(ssl, '_create_unverified_context'):
+    ssl._create_default_https_context = ssl._create_unverified_context
+
+# nClimDiv observation file
+file_obs = 'nClimDiv/nClimDiv_tave_1895-2005.nc'
+
+# temporal boundary
+start_date = datetime.datetime(1979, 12, 1)
+end_date = datetime.datetime(2005, 8, 31)
+
+month_start = 6  # June
+month_end = 8    # August
+
+regions = []
+regions.append(['WA', 'OR', 'ID'])
+regions.append(['CA', 'NV', 'AZ', 'NM', 'UT', 'CO'])
+regions.append(['MT', 'WY', 'ND', 'SD', 'NE'])
+regions.append(['KS', 'TX', 'OK'])
+regions.append(['MN', 'IA', 'MO', 'WI', 'IL', 'IN', 'MI', 'OH'])
+regions.append(['ME', 'VT', 'NH', 'MA', 'NY', 'CT', 'RI', 'NJ', 'PA', 'WV', 'DE', 'MD'])
+regions.append(['KY', 'VA', 'AR', 'AL', 'LA', 'MS', 'FL', 'GA', 'NC', 'SC', 'DC', 'TN'])
+
+plotter.fill_US_states_with_color(regions, 'NCA_seven_regions', colors=True,
+                                  region_names=['NW', 'SW', 'NGP', 'SGP', 'MW', 'NE', 'SE'])
+
+# CONUS regional boundaries
+NW_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[0])
+SW_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[1])
+NGP_bounds = Bounds(boundary_type='us_states',
+                    us_states=regions[2])
+SGP_bounds = Bounds(boundary_type='us_states',
+                    us_states=regions[3])
+MW_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[4])
+NE_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[5])
+SE_bounds = Bounds(boundary_type='us_states',
+                   us_states=regions[6])
+
+
+""" Load nClimDiv file into OCW Dataset """
+obs_dataset = local.load_file(file_obs, variable_name='tave')
+
+""" Temporal subset of obs_dataset """
+obs_dataset_subset = dsp.temporal_slice(obs_dataset,
+                                        start_time=start_date, end_time=end_date)
+obs_dataset_season = dsp.temporal_subset(obs_dataset_subset, month_start, month_end,
+                                         average_each_year=True)
+
+""" Spatial subset of obs_dataset and generate time series """
+NW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NW_bounds))
+SW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SW_bounds))
+NGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NGP_bounds))
+SGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SGP_bounds))
+MW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, MW_bounds))
+NE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NE_bounds))
+SE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SE_bounds))
+
+year = np.arange(len(NW_timeseries))
+
+regional_trends = []
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NW_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SW_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NGP_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SGP_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, MW_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NE_timeseries)[0])
+regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SE_timeseries)[0])
+
+plotter.fill_US_states_with_color(regions,
+                                  'NCA_tave_trends_JJA_1980-2005',
+                                  values=regional_trends,
+                                  region_names=['%.3f' % (10 * i) for i in regional_trends])
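The regional trend values plotted above come from an ordinary least-squares fit to each regional mean time series. As a minimal standalone sketch of that computation (synthetic data; only numpy and scipy assumed, independent of OCW):

    import numpy as np
    from scipy import stats

    # Synthetic JJA-mean temperature series: 0.02 deg/yr trend plus noise
    years = np.arange(26)
    temps = 0.02 * years + np.random.normal(scale=0.3, size=years.size)

    # slope is the trend per year; stderr is the standard error of the slope
    slope, intercept, r_value, p_value, stderr = stats.linregress(years, temps)
    print('trend: %.3f deg/decade (+/- %.3f)' % (10 * slope, 10 * stderr))

The example script multiplies each trend by 10 for the same reason when labeling regions: the fit is per year, the labels are per decade.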

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index b113bc7..f8e740a 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -401,23 +401,28 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
     _are_bounds_contained_by_dataset(target_dataset, subregion)
 
     if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2:
-        start_time_index = np.where(
-            target_dataset.times >= subregion.start)[0][0]
-        end_time_index = np.where(
-            target_dataset.times <= subregion.end)[0][-1]
-        target_dataset = temporal_slice(
-            target_dataset, start_time_index, end_time_index)
-        nt, ny, nx = target_dataset.values.shape
+        temporal_subset = temporal_slice(
+            target_dataset, subregion.start, subregion.end)
+        nt, ny, nx = temporal_subset.values.shape
         y_index, x_index = np.where(
             (target_dataset.lats >= subregion.lat_max) | (
                 target_dataset.lats <= subregion.lat_min) |
             (target_dataset.lons >= subregion.lon_max) | (
                 target_dataset.lons <= subregion.lon_min))
         for it in np.arange(nt):
-            target_dataset.values[it, y_index, x_index] = 1.e+20
-        target_dataset.values = ma.masked_equal(
-            target_dataset.values, 1.e+20)
-        return target_dataset
+            temporal_subset.values[it, y_index, x_index] = 1.e+20
+        new_values = ma.masked_equal(
+            temporal_subset.values, 1.e+20)
+        return ds.Dataset(
+            target_dataset.lats,
+            target_dataset.lons,
+            temporal_subset.times,
+            new_values,
+            variable=target_dataset.variable,
+            units=target_dataset.units,
+            name=subregion_name,
+            origin=target_dataset.origin
+        )
     elif target_dataset.lats.ndim == 1 and target_dataset.lons.ndim == 1:
         # Get subregion indices into subregion data
@@ -466,36 +471,52 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
         )
 
     if subregion.boundary_type == 'us_states' or subregion.boundary_type == 'countries':
-        start_time_index = np.where(
-            target_dataset.times == subregion.start)[0][0]
-        end_time_index = np.where(target_dataset.times == subregion.end)[0][0]
-        target_dataset = temporal_slice(
-            target_dataset, start_time_index, end_time_index)
-        nt, ny, nx = target_dataset.values.shape
+        temporal_subset = temporal_slice(
+            target_dataset, subregion.start, subregion.end)
         spatial_mask = utils.mask_using_shapefile_info(target_dataset.lons,
                                                        target_dataset.lats,
                                                        subregion.masked_regions,
                                                        extract=extract)
-        target_dataset.values = utils.propagate_spatial_mask_over_time(
-            target_dataset.values, mask=spatial_mask)
-        return target_dataset
+        subset_values = utils.propagate_spatial_mask_over_time(
+            temporal_subset.values, mask=spatial_mask)
+        return ds.Dataset(
+            target_dataset.lats,
+            target_dataset.lons,
+            temporal_subset.times,
+            subset_values,
+            variable=target_dataset.variable,
+            units=target_dataset.units,
+            name=subregion_name,
+            origin=target_dataset.origin
+        )
 
     if subregion.boundary_type == 'user':
+        temporal_subset = temporal_slice(
+            target_dataset, subregion.start, subregion.end)
         spatial_mask = utils.regrid_spatial_mask(target_dataset.lons,
                                                  target_dataset.lats,
                                                  subregion.mask_longitude,
                                                  subregion.mask_latitude,
                                                  subregion.mask_variable,
                                                  user_mask_values,
                                                  extract=extract)
-        target_dataset.values = utils.propagate_spatial_mask_over_time(
-            target_dataset.values, mask=spatial_mask)
-        return target_dataset
+        subset_values = utils.propagate_spatial_mask_over_time(
+            temporal_subset.values, mask=spatial_mask)
+        return ds.Dataset(
+            target_dataset.lats,
+            target_dataset.lons,
+            temporal_subset.times,
+            subset_values,
+            variable=target_dataset.variable,
+            units=target_dataset.units,
+            name=subregion_name,
+            origin=target_dataset.origin
+        )
 
 
-def temporal_slice(target_dataset, start_time_index, end_time_index):
+def temporal_slice(target_dataset, start_time, end_time):
     '''Temporally slice given dataset(s) with subregion information.
 
     This does not spatially subset the target_dataset.
 
-    :param start_time_index: time index of the start time
-    :type start_time_index: :class:`int`
+    :param start_time: start time
+    :type start_time: :class:`datetime.datetime`
 
-    :param end_time_index: time index of the end time
-    :type end_time_index: :class:`int`
+    :param end_time: end time
+    :type end_time: :class:`datetime.datetime`
 
     :param target_dataset: The Dataset object to subset.
     :type target_dataset: :class:`dataset.Dataset`
@@ -505,14 +526,23 @@ def temporal_slice(target_dataset, start_time_index, end_time_index):
 
     :raises: ValueError
     '''
-    start_date = target_dataset.times[start_time_index]
-    end_date = target_dataset.times[end_time_index]
-    timeStart = min(np.nonzero(target_dataset.times >= start_date)[0])
-    timeEnd = max(np.nonzero(target_dataset.times <= end_date)[0])
-    target_dataset.times = target_dataset.times[timeStart:timeEnd + 1]
-    target_dataset.values = target_dataset.values[timeStart:timeEnd + 1, :]
-
-    return target_dataset
+    start_time_index = np.where(
+        target_dataset.times >= start_time)[0][0]
+    end_time_index = np.where(
+        target_dataset.times <= end_time)[0][-1]
+    new_times = target_dataset.times[start_time_index:end_time_index + 1]
+    new_values = target_dataset.values[start_time_index:end_time_index + 1, :]
+
+    return ds.Dataset(
+        target_dataset.lats,
+        target_dataset.lons,
+        new_times,
+        new_values,
+        variable=target_dataset.variable,
+        units=target_dataset.units,
+        origin=target_dataset.origin
+    )
 
 
 def safe_subset(target_dataset, subregion, subregion_name=None):
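The main behavioral changes in dataset_processor.py: temporal_slice now accepts datetime objects instead of integer time indices, and both subset and temporal_slice return new Dataset objects rather than mutating their input. A hedged usage sketch (the file path and variable name are reused from the example script above):

    import datetime
    import ocw.data_source.local as local
    import ocw.dataset_processor as dsp

    dataset = local.load_file('nClimDiv/nClimDiv_tave_1895-2005.nc',
                              variable_name='tave')

    # Slice by datetime bounds; the input dataset is left untouched
    sliced = dsp.temporal_slice(dataset,
                                start_time=datetime.datetime(1979, 12, 1),
                                end_time=datetime.datetime(2005, 8, 31))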

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/ocw/plotter.py
----------------------------------------------------------------------
diff --git a/ocw/plotter.py b/ocw/plotter.py
index 9e2e0ad..8e56b1d 100755
--- a/ocw/plotter.py
+++ b/ocw/plotter.py
@@ -18,12 +18,16 @@ from tempfile import TemporaryFile
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
+from matplotlib.patches import Polygon
+from matplotlib.collections import PatchCollection
 from mpl_toolkits.basemap import Basemap
 from mpl_toolkits.axes_grid1 import ImageGrid
 import scipy.stats.mstats as mstats
 import numpy as np
 import numpy.ma as ma
 
+import ocw.utils as utils
+
 # Set the default colormap to coolwarm
 mpl.rc('image', cmap='coolwarm')
 
@@ -1087,3 +1091,70 @@ def draw_histogram(dataset_array, data_names, fname, fmt='png', nbins=10):
                  data_max + (data_max - data_min) * 0.15])
 
     fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight', dpi=fig.dpi)
+
+
+def fill_US_states_with_color(regions, fname, fmt='png', ptitle='',
+                              colors=False, values=None, region_names=None):
+    ''' Fill the states over the contiguous US with colors.
+
+    :param regions: The list of subregions (lists of US states) to be filled
+        with different colors.
+    :type regions: :class:`list`
+    :param fname: The filename of the plot.
+    :type fname: :mod:`string`
+    :param fmt: (Optional) filetype for the output.
+    :type fmt: :mod:`string`
+    :param ptitle: (Optional) plot title.
+    :type ptitle: :mod:`string`
+    :param colors: (Optional) If True, each region is filled with a different
+        color without using values.
+    :type colors: :class:`bool`
+    :param values: (Optional) If colors is False, the fill color of each
+        region is scaled by the corresponding element of values.
+    :type values: :class:`numpy.ndarray`
+    :param region_names: (Optional) labels drawn at the center of each region.
+    :type region_names: :class:`list`
+    '''
+
+    nregion = len(regions)
+    if colors:
+        cmap = plt.cm.rainbow
+    if values is not None:
+        cmap = plt.cm.seismic
+        max_abs = np.abs(values).max()
+
+    # Set up the figure
+    fig = plt.figure()
+    fig.set_size_inches((8.5, 11.))
+    fig.dpi = 300
+    ax = fig.add_subplot(111)
+
+    # create the map
+    m = Basemap(llcrnrlon=-127, llcrnrlat=22, urcrnrlon=-65, urcrnrlat=52,
+                ax=ax)
+
+    for iregion, region in enumerate(regions):
+        shapes = utils.shapefile_boundary('us_states', region)
+        patches = []
+        lats = np.empty(0)
+        lons = np.empty(0)
+        for shape in shapes:
+            patches.append(Polygon(np.array(shape), True))
+            lons = np.append(lons, shape[:, 0])
+            lats = np.append(lats, shape[:, 1])
+        if colors:
+            color_to_fill = cmap((iregion + 0.5) / nregion)
+        if values is not None:
+            value = values[iregion]
+            color_to_fill = cmap(0.5 + np.sign(value) * abs(value) / max_abs * 0.45)
+        ax.add_collection(PatchCollection(patches, facecolor=color_to_fill))
+        if region_names:
+            ax.text(lons.mean(), lats.mean(), region_names[iregion],
+                    ha='center', va='center', fontsize=10)
+    m.drawcountries(linewidth=0.)
+
+    # Add the title
+    ax.set_title(ptitle)
+
+    # Save the figure
+    fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight', dpi=fig.dpi)
+    fig.clf()
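When values are supplied, fill_US_states_with_color centers them on the midpoint of the diverging seismic colormap, so positive and negative trends of equal magnitude get equally saturated reds and blues. The mapping in isolation (the sample values are illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    values = np.array([0.3, -0.1, 0.05])   # e.g. per-year regional trends
    max_abs = np.abs(values).max()
    cmap = plt.cm.seismic

    # 0.5 is white in 'seismic'; the 0.45 factor keeps fills inside the
    # extremes of the colormap
    fractions = 0.5 + np.sign(values) * np.abs(values) / max_abs * 0.45
    region_colors = [cmap(f) for f in fractions]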

http://git-wip-us.apache.org/repos/asf/climate/blob/77674bde/ocw/utils.py
----------------------------------------------------------------------
diff --git a/ocw/utils.py b/ocw/utils.py
index ed7f0cb..ccc4ea9 100755
--- a/ocw/utils.py
+++ b/ocw/utils.py
@@ -26,8 +26,9 @@ import numpy.ma as ma
 from mpl_toolkits.basemap import shiftgrid, Basemap
 from matplotlib.path import Path
 from dateutil.relativedelta import relativedelta
-from netCDF4 import num2date
+from netCDF4 import num2date, date2num
 import scipy.interpolate as interpolate
+import scipy.stats as stats
 from scipy.ndimage import map_coordinates
 
@@ -470,8 +471,7 @@ def shapefile_boundary(boundary_type, region_names):
     regions = []
     shapefile_dir = os.sep.join([os.path.dirname(__file__), 'shape'])
     map_read.readshapefile(os.path.join(shapefile_dir, boundary_type),
-                           boundary_type)
-
+                           boundary_type, drawbounds=False)
     # Note: The shapefile may contain countries or states with latin letters.
     # Hence, the output of readshapefile can be a mix of ascii and unicode
    # strings. Because python 2 and 3 treat them differently, we must
@@ -480,8 +480,9 @@ def shapefile_boundary(boundary_type, region_names):
         for region_name in region_names:
             region_name = _force_unicode(region_name)
             for iregion, region_info in enumerate(map_read.us_states_info):
-                state = _force_unicode(region_info['st'], 'latin-1')
-                if state == region_name:
+                state1 = _force_unicode(region_info['st'], 'latin-1')
+                state2 = _force_unicode(region_info['state'], 'latin-1')
+                if state1 == region_name or state2 == region_name:
                     regions.append(np.array(map_read.us_states[iregion]))
     elif boundary_type == 'countries':
         for region_name in region_names:
@@ -594,10 +595,19 @@ def regrid_spatial_mask(target_lon, target_lat, mask_lon, mask_lat, mask_var,
 
 def propagate_spatial_mask_over_time(data_array, mask):
     if data_array.ndim == 3 and mask.ndim == 2:
         nt = data_array.shape[0]
-        for it in np.arange(nt):
-            mask_original = data_array[it, :].mask
-            data_array.mask[it, :] = mask | mask_original
-        return data_array
+        new_data_array = ma.zeros(data_array.shape)
+        # A scalar mask (including ma.nomask) means no element is masked yet
+        if isinstance(data_array.mask, (bool, np.bool_)):
+            new_mask = mask
+            for it in np.arange(nt):
+                new_data_array[it, :] = ma.array(data_array[it, :],
+                                                 mask=new_mask)
+        else:
+            for it in np.arange(nt):
+                new_mask = data_array[it, :].mask | mask
+                new_data_array[it, :] = ma.array(data_array[it, :],
+                                                 mask=new_mask)
+
+        return new_data_array
 
 
 def convert_lat_lon_2d_array(lon, lat):
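The rewritten propagate_spatial_mask_over_time ORs a 2-D spatial mask into each time slice's own mask and returns a new array instead of writing into the input. The core numpy.ma idea, reduced to toy shapes:

    import numpy as np
    import numpy.ma as ma

    data = ma.masked_invalid(np.random.rand(4, 3, 3))   # (time, lat, lon)
    spatial_mask = np.zeros((3, 3), dtype=bool)
    spatial_mask[0, :] = True                           # hide the first row at all times

    masked = ma.zeros(data.shape)
    for it in range(data.shape[0]):
        # getmaskarray always yields a full boolean mask, even for ma.nomask
        masked[it, :] = ma.array(data[it, :],
                                 mask=ma.getmaskarray(data[it, :]) | spatial_mask)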
@@ -614,3 +624,70 @@ def _force_unicode(s, encoding='utf-8'):
         s = s.decode(encoding=encoding)
 
     return s
+
+
+def calculate_temporal_trends(dataset):
+    ''' Calculate temporal trends in dataset.values.
+
+    :param dataset: The dataset whose values are used to calculate
+        grid-point trends.
+    :type dataset: :class:`dataset.Dataset`
+
+    :returns: Arrays of the temporal trend and its standard error
+    :rtype: :class:`numpy.ma.core.MaskedArray`
+    '''
+
+    nt, ny, nx = dataset.values.shape
+    x = np.arange(nt)
+
+    trend = np.zeros([ny, nx]) - 999.
+    slope_err = np.zeros([ny, nx]) - 999.
+    for iy in np.arange(ny):
+        for ix in np.arange(nx):
+            # Fit only grid points whose time series is complete (unmasked)
+            if dataset.values[:, iy, ix].count() == nt:
+                trend[iy, ix], slope_err[iy, ix] = calculate_temporal_trend_of_time_series(
+                    x, dataset.values[:, iy, ix])
+
+    return ma.masked_equal(trend, -999.), ma.masked_equal(slope_err, -999.)
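A hedged usage sketch for the new gridded helper, continuing from the obs_dataset_season object built in the example script (names reused from there):

    import ocw.utils as utils

    # Per-grid-point trend and slope standard error; grid points with any
    # masked time steps come back masked
    trend, slope_err = utils.calculate_temporal_trends(obs_dataset_season)
    print(trend.shape)    # (ny, nx) masked arrays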
+
+
+def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000):
+    ''' Calculate temporal trends in an ensemble of time series.
+
+    :param timeseries_array: A list whose elements are one-dimensional numpy arrays
+    :type timeseries_array: :class:`list`
+
+    :param number_of_samples: The number of bootstrap samples drawn to
+        estimate the trend error.
+    :type number_of_samples: :class:`int`
+
+    :returns: Temporal trend of the ensemble mean and its error estimated
+        by bootstrapping
+    :rtype: :class:`float`, :class:`float`
+    '''
+
+    nmodels = len(timeseries_array)
+    nt = len(timeseries_array[0])
+    x = np.arange(nt)
+    merged_array = np.zeros([nmodels, nt])
+    sampled_trend = np.zeros(number_of_samples)
+    for imodel, timeseries in enumerate(timeseries_array):
+        merged_array[imodel, :] = timeseries[:]
+    ensemble_trend, _ = calculate_temporal_trend_of_time_series(
+        x, np.mean(merged_array, axis=0))
+
+    for isample in np.arange(number_of_samples):
+        # Resample ensemble members with replacement and refit the trend
+        index = np.random.choice(nmodels, size=nmodels, replace=True)
+        random_ensemble = np.mean(merged_array[index, :], axis=0)
+        sampled_trend[isample], _ = calculate_temporal_trend_of_time_series(
+            x, random_ensemble)
+    return ensemble_trend, np.std(sampled_trend, ddof=1)
+
+
+def calculate_temporal_trend_of_time_series(x, y):
+    ''' Calculate the least-squares trend (a) in y = ax + b and a's standard error.
+
+    :param x: time series
+    :type x: :class:`numpy.ndarray`
+
+    :param y: time series
+    :type y: :class:`numpy.ndarray`
+
+    :returns: temporal trend and standard error
+    :rtype: :class:`float`, :class:`float`
+    '''
+    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
+    # stats.linregress already returns the standard error of the slope
+    return slope, std_err
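calculate_ensemble_temporal_trends estimates the trend uncertainty by bootstrapping over ensemble members: draw members with replacement, refit the ensemble-mean trend, and take the spread of the refitted trends. The same idea stripped down to numpy and scipy (toy ensemble; the sizes are illustrative):

    import numpy as np
    from scipy import stats

    nmodels, nt, nsamples = 8, 26, 1000
    x = np.arange(nt)
    # Toy ensemble: shared 0.02/yr trend plus member-specific noise
    ensemble = 0.02 * x + np.random.normal(scale=0.3, size=(nmodels, nt))

    sampled = np.empty(nsamples)
    for i in range(nsamples):
        idx = np.random.choice(nmodels, size=nmodels, replace=True)
        sampled[i] = stats.linregress(x, ensemble[idx].mean(axis=0))[0]

    print('bootstrap standard error of the trend: %.4f' % sampled.std(ddof=1))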