Repository: climate Updated Branches: refs/heads/master 6598b9f1a -> 0ed13a19c
CLIMATE-905 - Box whisker plot to evaluate simulated trends - example/temperature/trends_over_CONUS.py has been updated - data_source.local.load_multiple_files has been updated to load the CMIP5 files without file concatenation. - dataset_processor has been updated. - plotter has a new module (draw_plot_to_compare_trends) to generate a plot to compare observed trends with simulated ones. - test_local.py has been updated to reflect the changes in - utils.calculate_temporal_trend_of_time_series has been debugged. - utils.calculate_ensemble_temporal_trends has been updated. Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/6f6cde83 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/6f6cde83 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/6f6cde83 Branch: refs/heads/master Commit: 6f6cde83ed23e5d121244896c97638070e280a74 Parents: 6598b9f Author: huikyole <[email protected]> Authored: Sun Apr 16 23:01:39 2017 -0700 Committer: huikyole <[email protected]> Committed: Sun Apr 16 23:01:39 2017 -0700 ---------------------------------------------------------------------- examples/temperature_trends_over_CONUS.py | 100 ++++++++++++++++++------- ocw/data_source/local.py | 78 ++++++++++--------- ocw/dataset_processor.py | 13 ++-- ocw/plotter.py | 53 ++++++++++++- ocw/tests/test_local.py | 5 +- ocw/utils.py | 19 ++--- 6 files changed, 188 insertions(+), 80 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/examples/temperature_trends_over_CONUS.py ---------------------------------------------------------------------- diff --git a/examples/temperature_trends_over_CONUS.py b/examples/temperature_trends_over_CONUS.py index 0467291..54f5843 100644 --- a/examples/temperature_trends_over_CONUS.py +++ b/examples/temperature_trends_over_CONUS.py @@ -25,17 +25,23 @@ import ocw.dataset_processor as dsp import ocw.metrics as metrics import ocw.plotter as plotter import ocw.utils as utils -import ssl -if hasattr(ssl, '_create_unverified_context'): - ssl._create_default_https_context = ssl._create_unverified_context # nClimDiv observation file file_obs = 'nClimDiv/nClimDiv_tave_1895-2005.nc' +# CMIP5 simulations +model_file_path = 'CMIP5_historical' +dataset_name = ['BNU-ESM','CanESM2','CNRM-CM5','CSIRO-Mk3', + 'GISS-E2-H','HadCM3','inmcm4','MIROC-ESM', + 'MPI-ESM-LR','NorESM1-M'] +nmodel = len(dataset_name) # number of CMIP5 simulations + # temporal boundary start_date = datetime.datetime(1979, 12, 1) end_date = datetime.datetime(2005, 8, 31) +nyear = 26 + month_start = 6 # June month_end = 8 # August @@ -51,6 +57,8 @@ regions.append(['KY', 'VA', 'AR','AL', 'LA','MS', 'FL', 'GA','NC','SC', 'DC','TN plotter.fill_US_states_with_color(regions, 'NCA_seven_regions', colors=True, region_names=['NW','SW','NGP','SGP','MW','NE','SE']) +n_region = 7 # number of regions + # CONUS regional boundaries NW_bounds = Bounds(boundary_type='us_states', us_states=regions[0]) @@ -67,36 +75,78 @@ NE_bounds = Bounds(boundary_type='us_states', SE_bounds = Bounds(boundary_type='us_states', us_states=regions[6]) +regional_bounds = [NW_bounds, SW_bounds, NGP_bounds, + SGP_bounds, MW_bounds, NE_bounds, SE_bounds] """ Load nClimDiv file into OCW Dataset """ obs_dataset = local.load_file(file_obs, variable_name='tave') +""" Load CMIP5 simulations into a list of OCW Datasets""" +model_dataset = local.load_multiple_files(file_path=model_file_path, variable_name='tas', + dataset_name=dataset_name, variable_unit='K') + """ Temporal subset of obs_dataset """ obs_dataset_subset = dsp.temporal_slice(obs_dataset, start_time=start_date, end_time=end_date) obs_dataset_season = dsp.temporal_subset(obs_dataset_subset, month_start, month_end, average_each_year=True) + +""" Temporal subset of model_dataset """ +model_dataset_subset = [dsp.temporal_slice(dataset,start_time=start_date, end_time=end_date) + for dataset in model_dataset] +model_dataset_season = [dsp.temporal_subset(dataset, month_start, month_end, + average_each_year=True) for dataset in model_dataset_subset] + + """ Spatial subset of obs_dataset and generate time series """ -NW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NW_bounds)) -SW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SW_bounds)) -NGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NGP_bounds)) -SGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SGP_bounds)) -MW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, MW_bounds)) -NE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NE_bounds)) -SE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SE_bounds)) - -year = np.arange(len(NW_timeseries)) - -regional_trends=[] -regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NW_timeseries)[0]) -regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SW_timeseries)[0]) -regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NGP_timeseries)[0]) -regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SGP_timeseries)[0]) -regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, MW_timeseries)[0]) -regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NE_timeseries)[0]) -regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SE_timeseries)[0]) - -plotter.fill_US_states_with_color(regions, 'NCA_tave_trends_JJA_1980-2005', - values=regional_trends, - region_names=['%.3f' %(10*i) for i in regional_trends]) +obs_timeseries = np.zeros([nyear, n_region]) # region index 0-6: NW, SW, NGP, SGP, MW, NE, SE +model_timeseries = np.zeros([nmodel, nyear, n_region]) + +for iregion in np.arange(n_region): + obs_timeseries[:, iregion] = utils.calc_time_series( + dsp.subset(obs_dataset_season, regional_bounds[iregion])) + for imodel in np.arange(nmodel): + model_timeseries[imodel, :, iregion] = utils.calc_time_series( + dsp.subset(model_dataset_season[imodel], regional_bounds[iregion])) + +year = np.arange(nyear) + +regional_trends_obs = np.zeros(n_region) +regional_trends_obs_error = np.zeros(n_region) +regional_trends_model = np.zeros([nmodel, n_region]) +regional_trends_model_error = np.zeros([nmodel, n_region]) +regional_trends_ens = np.zeros(n_region) +regional_trends_ens_error = np.zeros(n_region) + +for iregion in np.arange(n_region): + regional_trends_obs[iregion], regional_trends_obs_error[iregion] = utils.calculate_temporal_trend_of_time_series( + year, obs_timeseries[:,iregion]) + for imodel in np.arange(nmodel): + regional_trends_model[imodel, iregion], regional_trends_model_error[iregion] = utils.calculate_temporal_trend_of_time_series( + year, model_timeseries[imodel, :, iregion]) + regional_trends_ens[iregion], regional_trends_ens_error[iregion] = utils.calculate_ensemble_temporal_trends( + model_timeseries[:, :, iregion]) + +""" Generate plots """ + +plotter.fill_US_states_with_color(regions, 'nClimDiv_tave_trends_JJA_1980-2005', + values=regional_trends_obs, + region_names=['%.3f' %(10*i) for i in regional_trends_obs]) + +plotter.fill_US_states_with_color(regions, 'CMIP5_ENS_tave_trends_JJA_1980-2005', + values=regional_trends_ens, + region_names=['%.3f' %(10*i) for i in regional_trends_ens]) + +bias_ens = regional_trends_ens - regional_trends_obs +plotter.fill_US_states_with_color(regions, 'CMIP5_ENS_tave_trends_bias_from_nClimDiv_JJA_1980-2005', + values=bias_ens, + region_names=['%.3f' %(10*i) for i in bias_ens]) + +obs_data = np.vstack([regional_trends_obs, regional_trends_obs_error]) +ens_data = np.vstack([regional_trends_ens, regional_trends_ens_error]) + +plotter.draw_plot_to_compare_trends(obs_data, ens_data, regional_trends_model, + fname='Trends_comparison_btn_CMIP5_and_nClimDiv', + data_labels=['NW','SW','NGP','SGP','MW','NE','SE'], + xlabel='NCA regions', ylabel='tas trend [K/year]') http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/data_source/local.py ---------------------------------------------------------------------- diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index 0c29588..78cf3cc 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -302,27 +302,24 @@ def load_file(file_path, def load_multiple_files(file_path, variable_name, - dataset_name='data', + generic_dataset_name=False, + dataset_name=None, variable_unit=None, lat_name=None, lon_name=None, time_name=None): ''' load multiple netcdf files with common filename pattern and return an array of OCW datasets - :param file_path: directory name and common file name patterns where the NetCDF files to load are stored. + :param file_path: directory name and (optionally common file name patterns) where the NetCDF files to load are stored. :type file_path: :mod:`string` - :param dataset_name: a name of dataset when reading a single file - :type dataset_name: :mod:'string' + :param dataset_name: a list of dataset names. Data filenames must include the elements of dataset_name list. + :type dataset_name: :mod:'list' + :param generic_dataset_name: If false, data filenames must include the elements of dataset_name list. + :type generic_dataset_name: :mod:'bool' :param variable_name: The variable name to load from the NetCDF file. :type variable_name: :mod:`string` :param variable_unit: (Optional) The variable unit to load from the NetCDF file. :type variable_unit: :mod:`string` - :param elevation_index: (Optional) The elevation index for which data should - be returned. Climate data is often times 4 dimensional data. Some - datasets will have readins at different height/elevation levels. OCW - expects 3D data so a single layer needs to be stripped out when loading. - By default, the first elevation layer is used. If desired you may - specify the elevation value to use. :param lat_name: (Optional) The latitude variable name to extract from the dataset. :type lat_name: :mod:`string` @@ -335,33 +332,42 @@ def load_multiple_files(file_path, :returns: An array of OCW Dataset objects :rtype: :class:`list` ''' + datasets = [] - data_filenames = [] - data_filenames.extend(glob(file_path)) - data_filenames.sort() - - # number of files - ndata = len(data_filenames) - if type(dataset_name) is str and ndata == 1: - data_name = [dataset_name] - elif type(dataset_name).__name__ == 'list': - if len(dataset_name) == ndata: - data_name = [name for name in dataset_name] - else: + if not dataset_name: + data_filename = glob(file_path) + data_filename.sort() + ndata = len(data_filename) data_name = [] - data_filenames_reversed = [] - for element in data_filenames: - data_filenames_reversed.append(element[::-1]) - prefix = os.path.commonprefix(data_filenames) - postfix = os.path.commonprefix(data_filenames_reversed)[::-1] - for element in data_filenames: - data_name.append(element.replace(prefix, '').replace(postfix, '')) + data_filename_reversed = [i[::-1] for i in data_filename] + prefix = os.path.commonprefix(data_filename) + postfix = os.path.commonprefix(data_filename_reversed)[::-1] + data_name = [i.replace(prefix,'').replace(postfix,'') for i in data_filename] - datasets = [] - for ifile, filename in enumerate(data_filenames): - datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile], + for ifile, filename in enumerate(data_filename): + datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile], + lat_name=lat_name, lon_name=lon_name, time_name=time_name)) + elif generic_dataset_name: + data_name = [i for i in dataset_name] + data_filename = glob(file_path) + data_filename.sort() + for ifile, filename in enumerate(data_filename): + datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile], lat_name=lat_name, lon_name=lon_name, time_name=time_name)) + else: + data_name = [i for i in dataset_name] + pattern = [['*'+i+'*'] for i in dataset_name] + if file_path[-1] != '/': + file_path = file_path+'/' + + ndata = len(dataset_name) + for idata in numpy.arange(ndata): + datasets.append(load_dataset_from_multiple_netcdf_files(variable_name, + variable_unit=variable_unit, name=data_name[idata], + lat_name=lat_name, lon_name=lon_name, time_name=time_name, + file_path=file_path, filename_pattern=pattern[idata])) + return datasets @@ -443,7 +449,7 @@ def load_WRF_2d_files_RAIN(file_path=None, return Dataset(lats, lons, times2, values, variable_name, units=variable_unit, name=name) -def load_dataset_from_multiple_netcdf_files(variable_name, +def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None, lat_name=None, lon_name=None, time_name=None, name='', file_list=None, file_path=None, filename_pattern=None, mask_file=None, mask_variable=None, mask_value=0): @@ -457,6 +463,9 @@ def load_dataset_from_multiple_netcdf_files(variable_name, :param variable_name: The variable name to load from the NetCDF file. :type variable_name: :mod:`string` + :param variable_name: The variable's unit to load from the NetCDF file. + :type variable_name: :mod:`string` + :param lat_name: (Optional) The latitude variable name to extract from the \ dataset. :type lat_name: :mod:`string` @@ -527,7 +536,8 @@ def load_dataset_from_multiple_netcdf_files(variable_name, else: data_values = numpy.concatenate((data_values, values0)) times = numpy.array(times) - return Dataset(lats, lons, times, data_values, variable_name, name=name) + return Dataset(lats, lons, times, data_values, + variable_name, units=variable_unit,name=name) def load_NLDAS_forcingA_files(file_path=None, http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/dataset_processor.py ---------------------------------------------------------------------- diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 10f41e3..18058ca 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -391,8 +391,11 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma ''' if not subregion.start: - subregion.start = target_dataset.times[0] - subregion.end = target_dataset.times[-1] + time_start = target_dataset.times[0] + time_end = target_dataset.times[-1] + else: + time_start = subregion.start + time_end = subregion.end if not subregion_name: subregion_name = target_dataset.name @@ -402,7 +405,7 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2: temporal_subset = temporal_slice( - target_dataset, subregion.start, subregion.end) + target_dataset, time_start, time_end) nt = temporal_subset.values.shape[0] y_index, x_index = np.where( (target_dataset.lats >= subregion.lat_max) | ( @@ -471,7 +474,7 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma if subregion.boundary_type == 'us_states' or subregion.boundary_type == 'countries': temporal_subset = temporal_slice( - target_dataset, subregion.start, subregion.end) + target_dataset, time_start, time_end) spatial_mask = utils.mask_using_shapefile_info(target_dataset.lons, target_dataset.lats, subregion.masked_regions, extract=extract) subset_values = utils.propagate_spatial_mask_over_time( @@ -488,7 +491,7 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma if subregion.boundary_type == 'user': temporal_subset = temporal_slice( - target_dataset, subregion.start, subregion.end) + target_dataset, time_start, time_end) spatial_mask = utils.regrid_spatial_mask(target_dataset.lons, target_dataset.lats, subregion.mask_longitude, subregion.mask_latitude, subregion.mask_variable, user_mask_values, extract=extract) http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/plotter.py ---------------------------------------------------------------------- diff --git a/ocw/plotter.py b/ocw/plotter.py index c0a6858..55302a9 100755 --- a/ocw/plotter.py +++ b/ocw/plotter.py @@ -1117,7 +1117,7 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='', nregion = len(regions) if colors: cmap = plt.cm.rainbow - if values: + if not (values is None): cmap = plt.cm.seismic max_abs = np.abs(values).max() @@ -1143,7 +1143,7 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='', lats = np.append(lats, shape[:,1]) if colors: color_to_fill=cmap((iregion+0.5)/nregion) - if values: + if not (values is None): value = values[iregion] color_to_fill = cmap(0.5+np.sign(value)*abs(value)/max_abs*0.45) ax.add_collection(PatchCollection(patches, facecolor=color_to_fill)) @@ -1157,3 +1157,52 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='', # Save the figure fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight', dpi=fig.dpi) fig.clf() + +def draw_plot_to_compare_trends(obs_data, ens_data, model_data, + fname, fmt='png', ptitle='', data_labels=None, + xlabel='', ylabel=''): + + ''' Fill the States over the contiguous US with colors + + :param obs_data: An array of observed trend and standard errors for regions + :type obs_data: :class:'numpy.ndarray' + :param ens_data: An array of trend and standard errors from a multi-model ensemble for regions + :type ens_data: : class:'numpy.ndarray' + :param model_data: An array of trends from models for regions + :type model_data: : class:'numpy.ndarray' + :param fname: The filename of the plot. + :type fname: :mod:`string` + :param fmt: (Optional) filetype for the output. + :type fmt: :mod:`string` + :param ptitle: (Optional) plot title. + :type ptitle: :mod:`string` + :param data_labels: (Optional) names of the regions + :type data_labels: :mod:`list` + :param xlabel: (Optional) a label for x-axis + :type xlabel: :mod:`string` + :param ylabel: (Optional) a label for y-axis + :type ylabel: :mod:`string` + ''' + nregions = obs_data.shape[1] + + # Set up the figure + fig = plt.figure() + fig.set_size_inches((8.5, 11.)) + fig.dpi = 300 + ax = fig.add_subplot(111) + + b_plot = ax.boxplot(model_data, widths=np.repeat(0.2, nregions), positions=np.arange(nregions)+1.3) + plt.setp(b_plot['medians'], color='black') + plt.setp(b_plot['whiskers'], color='black') + plt.setp(b_plot['boxes'], color='black') + plt.setp(b_plot['fliers'], color='black') + ax.errorbar(np.arange(nregions)+0.8, obs_data[0,:], yerr=obs_data[1,:], + fmt='o', color='r', ecolor='r') + ax.errorbar(np.arange(nregions)+1., ens_data[0,:], yerr=ens_data[1,:], + fmt='o', color='b', ecolor='b') + ax.set_xticks(np.arange(nregions)+1) + ax.set_xlim([0, nregions+1]) + + if data_labels: + ax.set_xticklabels(data_labels) + fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight') http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/tests/test_local.py ---------------------------------------------------------------------- diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py index bff2dd1..0927f0f 100644 --- a/ocw/tests/test_local.py +++ b/ocw/tests/test_local.py @@ -125,7 +125,7 @@ class TestLoadMultipleFiles(unittest.TestCase): def test_function_load_multiple_files_data_name(self): dataset = local.load_multiple_files(self.file_path, "value") - self.assertEqual([dataset[0].name], ['data']) + self.assertEqual([dataset[0].name], ['']) def test_function_load_multiple_files_lons(self): """To test load_multiple_file function for longitudes""" @@ -151,7 +151,8 @@ class TestLoadMultipleFiles(unittest.TestCase): """Test adding a custom name to a dataset""" dataset = local.load_multiple_files(self.file_path, "value", - dataset_name='foo') + generic_dataset_name=True, + dataset_name=['foo']) self.assertEqual(dataset[0].name, 'foo') def test_dataset_origin(self): http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/utils.py ---------------------------------------------------------------------- diff --git a/ocw/utils.py b/ocw/utils.py index ccc4ea9..0d9eb4e 100755 --- a/ocw/utils.py +++ b/ocw/utils.py @@ -649,8 +649,8 @@ def calculate_temporal_trends(dataset): def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000): ''' Calculate temporal trends in an ensemble of time series - :param timeseries_array: A list whose elements are one-dimensional numpy arrays - :type timeseries_array: :class:`list' + :param timeseries_array: Two dimensional array. 1st index: model, 2nd index: time. + :type timeseries_array: :class:`numpy.ndarray' :param sampling: A list whose elements are one-dimensional numpy arrays :type timeseries_array: :class:`list' @@ -659,22 +659,18 @@ def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000) :rtype: :float:`float','float' ''' - nmodels = len(timeseries_array) - nt = len(timeseries_array[0]) + nmodels, nt = timeseries_array.shape x = np.arange(nt) - merged_array = np.zeros([nmodels, nt]) sampled_trend = np.zeros(number_of_samples) - for imodel,timeseries in enumerate(timeseries_array): - merged_array[imodel,:] = timeseries[:] ensemble_trend, _ = calculate_temporal_trend_of_time_series( - x, np.mean(merged_array, axis=0)) + x, np.mean(timeseries_array, axis=0)) for isample in np.arange(number_of_samples): index = np.random.choice(nmodels, size=nmodels, replace=True) - random_ensemble = np.mean(merged_array[index, :], axis=0) + random_ensemble = np.mean(timeseries_array[index, :], axis=0) sampled_trend[isample], _ = calculate_temporal_trend_of_time_series( x, random_ensemble) - return ensemble_trend, np.std(sample_trend, ddof=1) + return ensemble_trend, np.std(sampled_trend, ddof=1) def calculate_temporal_trend_of_time_series(x,y): @@ -689,5 +685,4 @@ def calculate_temporal_trend_of_time_series(x,y): :rtype: :float:`float','float' ''' slope, intercept, r_value, p_value, std_err = stats.linregress(x,y) - n = len(x) - return slope, std_err/np.std(x)/n**0.5 + return slope, std_err
