Repository: climate
Updated Branches:
  refs/heads/master c025ecb88 -> 7187cf306
Update run_RCMES.py to treat obs and model datasets the same

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/5971be65
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/5971be65
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/5971be65

Branch: refs/heads/master
Commit: 5971be65e956788dec3d65af98d6bac518df3fa6
Parents: c025ecb
Author: Alex Goodman <ago...@users.noreply.github.com>
Authored: Tue Nov 15 16:14:06 2016 -0800
Committer: Alex Goodman <ago...@users.noreply.github.com>
Committed: Tue Nov 15 16:14:06 2016 -0800

----------------------------------------------------------------------
 RCMES/run_RCMES.py | 226 ++++++++++++++++++++++--------------------------
 1 file changed, 105 insertions(+), 121 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/5971be65/RCMES/run_RCMES.py
----------------------------------------------------------------------
diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py
index 516be92..7afa830 100644
--- a/RCMES/run_RCMES.py
+++ b/RCMES/run_RCMES.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import os
 import sys
 import ssl
@@ -61,7 +62,7 @@ if hasattr(ssl, '_create_unverified_context'):
 
 config_file = str(sys.argv[1])
 
-print 'Reading the configuration file ', config_file
+print('Reading the configuration file {}'.format(config_file))
 config = yaml.load(open(config_file))
 time_info = config['time']
 temporal_resolution = time_info['temporal_resolution']
@@ -86,55 +87,36 @@ extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon,
              'end_time': end_time, 'username': None, 'password': None}
 
 # Get the dataset loader options
-obs_data_info = config['datasets']['reference']
-model_data_info = config['datasets']['targets']
+data_info = config['datasets']
 
 # Extract info we don't want to put into the loader config
 # Multiplying Factor to scale obs by
-multiplying_factor = np.ones(len(obs_data_info))
-for i, info in enumerate(obs_data_info):
-    if 'multiplying_factor' in info:
-        multiplying_factor[i] = info.pop('multiplying_factor')
-
-# If models are GCMs we can skip boundary check. Probably need to find a more
-# elegant way to express this in the config file API.
-boundary_check = True
-for i, info in enumerate(model_data_info):
-    if 'boundary_check' in info:
-        boundary_check = info.pop('boundary_check')
-
-""" Step 1: Load the observation data """
-print 'Loading observation datasets:\n',obs_data_info
-obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info)
-obs_names = [dataset.name for dataset in obs_datasets]
-for i, dataset in enumerate(obs_datasets):
+multiplying_factor = np.ones(len(data_info))
+for i, info in enumerate(data_info):
+    multiplying_factor[i] = info.pop('multiplying_factor', 1)
+
+""" Step 1: Load the datasets """
+print('Loading datasets:\n{}'.format(data_info))
+datasets = load_datasets_from_config(extra_opts, *data_info)
+names = [dataset.name for dataset in datasets]
+for i, dataset in enumerate(datasets):
     if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-        obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
-                                                          temporal_resolution)
+        datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                      temporal_resolution)
+    datasets[i].values *= multiplying_factor[i]
 
-        if multiplying_factor[i] != 1:
-            obs_datasets[i].values *= multiplying_factor[i]
-
-""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
-model_datasets = load_datasets_from_config(extra_opts, *model_data_info)
-model_names = [dataset.name for dataset in model_datasets]
-if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    for i, dataset in enumerate(model_datasets):
-        model_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
-                                                            temporal_resolution)
-
-""" Step 3: Subset the data for temporal and spatial domain """
+""" Step 2: Subset the data for temporal and spatial domain """
 # Create a Bounds object to use for subsetting
 if time_info['maximum_overlap_period']:
-    start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets)
-    print 'Maximum overlap period'
-    print 'start_time:', start_time
-    print 'end_time:', end_time
+    start_time, end_time = utils.get_temporal_overlap(datasets)
+    print('Maximum overlap period')
+    print('start_time: {}'.format(start_time))
+    print('end_time: {}'.format(end_time))
 
 if temporal_resolution == 'monthly' and end_time.day !=1:
     end_time = end_time.replace(day=1)
 
-for i, dataset in enumerate(obs_datasets):
+for i, dataset in enumerate(datasets):
     min_lat = np.max([min_lat, dataset.lats.min()])
     max_lat = np.min([max_lat, dataset.lats.max()])
     min_lon = np.max([min_lon, dataset.lons.min()])
@@ -152,15 +134,10 @@ else:
                     start=start_time, end=end_time)
 
-for i, dataset in enumerate(obs_datasets):
-    obs_datasets[i] = dsp.subset(dataset, bounds)
+for i, dataset in enumerate(datasets):
+    datasets[i] = dsp.subset(dataset, bounds)
     if dataset.temporal_resolution() != temporal_resolution:
-        obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
-
-for i, dataset in enumerate(model_datasets):
-    model_datasets[i] = dsp.subset(dataset, bounds)
-    if dataset.temporal_resolution() != temporal_resolution:
-        model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
+        datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
 
 # Temporally subset both observation and model datasets
 # for the user specified season
@@ -168,19 +145,21 @@ month_start = time_info['month_start']
 month_end = time_info['month_end']
 average_each_year = time_info['average_each_year']
 
-# TODO: Fully support multiple observation / reference datasets.
-# For now we will only use the first reference dataset listed in the config file
-obs_dataset = obs_datasets[0]
-obs_name = obs_names[0]
-obs_dataset = dsp.temporal_subset(obs_dataset,month_start, month_end,average_each_year)
-for i, dataset in enumerate(model_datasets):
-    model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
-                                            average_each_year)
+# For now we will treat the first listed dataset as the reference dataset for
+# evaluation purposes.
+for i, dataset in enumerate(datasets):
+    datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
+                                      average_each_year)
+
+reference_dataset = datasets[0]
+target_datasets = datasets[1:]
+reference_name = names[0]
+target_names = names[1:]
 
 # generate grid points for regridding
 if config['regrid']['regrid_on_reference']:
-    new_lat = obs_dataset.lats
-    new_lon = obs_dataset.lons
+    new_lat = reference_dataset.lats
+    new_lon = reference_dataset.lons
 else:
     delta_lat = config['regrid']['regrid_dlat']
     delta_lon = config['regrid']['regrid_dlon']
@@ -189,40 +168,45 @@ else:
     new_lat = np.linspace(min_lat, max_lat, nlat)
     new_lon = np.linspace(min_lon, max_lon, nlon)
 
-# number of models
-nmodel = len(model_datasets)
-print 'Dataset loading completed'
-print 'Observation data:', obs_name
-print 'Number of model datasets:',nmodel
-for model_name in model_names:
-    print model_name
-
-""" Step 4: Spatial regriding of the reference datasets """
-print 'Regridding datasets: ', config['regrid']
+# Get flag for boundary checking for regridding. By default, this is set to True
+# since the main intent of this program is to evaluate RCMs. However, it can be
+# used for GCMs in which case it should be set to False to save time.
+boundary_check = config['regrid'].get('boundary_check', True)
+
+# number of target datasets (usually models, but can also be obs / reanalysis)
+ntarget = len(target_datasets)
+print('Dataset loading completed')
+print('Reference data: {}'.format(reference_name))
+print('Number of target datasets: {}'.format(ntarget))
+for target_name in target_names:
+    print(target_name)
+
+""" Step 3: Spatial regriding of the datasets """
+print('Regridding datasets: {}'.format(config['regrid']))
 if not config['regrid']['regrid_on_reference']:
-    obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon)
-    print 'Reference dataset has been regridded'
-for i, dataset in enumerate(model_datasets):
-    model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon,
+    reference_dataset = dsp.spatial_regrid(reference_dataset, new_lat, new_lon)
+    print('Reference dataset has been regridded')
+for i, dataset in enumerate(target_datasets):
+    target_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon,
                                             boundary_check=boundary_check)
-    print model_names[i]+' has been regridded'
-print 'Propagating missing data information'
-obs_dataset = dsp.mask_missing_data([obs_dataset]+model_datasets)[0]
-model_datasets = dsp.mask_missing_data([obs_dataset]+model_datasets)[1:]
-
-""" Step 5: Checking and converting variable units """
-print 'Checking and converting variable units'
-obs_dataset = dsp.variable_unit_conversion(obs_dataset)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.variable_unit_conversion(dataset)
-
-
-print 'Generating multi-model ensemble'
-if len(model_datasets) >= 2.:
-    model_datasets.append(dsp.ensemble(model_datasets))
-    model_names.append('ENS')
-
-""" Step 6: Generate subregion average and standard deviation """
+    print('{} has been regridded'.format(target_names[i]))
+print('Propagating missing data information')
+datasets = dsp.mask_missing_data([reference_dataset]+target_datasets)
+reference_dataset = datasets[0]
+target_datasets = datasets[1:]
+
+""" Step 4: Checking and converting variable units """
+print('Checking and converting variable units')
+reference_dataset = dsp.variable_unit_conversion(reference_dataset)
+for i, dataset in enumerate(target_datasets):
+    target_datasets[i] = dsp.variable_unit_conversion(dataset)
+
+print('Generating multi-model ensemble')
+if len(target_datasets) >= 2.:
+    target_datasets.append(dsp.ensemble(target_datasets))
+    target_names.append('ENS')
+
+""" Step 5: Generate subregion average and standard deviation """
 if config['use_subregions']:
     # sort the subregion by region names and make a list
     subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0))
@@ -230,94 +214,94 @@ if config['use_subregions']:
     # number of subregions
     nsubregion = len(subregions)
 
-    print ('Calculating spatial averages and standard deviations of ',
-           str(nsubregion),' subregions')
+    print('Calculating spatial averages and standard deviations of {} subregions'
+          .format(nsubregion))
 
-    obs_subregion_mean, obs_subregion_std, subregion_array = (
-        utils.calc_subregion_area_mean_and_std([obs_dataset], subregions))
-    model_subregion_mean, model_subregion_std, subregion_array = (
-        utils.calc_subregion_area_mean_and_std(model_datasets, subregions))
+    reference_subregion_mean, reference_subregion_std, subregion_array = (
+        utils.calc_subregion_area_mean_and_std([reference_dataset], subregions))
+    target_subregion_mean, target_subregion_std, subregion_array = (
+        utils.calc_subregion_area_mean_and_std(target_datasets, subregions))
 
-""" Step 7: Write a netCDF file """
+""" Step 6: Write a netCDF file """
 workdir = config['workdir']
 if workdir[-1] != '/':
     workdir = workdir+'/'
-print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename']
+print('Writing a netcdf file: ',workdir+config['output_netcdf_filename'])
 if not os.path.exists(workdir):
     os.system("mkdir -p "+workdir)
 
 if config['use_subregions']:
     dsp.write_netcdf_multiple_datasets_with_subregions(
-        obs_dataset, obs_name, model_datasets, model_names,
+        reference_dataset, reference_name, target_datasets, target_names,
         path=workdir+config['output_netcdf_filename'],
         subregions=subregions, subregion_array=subregion_array,
-        ref_subregion_mean=obs_subregion_mean,
-        ref_subregion_std=obs_subregion_std,
-        model_subregion_mean=model_subregion_mean,
-        model_subregion_std=model_subregion_std)
+        ref_subregion_mean=reference_subregion_mean,
+        ref_subregion_std=reference_subregion_std,
+        target_subregion_mean=target_subregion_mean,
+        target_subregion_std=target_subregion_std)
 else:
     dsp.write_netcdf_multiple_datasets_with_subregions(
-        obs_dataset, obs_name, model_datasets,
-        model_names,
+        reference_dataset, reference_name, target_datasets,
+        target_names,
         path=workdir+config['output_netcdf_filename'])
 
-""" Step 8: Calculate metrics and draw plots """
+""" Step 7: Calculate metrics and draw plots """
 nmetrics = config['number_of_metrics_and_plots']
 if config['use_subregions']:
-    Map_plot_subregion(subregions, obs_dataset, workdir)
+    Map_plot_subregion(subregions, reference_dataset, workdir)
 
 if nmetrics > 0:
-    print 'Calculating metrics and generating plots'
+    print('Calculating metrics and generating plots')
     for imetric in np.arange(nmetrics)+1:
         metrics_name = config['metrics'+'%1d' %imetric]
         plot_info = config['plots'+'%1d' %imetric]
         file_name = workdir+plot_info['file_name']
 
-        print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name
+        print('metrics {0}/{1}: {2}'.format(imetric, nmetrics, metrics_name))
         if metrics_name == 'Map_plot_bias_of_multiyear_climatology':
             row, column = plot_info['subplots_array']
             if 'map_projection' in plot_info.keys():
                 Map_plot_bias_of_multiyear_climatology(
-                    obs_dataset, obs_name, model_datasets, model_names,
+                    reference_dataset, reference_name, target_datasets, target_names,
                     file_name, row, column,
                     map_projection=plot_info['map_projection'])
             else:
                 Map_plot_bias_of_multiyear_climatology(
-                    obs_dataset, obs_name, model_datasets, model_names,
+                    reference_dataset, reference_name, target_datasets, target_names,
                     file_name, row, column)
         elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology':
             Taylor_diagram_spatial_pattern_of_multiyear_climatology(
-                obs_dataset, obs_name, model_datasets, model_names,
+                reference_dataset, reference_name, target_datasets, target_names,
                 file_name)
         elif config['use_subregions']:
             if (metrics_name == 'Timeseries_plot_subregion_interannual_variability'
                 and average_each_year):
                 row, column = plot_info['subplots_array']
                 Time_series_subregion(
-                    obs_subregion_mean, obs_name, model_subregion_mean,
-                    model_names, False, file_name, row, column,
+                    reference_subregion_mean, reference_name, target_subregion_mean,
+                    target_names, False, file_name, row, column,
                     x_tick=['Y'+str(i+1)
-                            for i in np.arange(model_subregion_mean.shape[1])])
+                            for i in np.arange(target_subregion_mean.shape[1])])
 
             if (metrics_name == 'Timeseries_plot_subregion_annual_cycle'
                 and not average_each_year and month_start==1 and month_end==12):
                 row, column = plot_info['subplots_array']
                 Time_series_subregion(
-                    obs_subregion_mean, obs_name,
-                    model_subregion_mean, model_names, True,
+                    reference_subregion_mean, reference_name,
+                    target_subregion_mean, target_names, True,
                     file_name, row, column,
                     x_tick=['J','F','M','A','M','J','J','A','S','O','N','D'])
 
             if (metrics_name == 'Portrait_diagram_subregion_interannual_variability'
                 and average_each_year):
-                Portrait_diagram_subregion(obs_subregion_mean, obs_name,
-                                           model_subregion_mean, model_names,
+                Portrait_diagram_subregion(reference_subregion_mean, reference_name,
+                                           target_subregion_mean, target_names,
                                            False, file_name)
 
             if (metrics_name == 'Portrait_diagram_subregion_annual_cycle'
                 and not average_each_year and month_start==1 and month_end==12):
-                Portrait_diagram_subregion(obs_subregion_mean, obs_name,
-                                           model_subregion_mean, model_names,
+                Portrait_diagram_subregion(reference_subregion_mean, reference_name,
+                                           target_subregion_mean, target_names,
                                            True, file_name)
         else:
-            print 'please check the currently supported metrics'
+            print('please check the currently supported metrics')
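Note on the config layout implied by this change: config['datasets'] is now read as a
single list instead of separate 'reference' and 'targets' blocks, the first entry is
treated as the reference dataset for evaluation, 'multiplying_factor' is an optional
per-dataset key, and 'boundary_check' moves under the 'regrid' section (defaulting to
True). A minimal YAML sketch of that layout follows; the loader-specific keys inside
each dataset entry (loader name, file path, variable) are illustrative assumptions and
depend on the OCW dataset loader being used, so check ocw.dataset_loader for the exact
options.

    datasets:
        # first entry is used as the reference dataset
        - name: Obs_ref                 # placeholder entry; loader keys are assumptions
          loader_name: local
          file_path: /path/to/obs.nc
          variable: tas
          multiplying_factor: 1         # optional per-dataset scaling factor
        # remaining entries are the target (model) datasets
        - name: RCM1
          loader_name: local
          file_path: /path/to/model1.nc
          variable: tas

    regrid:
        regrid_on_reference: True       # regrid targets onto the reference grid
        boundary_check: True            # set False when evaluating GCMs to save time

As before, the script is invoked with the config file as its only argument
(config_file = str(sys.argv[1])).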