Repository: climate
Updated Branches:
  refs/heads/master c025ecb88 -> 7187cf306
Update run_RCMES.py to treat obs and model datasets the same

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/5971be65
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/5971be65
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/5971be65

Branch: refs/heads/master
Commit: 5971be65e956788dec3d65af98d6bac518df3fa6
Parents: c025ecb
Author: Alex Goodman <ago...@users.noreply.github.com>
Authored: Tue Nov 15 16:14:06 2016 -0800
Committer: Alex Goodman <ago...@users.noreply.github.com>
Committed: Tue Nov 15 16:14:06 2016 -0800

----------------------------------------------------------------------
 RCMES/run_RCMES.py | 226 ++++++++++++++++++++++--------------------------
 1 file changed, 105 insertions(+), 121 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/5971be65/RCMES/run_RCMES.py
----------------------------------------------------------------------
diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py
index 516be92..7afa830 100644
--- a/RCMES/run_RCMES.py
+++ b/RCMES/run_RCMES.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import os
 import sys
 import ssl
@@ -61,7 +62,7 @@ if hasattr(ssl, '_create_unverified_context'):
 
 config_file = str(sys.argv[1])
 
-print 'Reading the configuration file ', config_file
+print('Reading the configuration file {}'.format(config_file))
 config = yaml.load(open(config_file))
 time_info = config['time']
 temporal_resolution = time_info['temporal_resolution']
@@ -86,55 +87,36 @@ extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon,
              'end_time': end_time, 'username': None, 'password': None}
 
 # Get the dataset loader options
-obs_data_info = config['datasets']['reference']
-model_data_info = config['datasets']['targets']
+data_info = config['datasets']
 
 # Extract info we don't want to put into the loader config
 # Multiplying Factor to scale obs by
-multiplying_factor = np.ones(len(obs_data_info))
-for i, info in enumerate(obs_data_info):
-    if 'multiplying_factor' in info:
-        multiplying_factor[i] = info.pop('multiplying_factor')
-
-# If models are GCMs we can skip boundary check. Probably need to find a more
-# elegant way to express this in the config file API.
-boundary_check = True
-for i, info in enumerate(model_data_info):
-    if 'boundary_check' in info:
-        boundary_check = info.pop('boundary_check')
-
-""" Step 1: Load the observation data """
-print 'Loading observation datasets:\n',obs_data_info
-obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info)
-obs_names = [dataset.name for dataset in obs_datasets]
-for i, dataset in enumerate(obs_datasets):
+multiplying_factor = np.ones(len(data_info))
+for i, info in enumerate(data_info):
+    multiplying_factor[i] = info.pop('multiplying_factor', 1)
+
+""" Step 1: Load the datasets """
+print('Loading datasets:\n{}'.format(data_info))
+datasets = load_datasets_from_config(extra_opts, *data_info)
+names = [dataset.name for dataset in datasets]
+for i, dataset in enumerate(datasets):
     if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-        obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
-                                                          temporal_resolution)
+        datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                      temporal_resolution)
+    datasets[i].values *= multiplying_factor[i]
 
-        if multiplying_factor[i] != 1:
-            obs_datasets[i].values *= multiplying_factor[i]
-
-""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
-model_datasets = load_datasets_from_config(extra_opts, *model_data_info)
-model_names = [dataset.name for dataset in model_datasets]
-if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    for i, dataset in enumerate(model_datasets):
-        model_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
-                                                            temporal_resolution)
-
-""" Step 3: Subset the data for temporal and spatial domain """
+""" Step 2: Subset the data for temporal and spatial domain """
 # Create a Bounds object to use for subsetting
 if time_info['maximum_overlap_period']:
-    start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets)
-    print 'Maximum overlap period'
-    print 'start_time:', start_time
-    print 'end_time:', end_time
+    start_time, end_time = utils.get_temporal_overlap(datasets)
+    print('Maximum overlap period')
+    print('start_time: {}'.format(start_time))
+    print('end_time: {}'.format(end_time))
 
 if temporal_resolution == 'monthly' and end_time.day !=1:
     end_time = end_time.replace(day=1)
 
-for i, dataset in enumerate(obs_datasets):
+for i, dataset in enumerate(datasets):
     min_lat = np.max([min_lat, dataset.lats.min()])
     max_lat = np.min([max_lat, dataset.lats.max()])
     min_lon = np.max([min_lon, dataset.lons.min()])
@@ -152,15 +134,10 @@ else:
                     start=start_time, end=end_time)
 
-for i, dataset in enumerate(obs_datasets):
-    obs_datasets[i] = dsp.subset(dataset, bounds)
+for i, dataset in enumerate(datasets):
+    datasets[i] = dsp.subset(dataset, bounds)
     if dataset.temporal_resolution() != temporal_resolution:
-        obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
-
-for i, dataset in enumerate(model_datasets):
-    model_datasets[i] = dsp.subset(dataset, bounds)
-    if dataset.temporal_resolution() != temporal_resolution:
-        model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
+        datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)
 
 # Temporally subset both observation and model datasets
 # for the user specified season
@@ -168,19 +145,21 @@ month_start = time_info['month_start']
 month_end = time_info['month_end']
 average_each_year = time_info['average_each_year']
 
-# TODO: Fully support multiple observation / reference datasets.
-# For now we will only use the first reference dataset listed in the config file
-obs_dataset = obs_datasets[0]
-obs_name = obs_names[0]
-obs_dataset = dsp.temporal_subset(obs_dataset,month_start, month_end,average_each_year)
-for i, dataset in enumerate(model_datasets):
-    model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
-                                            average_each_year)
+# For now we will treat the first listed dataset as the reference dataset for
+# evaluation purposes.
+for i, dataset in enumerate(datasets):
+    datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
+                                      average_each_year)
+
+reference_dataset = datasets[0]
+target_datasets = datasets[1:]
+reference_name = names[0]
+target_names = names[1:]
 
 # generate grid points for regridding
 if config['regrid']['regrid_on_reference']:
-    new_lat = obs_dataset.lats
-    new_lon = obs_dataset.lons
+    new_lat = reference_dataset.lats
+    new_lon = reference_dataset.lons
 else:
     delta_lat = config['regrid']['regrid_dlat']
     delta_lon = config['regrid']['regrid_dlon']
@@ -189,40 +168,45 @@ else:
     new_lat = np.linspace(min_lat, max_lat, nlat)
     new_lon = np.linspace(min_lon, max_lon, nlon)
 
-# number of models
-nmodel = len(model_datasets)
-print 'Dataset loading completed'
-print 'Observation data:', obs_name
-print 'Number of model datasets:',nmodel
-for model_name in model_names:
-    print model_name
-
-""" Step 4: Spatial regriding of the reference datasets """
-print 'Regridding datasets: ', config['regrid']
+# Get flag for boundary checking for regridding. By default, this is set to True
+# since the main intent of this program is to evaluate RCMs. However, it can be
+# used for GCMs in which case it should be set to False to save time.
+boundary_check = config['regrid'].get('boundary_check', True)
+
+# number of target datasets (usually models, but can also be obs / reanalysis)
+ntarget = len(target_datasets)
+print('Dataset loading completed')
+print('Reference data: {}'.format(reference_name))
+print('Number of target datasets: {}'.format(ntarget))
+for target_name in target_names:
+    print(target_name)
+
+""" Step 3: Spatial regriding of the datasets """
+print('Regridding datasets: {}'.format(config['regrid']))
 if not config['regrid']['regrid_on_reference']:
-    obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon)
-    print 'Reference dataset has been regridded'
-for i, dataset in enumerate(model_datasets):
-    model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon,
+    reference_dataset = dsp.spatial_regrid(reference_dataset, new_lat, new_lon)
+    print('Reference dataset has been regridded')
+for i, dataset in enumerate(target_datasets):
+    target_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon,
                                             boundary_check=boundary_check)
-    print model_names[i]+' has been regridded'
-print 'Propagating missing data information'
-obs_dataset = dsp.mask_missing_data([obs_dataset]+model_datasets)[0]
-model_datasets = dsp.mask_missing_data([obs_dataset]+model_datasets)[1:]
-
-""" Step 5: Checking and converting variable units """
-print 'Checking and converting variable units'
-obs_dataset = dsp.variable_unit_conversion(obs_dataset)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.variable_unit_conversion(dataset)
-
-
-print 'Generating multi-model ensemble'
-if len(model_datasets) >= 2.:
-    model_datasets.append(dsp.ensemble(model_datasets))
-    model_names.append('ENS')
-
-""" Step 6: Generate subregion average and standard deviation """
+    print('{} has been regridded'.format(target_names[i]))
+print('Propagating missing data information')
+datasets = dsp.mask_missing_data([reference_dataset]+target_datasets)
+reference_dataset = datasets[0]
+target_datasets = datasets[1:]
+
+""" Step 4: Checking and converting variable units """
+print('Checking and converting variable units')
+reference_dataset = dsp.variable_unit_conversion(reference_dataset)
+for i, dataset in enumerate(target_datasets):
+    target_datasets[i] = dsp.variable_unit_conversion(dataset)
+
+print('Generating multi-model ensemble')
+if len(target_datasets) >= 2.:
+    target_datasets.append(dsp.ensemble(target_datasets))
+    target_names.append('ENS')
+
+""" Step 5: Generate subregion average and standard deviation """
 if config['use_subregions']:
     # sort the subregion by region names and make a list
     subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0))
@@ -230,94 +214,94 @@ if config['use_subregions']:
     # number of subregions
     nsubregion = len(subregions)
 
-    print ('Calculating spatial averages and standard deviations of ',
-           str(nsubregion),' subregions')
+    print('Calculating spatial averages and standard deviations of {} subregions'
+          .format(nsubregion))
 
-    obs_subregion_mean, obs_subregion_std, subregion_array = (
-        utils.calc_subregion_area_mean_and_std([obs_dataset], subregions))
-    model_subregion_mean, model_subregion_std, subregion_array = (
-        utils.calc_subregion_area_mean_and_std(model_datasets, subregions))
+    reference_subregion_mean, reference_subregion_std, subregion_array = (
+        utils.calc_subregion_area_mean_and_std([reference_dataset], subregions))
+    target_subregion_mean, target_subregion_std, subregion_array = (
+        utils.calc_subregion_area_mean_and_std(target_datasets, subregions))
 
-""" Step 7: Write a netCDF file """
+""" Step 6: Write a netCDF file """
 workdir = config['workdir']
 if workdir[-1] != '/':
     workdir = workdir+'/'
-print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename']
+print('Writing a netcdf file: ',workdir+config['output_netcdf_filename'])
 if not os.path.exists(workdir):
     os.system("mkdir -p "+workdir)
 
 if config['use_subregions']:
     dsp.write_netcdf_multiple_datasets_with_subregions(
-        obs_dataset, obs_name, model_datasets, model_names,
+        reference_dataset, reference_name, target_datasets, target_names,
         path=workdir+config['output_netcdf_filename'],
         subregions=subregions, subregion_array=subregion_array,
-        ref_subregion_mean=obs_subregion_mean,
-        ref_subregion_std=obs_subregion_std,
-        model_subregion_mean=model_subregion_mean,
-        model_subregion_std=model_subregion_std)
+        ref_subregion_mean=reference_subregion_mean,
+        ref_subregion_std=reference_subregion_std,
+        target_subregion_mean=target_subregion_mean,
+        target_subregion_std=target_subregion_std)
 else:
     dsp.write_netcdf_multiple_datasets_with_subregions(
-        obs_dataset, obs_name, model_datasets,
-        model_names,
+        reference_dataset, reference_name, target_datasets,
+        target_names,
         path=workdir+config['output_netcdf_filename'])
 
-""" Step 8: Calculate metrics and draw plots """
+""" Step 7: Calculate metrics and draw plots """
 nmetrics = config['number_of_metrics_and_plots']
 if config['use_subregions']:
-    Map_plot_subregion(subregions, obs_dataset, workdir)
+    Map_plot_subregion(subregions, reference_dataset, workdir)
 
 if nmetrics > 0:
-    print 'Calculating metrics and generating plots'
+    print('Calculating metrics and generating plots')
     for imetric in np.arange(nmetrics)+1:
         metrics_name = config['metrics'+'%1d' %imetric]
         plot_info = config['plots'+'%1d' %imetric]
         file_name = workdir+plot_info['file_name']
 
-        print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name
+        print('metrics {0}/{1}: {2}'.format(imetric, nmetrics, metrics_name))
         if metrics_name == 'Map_plot_bias_of_multiyear_climatology':
             row, column = plot_info['subplots_array']
             if 'map_projection' in plot_info.keys():
                 Map_plot_bias_of_multiyear_climatology(
-                    obs_dataset, obs_name, model_datasets, model_names,
+                    reference_dataset, reference_name, target_datasets, target_names,
                     file_name, row, column,
                     map_projection=plot_info['map_projection'])
             else:
                 Map_plot_bias_of_multiyear_climatology(
-                    obs_dataset, obs_name, model_datasets, model_names,
+                    reference_dataset, reference_name, target_datasets, target_names,
                     file_name, row, column)
         elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology':
             Taylor_diagram_spatial_pattern_of_multiyear_climatology(
-                obs_dataset, obs_name, model_datasets, model_names,
+                reference_dataset, reference_name, target_datasets, target_names,
                 file_name)
         elif config['use_subregions']:
             if (metrics_name == 'Timeseries_plot_subregion_interannual_variability'
                 and average_each_year):
                 row, column = plot_info['subplots_array']
                 Time_series_subregion(
-                    obs_subregion_mean, obs_name, model_subregion_mean,
-                    model_names, False, file_name, row, column,
+                    reference_subregion_mean, reference_name, target_subregion_mean,
+                    target_names, False, file_name, row, column,
                     x_tick=['Y'+str(i+1)
-                            for i in np.arange(model_subregion_mean.shape[1])])
+                            for i in np.arange(target_subregion_mean.shape[1])])
 
             if (metrics_name == 'Timeseries_plot_subregion_annual_cycle'
                 and not average_each_year and month_start==1 and month_end==12):
                 row, column = plot_info['subplots_array']
                 Time_series_subregion(
-                    obs_subregion_mean, obs_name,
-                    model_subregion_mean, model_names, True,
+                    reference_subregion_mean, reference_name,
+                    target_subregion_mean, target_names, True,
                     file_name, row, column,
                     x_tick=['J','F','M','A','M','J','J','A','S','O','N','D'])
 
             if (metrics_name == 'Portrait_diagram_subregion_interannual_variability'
                 and average_each_year):
-                Portrait_diagram_subregion(obs_subregion_mean, obs_name,
-                                           model_subregion_mean, model_names,
+                Portrait_diagram_subregion(reference_subregion_mean, reference_name,
+                                           target_subregion_mean, target_names,
                                            False, file_name)
 
             if (metrics_name == 'Portrait_diagram_subregion_annual_cycle'
                 and not average_each_year and month_start==1 and month_end==12):
-                Portrait_diagram_subregion(obs_subregion_mean, obs_name,
-                                           model_subregion_mean, model_names,
+                Portrait_diagram_subregion(reference_subregion_mean, reference_name,
+                                           target_subregion_mean, target_names,
                                            True, file_name)
         else:
-            print 'please check the currently supported metrics'
+            print('please check the currently supported metrics')
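Note on the config layout implied by this change: config['datasets'] is now read as a
single list instead of separate 'reference' and 'targets' blocks, the first entry is
treated as the reference dataset for evaluation, 'multiplying_factor' is an optional
per-dataset key, and 'boundary_check' moves under the 'regrid' section (defaulting to
True). A minimal YAML sketch of that layout follows; the loader-specific keys inside
each dataset entry (loader name, file path, variable) are illustrative assumptions and
depend on the OCW dataset loader being used, so check ocw.dataset_loader for the exact
options.

    datasets:
        # first entry is used as the reference dataset
        - name: Obs_ref                 # placeholder entry; loader keys are assumptions
          loader_name: local
          file_path: /path/to/obs.nc
          variable: tas
          multiplying_factor: 1         # optional per-dataset scaling factor
        # remaining entries are the target (model) datasets
        - name: RCM1
          loader_name: local
          file_path: /path/to/model1.nc
          variable: tas

    regrid:
        regrid_on_reference: True       # regrid targets onto the reference grid
        boundary_check: True            # set False when evaluating GCMs to save time

As before, the script is invoked with the config file as its only argument
(config_file = str(sys.argv[1])).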