Repository: climate
Updated Branches:
  refs/heads/master 07db0a240 -> 05519e16c

CLIMATE-848 - Support CORDEX boundary options in RCMES

- Now users can provide configuration files with the CORDEX domain name to spatially subset datasets using RCMES.

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/5622ff8b
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/5622ff8b
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/5622ff8b

Branch: refs/heads/master
Commit: 5622ff8bd595dfeaacbd29ab0176146dce490614
Parents: 07db0a2
Author: huikyole <huiky...@argo.jpl.nasa.gov>
Authored: Wed Aug 10 11:42:28 2016 -0700
Committer: huikyole <huiky...@argo.jpl.nasa.gov>
Committed: Wed Aug 10 11:42:28 2016 -0700

----------------------------------------------------------------------
 .../CMIP5_precipitation_evaluation.yaml |  43 ++
 RCMES/run_RCMES.py                      | 632 ++++++++++---------
 ocw/dataset_processor.py                |   3 +-
 ocw/utils.py                            |  31 +-
 4 files changed, 382 insertions(+), 327 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml
----------------------------------------------------------------------
diff --git a/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml b/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml
new file mode 100644
index 0000000..b970ebf
--- /dev/null
+++ b/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml
@@ -0,0 +1,43 @@
+workdir: ./CMIP5_examples/Central_America
+output_netcdf_filename: CMIP5_prec.nc
+
+# (RCMES will temporally subset data between month_start and month_end.
+# If average_each_year is True (False), seasonal mean in each year is (not) calculated and used for metrics calculation.)
+time:
+    maximum_overlap_period: False
+    start_time: 1998-01-01
+    end_time: 1999-12-31
+    temporal_resolution: monthly
+    month_start: 1
+    month_end: 12
+    average_each_year: Yes
+
+space:
+    boundary_type: CORDEX Central America
+
+regrid:
+    regrid_on_reference: False
+    regrid_dlat: 0.44
+    regrid_dlon: 0.44
+
+datasets:
+    reference:
+        - loader_name: rcmed
+          name: TRMM
+          dataset_id: 3
+          parameter_id: 36
+
+    targets:
+        - loader_name: local
+          file_path: ./data/CMIP5/prec/CMIP5_*prec.nc
+          variable_name: prec
+
+number_of_metrics_and_plots: 1
+
+metrics1: Map_plot_bias_of_multiyear_climatology
+
+plots1:
+    file_name: CMIP5_precipitation_evaluation_using_TRMM
+    subplots_array: !!python/tuple [3,4]
+
+use_subregions: False

http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/RCMES/run_RCMES.py
----------------------------------------------------------------------
diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py
index 65d5e3c..7609865 100644
--- a/RCMES/run_RCMES.py
+++ b/RCMES/run_RCMES.py
@@ -1,315 +1,323 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.
See the License for the -# specific language governing permissions and limitations -# under the License. - -import os -import sys -import ssl -import yaml -import operator -from datetime import datetime -from glob import glob -from getpass import getpass -import numpy as np -import ocw.utils as utils -import ocw.dataset_processor as dsp -from ocw.dataset import Bounds -from ocw.dataset_loader import DatasetLoader -from metrics_and_plots import * - -def load_datasets_from_config(extra_opts, *loader_opts): - ''' - Generic dataset loading function. - ''' - for opt in loader_opts: - loader_name = opt['loader_name'] - if loader_name == 'esgf': - if extra_opts['password'] is None: - extra_opts['username'] = raw_input('Enter your ESGF OpenID:\n') - extra_opts['password'] = getpass( - prompt='Enter your ESGF password:\n') - - opt['esgf_username'] = extra_opts['username'] - opt['esgf_password'] = extra_opts['password'] - elif loader_name == 'rcmed': - opt['min_lat'] = extra_opts['min_lat'] - opt['max_lat'] = extra_opts['max_lat'] - opt['min_lon'] = extra_opts['min_lon'] - opt['max_lon'] = extra_opts['max_lon'] - opt['start_time'] = extra_opts['start_time'] - opt['end_time'] = extra_opts['end_time'] - - loader = DatasetLoader(*loader_opts) - loader.load_datasets() - return loader.datasets - -if hasattr(ssl, '_create_unverified_context'): - ssl._create_default_https_context = ssl._create_unverified_context - -config_file = str(sys.argv[1]) - -print 'Reading the configuration file ', config_file -config = yaml.load(open(config_file)) -time_info = config['time'] -temporal_resolution = time_info['temporal_resolution'] - -# Read time info -start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') -end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') - -# Read space info -space_info = config['space'] -min_lat = space_info['min_lat'] -max_lat = space_info['max_lat'] -min_lon = space_info['min_lon'] -max_lon = space_info['max_lon'] - -# Additional arguments for the DatasetLoader -extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, - 'max_lon': max_lon, 'start_time': start_time, - 'end_time': end_time, 'username': None, 'password': None} - -# Get the dataset loader options -obs_data_info = config['datasets']['reference'] -model_data_info = config['datasets']['targets'] - -# Extract info we don't want to put into the loader config -# Multiplying Factor to scale obs by -multiplying_factor = np.ones(len(obs_data_info)) -for i, info in enumerate(obs_data_info): - if 'multiplying_factor' in info: - multiplying_factor[i] = info.pop('multiplying_factor') - -# If models are GCMs we can skip boundary check. Probably need to find a more -# elegant way to express this in the config file API. 
-boundary_check = True -for i, info in enumerate(model_data_info): - if 'boundary_check' in info: - boundary_check = info.pop('boundary_check') - -""" Step 1: Load the observation data """ -print 'Loading observation datasets:\n',obs_data_info -obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info) -obs_names = [dataset.name for dataset in obs_datasets] -for i, dataset in enumerate(obs_datasets): - if temporal_resolution == 'daily' or temporal_resolution == 'monthly': - obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset, - temporal_resolution) - - if multiplying_factor[i] != 1: - obs_dataset.values *= multiplying_factor[i] - -""" Step 2: Load model NetCDF Files into OCW Dataset Objects """ -model_datasets = load_datasets_from_config(extra_opts, *model_data_info) -model_names = [dataset.name for dataset in model_datasets] -if temporal_resolution == 'daily' or temporal_resolution == 'monthly': - for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.normalize_dataset_datetimes(dataset, - temporal_resolution) - -""" Step 3: Subset the data for temporal and spatial domain """ -# Create a Bounds object to use for subsetting -if time_info['maximum_overlap_period']: - start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets) - print 'Maximum overlap period' - print 'start_time:', start_time - print 'end_time:', end_time - -if temporal_resolution == 'monthly' and end_time.day !=1: +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import sys +import ssl +import yaml +import operator +from datetime import datetime +from glob import glob +from getpass import getpass +import numpy as np +import ocw.utils as utils +import ocw.dataset_processor as dsp +from ocw.dataset import Bounds +from ocw.dataset_loader import DatasetLoader +from metrics_and_plots import * + +def load_datasets_from_config(extra_opts, *loader_opts): + ''' + Generic dataset loading function. 
+ ''' + for opt in loader_opts: + loader_name = opt['loader_name'] + if loader_name == 'esgf': + if extra_opts['password'] is None: + extra_opts['username'] = raw_input('Enter your ESGF OpenID:\n') + extra_opts['password'] = getpass( + prompt='Enter your ESGF password:\n') + + opt['esgf_username'] = extra_opts['username'] + opt['esgf_password'] = extra_opts['password'] + elif loader_name == 'rcmed': + opt['min_lat'] = extra_opts['min_lat'] + opt['max_lat'] = extra_opts['max_lat'] + opt['min_lon'] = extra_opts['min_lon'] + opt['max_lon'] = extra_opts['max_lon'] + opt['start_time'] = extra_opts['start_time'] + opt['end_time'] = extra_opts['end_time'] + + loader = DatasetLoader(*loader_opts) + loader.load_datasets() + return loader.datasets + +if hasattr(ssl, '_create_unverified_context'): + ssl._create_default_https_context = ssl._create_unverified_context + +config_file = str(sys.argv[1]) + +print 'Reading the configuration file ', config_file +config = yaml.load(open(config_file)) +time_info = config['time'] +temporal_resolution = time_info['temporal_resolution'] + +# Read time info +start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') +end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') + +# Read space info +space_info = config['space'] +if not 'boundary_type' in space_info: + min_lat = space_info['min_lat'] + max_lat = space_info['max_lat'] + min_lon = space_info['min_lon'] + max_lon = space_info['max_lon'] +else: + min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(space_info['boundary_type'][6:].replace(" ","").lower()) + +# Additional arguments for the DatasetLoader +extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, + 'max_lon': max_lon, 'start_time': start_time, + 'end_time': end_time, 'username': None, 'password': None} + +# Get the dataset loader options +obs_data_info = config['datasets']['reference'] +model_data_info = config['datasets']['targets'] + +# Extract info we don't want to put into the loader config +# Multiplying Factor to scale obs by +multiplying_factor = np.ones(len(obs_data_info)) +for i, info in enumerate(obs_data_info): + if 'multiplying_factor' in info: + multiplying_factor[i] = info.pop('multiplying_factor') + +# If models are GCMs we can skip boundary check. Probably need to find a more +# elegant way to express this in the config file API. 
+boundary_check = True +for i, info in enumerate(model_data_info): + if 'boundary_check' in info: + boundary_check = info.pop('boundary_check') + +""" Step 1: Load the observation data """ +print 'Loading observation datasets:\n',obs_data_info +obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info) +obs_names = [dataset.name for dataset in obs_datasets] +for i, dataset in enumerate(obs_datasets): + if temporal_resolution == 'daily' or temporal_resolution == 'monthly': + obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset, + temporal_resolution) + + if multiplying_factor[i] != 1: + obs_dataset.values *= multiplying_factor[i] + +""" Step 2: Load model NetCDF Files into OCW Dataset Objects """ +model_datasets = load_datasets_from_config(extra_opts, *model_data_info) +model_names = [dataset.name for dataset in model_datasets] +if temporal_resolution == 'daily' or temporal_resolution == 'monthly': + for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.normalize_dataset_datetimes(dataset, + temporal_resolution) + +""" Step 3: Subset the data for temporal and spatial domain """ +# Create a Bounds object to use for subsetting +if time_info['maximum_overlap_period']: + start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets) + print 'Maximum overlap period' + print 'start_time:', start_time + print 'end_time:', end_time + +if temporal_resolution == 'monthly' and end_time.day !=1: end_time = end_time.replace(day=1) - + for i, dataset in enumerate(obs_datasets): - min_lat = np.max([min_lat, dataset.lats.min()]) - max_lat = np.min([max_lat, dataset.lats.max()]) - min_lon = np.max([min_lon, dataset.lons.min()]) + min_lat = np.max([min_lat, dataset.lats.min()]) + max_lat = np.min([max_lat, dataset.lats.max()]) + min_lon = np.max([min_lon, dataset.lons.min()]) max_lon = np.min([max_lon, dataset.lons.max()]) - -bounds = Bounds(lat_min=min_lat, - lat_max=max_lat, - lon_min=min_lon, - lon_max=max_lon, - start=start_time, - end=end_time) - -for i, dataset in enumerate(obs_datasets): - obs_datasets[i] = dsp.subset(dataset, bounds) - if dataset.temporal_resolution() != temporal_resolution: - obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) - -for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.subset(dataset, bounds) - if dataset.temporal_resolution() != temporal_resolution: - model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) - -# Temporally subset both observation and model datasets -# for the user specified season -month_start = time_info['month_start'] -month_end = time_info['month_end'] -average_each_year = time_info['average_each_year'] - -# TODO: Fully support multiple observation / reference datasets. 
-# For now we will only use the first reference dataset listed in the config file -obs_dataset = obs_datasets[0] -obs_name = obs_names[0] -obs_dataset = dsp.temporal_subset(obs_dataset,month_start, month_end,average_each_year) -for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end, - average_each_year) - -# generate grid points for regridding -if config['regrid']['regrid_on_reference']: - new_lat = obs_dataset.lats - new_lon = obs_dataset.lons -else: - delta_lat = config['regrid']['regrid_dlat'] - delta_lon = config['regrid']['regrid_dlon'] - nlat = (max_lat - min_lat)/delta_lat+1 - nlon = (max_lon - min_lon)/delta_lon+1 - new_lat = np.linspace(min_lat, max_lat, nlat) - new_lon = np.linspace(min_lon, max_lon, nlon) - -# number of models -nmodel = len(model_datasets) -print 'Dataset loading completed' -print 'Observation data:', obs_name -print 'Number of model datasets:',nmodel -for model_name in model_names: - print model_name - -""" Step 4: Spatial regriding of the reference datasets """ -print 'Regridding datasets: ', config['regrid'] -if not config['regrid']['regrid_on_reference']: - obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon) - print 'Reference dataset has been regridded' -for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon, - boundary_check=boundary_check) - print model_names[i]+' has been regridded' -print 'Propagating missing data information' -obs_dataset = dsp.mask_missing_data([obs_dataset]+model_datasets)[0] -model_datasets = dsp.mask_missing_data([obs_dataset]+model_datasets)[1:] - -""" Step 5: Checking and converting variable units """ -print 'Checking and converting variable units' -obs_dataset = dsp.variable_unit_conversion(obs_dataset) -for idata,dataset in enumerate(model_datasets): - model_datasets[idata] = dsp.variable_unit_conversion(dataset) - - -print 'Generating multi-model ensemble' -if len(model_datasets) >= 2.: - model_datasets.append(dsp.ensemble(model_datasets)) - model_names.append('ENS') - -""" Step 6: Generate subregion average and standard deviation """ -if config['use_subregions']: - # sort the subregion by region names and make a list - subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0)) - - # number of subregions - nsubregion = len(subregions) - - print ('Calculating spatial averages and standard deviations of ', - str(nsubregion),' subregions') - - obs_subregion_mean, obs_subregion_std, subregion_array = ( - utils.calc_subregion_area_mean_and_std([obs_dataset], subregions)) - model_subregion_mean, model_subregion_std, subregion_array = ( - utils.calc_subregion_area_mean_and_std(model_datasets, subregions)) - -""" Step 7: Write a netCDF file """ -workdir = config['workdir'] -if workdir[-1] != '/': - workdir = workdir+'/' -print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename'] -if not os.path.exists(workdir): - os.system("mkdir -p "+workdir) - -if config['use_subregions']: - dsp.write_netcdf_multiple_datasets_with_subregions( - obs_dataset, obs_name, model_datasets, model_names, - path=workdir+config['output_netcdf_filename'], - subregions=subregions, subregion_array=subregion_array, - ref_subregion_mean=obs_subregion_mean, - ref_subregion_std=obs_subregion_std, - model_subregion_mean=model_subregion_mean, - model_subregion_std=model_subregion_std) -else: - dsp.write_netcdf_multiple_datasets_with_subregions( - obs_dataset, obs_name, model_datasets, - model_names, - 
path=workdir+config['output_netcdf_filename']) - -""" Step 8: Calculate metrics and draw plots """ -nmetrics = config['number_of_metrics_and_plots'] -if config['use_subregions']: - Map_plot_subregion(subregions, obs_dataset, workdir) - -if nmetrics > 0: - print 'Calculating metrics and generating plots' - for imetric in np.arange(nmetrics)+1: - metrics_name = config['metrics'+'%1d' %imetric] - plot_info = config['plots'+'%1d' %imetric] - file_name = workdir+plot_info['file_name'] - - print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name - if metrics_name == 'Map_plot_bias_of_multiyear_climatology': - row, column = plot_info['subplots_array'] - if 'map_projection' in plot_info.keys(): - Map_plot_bias_of_multiyear_climatology( - obs_dataset, obs_name, model_datasets, model_names, - file_name, row, column, - map_projection=plot_info['map_projection']) - else: - Map_plot_bias_of_multiyear_climatology( - obs_dataset, obs_name, model_datasets, model_names, - file_name, row, column) - elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology': - Taylor_diagram_spatial_pattern_of_multiyear_climatology( - obs_dataset, obs_name, model_datasets, model_names, - file_name) - elif config['use_subregions']: - if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' - and average_each_year): - row, column = plot_info['subplots_array'] - Time_series_subregion( - obs_subregion_mean, obs_name, model_subregion_mean, - model_names, False, file_name, row, column, - x_tick=['Y'+str(i+1) - for i in np.arange(model_subregion_mean.shape[1])]) - - if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' - and not average_each_year and month_start==1 and month_end==12): - row, column = plot_info['subplots_array'] - Time_series_subregion( - obs_subregion_mean, obs_name, - model_subregion_mean, model_names, True, - file_name, row, column, - x_tick=['J','F','M','A','M','J','J','A','S','O','N','D']) - - if (metrics_name == 'Portrait_diagram_subregion_interannual_variability' - and average_each_year): - Portrait_diagram_subregion(obs_subregion_mean, obs_name, - model_subregion_mean, model_names, - False, file_name) - - if (metrics_name == 'Portrait_diagram_subregion_annual_cycle' - and not average_each_year and month_start==1 and month_end==12): - Portrait_diagram_subregion(obs_subregion_mean, obs_name, - model_subregion_mean, model_names, - True, file_name) - else: - print 'please check the currently supported metrics' + +if not 'boundary_type' in space_info: + bounds = Bounds(lat_min=min_lat, + lat_max=max_lat, + lon_min=min_lon, + lon_max=max_lon, + start=start_time, + end=end_time) +else: + bounds = Bounds(boundary_type=space_info['boundary_type'], + start=start_time, + end=end_time) + +for i, dataset in enumerate(obs_datasets): + obs_datasets[i] = dsp.subset(dataset, bounds) + if dataset.temporal_resolution() != temporal_resolution: + obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) + +for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.subset(dataset, bounds) + if dataset.temporal_resolution() != temporal_resolution: + model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) + +# Temporally subset both observation and model datasets +# for the user specified season +month_start = time_info['month_start'] +month_end = time_info['month_end'] +average_each_year = time_info['average_each_year'] + +# TODO: Fully support multiple observation / reference datasets. 
+# For now we will only use the first reference dataset listed in the config file +obs_dataset = obs_datasets[0] +obs_name = obs_names[0] +obs_dataset = dsp.temporal_subset(obs_dataset,month_start, month_end,average_each_year) +for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end, + average_each_year) + +# generate grid points for regridding +if config['regrid']['regrid_on_reference']: + new_lat = obs_dataset.lats + new_lon = obs_dataset.lons +else: + delta_lat = config['regrid']['regrid_dlat'] + delta_lon = config['regrid']['regrid_dlon'] + nlat = (max_lat - min_lat)/delta_lat+1 + nlon = (max_lon - min_lon)/delta_lon+1 + new_lat = np.linspace(min_lat, max_lat, nlat) + new_lon = np.linspace(min_lon, max_lon, nlon) + +# number of models +nmodel = len(model_datasets) +print 'Dataset loading completed' +print 'Observation data:', obs_name +print 'Number of model datasets:',nmodel +for model_name in model_names: + print model_name + +""" Step 4: Spatial regriding of the reference datasets """ +print 'Regridding datasets: ', config['regrid'] +if not config['regrid']['regrid_on_reference']: + obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon) + print 'Reference dataset has been regridded' +for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon, + boundary_check=boundary_check) + print model_names[i]+' has been regridded' +print 'Propagating missing data information' +obs_dataset = dsp.mask_missing_data([obs_dataset]+model_datasets)[0] +model_datasets = dsp.mask_missing_data([obs_dataset]+model_datasets)[1:] + +""" Step 5: Checking and converting variable units """ +print 'Checking and converting variable units' +obs_dataset = dsp.variable_unit_conversion(obs_dataset) +for idata,dataset in enumerate(model_datasets): + model_datasets[idata] = dsp.variable_unit_conversion(dataset) + + +print 'Generating multi-model ensemble' +if len(model_datasets) >= 2.: + model_datasets.append(dsp.ensemble(model_datasets)) + model_names.append('ENS') + +""" Step 6: Generate subregion average and standard deviation """ +if config['use_subregions']: + # sort the subregion by region names and make a list + subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0)) + + # number of subregions + nsubregion = len(subregions) + + print ('Calculating spatial averages and standard deviations of ', + str(nsubregion),' subregions') + + obs_subregion_mean, obs_subregion_std, subregion_array = ( + utils.calc_subregion_area_mean_and_std([obs_dataset], subregions)) + model_subregion_mean, model_subregion_std, subregion_array = ( + utils.calc_subregion_area_mean_and_std(model_datasets, subregions)) + +""" Step 7: Write a netCDF file """ +workdir = config['workdir'] +if workdir[-1] != '/': + workdir = workdir+'/' +print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename'] +if not os.path.exists(workdir): + os.system("mkdir -p "+workdir) + +if config['use_subregions']: + dsp.write_netcdf_multiple_datasets_with_subregions( + obs_dataset, obs_name, model_datasets, model_names, + path=workdir+config['output_netcdf_filename'], + subregions=subregions, subregion_array=subregion_array, + ref_subregion_mean=obs_subregion_mean, + ref_subregion_std=obs_subregion_std, + model_subregion_mean=model_subregion_mean, + model_subregion_std=model_subregion_std) +else: + dsp.write_netcdf_multiple_datasets_with_subregions( + obs_dataset, obs_name, model_datasets, + model_names, + 
path=workdir+config['output_netcdf_filename']) + +""" Step 8: Calculate metrics and draw plots """ +nmetrics = config['number_of_metrics_and_plots'] +if config['use_subregions']: + Map_plot_subregion(subregions, obs_dataset, workdir) + +if nmetrics > 0: + print 'Calculating metrics and generating plots' + for imetric in np.arange(nmetrics)+1: + metrics_name = config['metrics'+'%1d' %imetric] + plot_info = config['plots'+'%1d' %imetric] + file_name = workdir+plot_info['file_name'] + + print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name + if metrics_name == 'Map_plot_bias_of_multiyear_climatology': + row, column = plot_info['subplots_array'] + if 'map_projection' in plot_info.keys(): + Map_plot_bias_of_multiyear_climatology( + obs_dataset, obs_name, model_datasets, model_names, + file_name, row, column, + map_projection=plot_info['map_projection']) + else: + Map_plot_bias_of_multiyear_climatology( + obs_dataset, obs_name, model_datasets, model_names, + file_name, row, column) + elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology': + Taylor_diagram_spatial_pattern_of_multiyear_climatology( + obs_dataset, obs_name, model_datasets, model_names, + file_name) + elif config['use_subregions']: + if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' + and average_each_year): + row, column = plot_info['subplots_array'] + Time_series_subregion( + obs_subregion_mean, obs_name, model_subregion_mean, + model_names, False, file_name, row, column, + x_tick=['Y'+str(i+1) + for i in np.arange(model_subregion_mean.shape[1])]) + + if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' + and not average_each_year and month_start==1 and month_end==12): + row, column = plot_info['subplots_array'] + Time_series_subregion( + obs_subregion_mean, obs_name, + model_subregion_mean, model_names, True, + file_name, row, column, + x_tick=['J','F','M','A','M','J','J','A','S','O','N','D']) + + if (metrics_name == 'Portrait_diagram_subregion_interannual_variability' + and average_each_year): + Portrait_diagram_subregion(obs_subregion_mean, obs_name, + model_subregion_mean, model_names, + False, file_name) + + if (metrics_name == 'Portrait_diagram_subregion_annual_cycle' + and not average_each_year and month_start==1 and month_end==12): + Portrait_diagram_subregion(obs_subregion_mean, obs_name, + model_subregion_mean, model_names, + True, file_name) + else: + print 'please check the currently supported metrics' http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/ocw/dataset_processor.py ---------------------------------------------------------------------- diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 7379ef4..79a7196 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -396,7 +396,8 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma if hasattr(subregion, 'lat_min'): # If boundary_type is 'rectangular' or 'CORDEX ***', ensure that the subregion information is well formed - _are_bounds_contained_by_dataset(target_dataset, subregion) + # this boundary check may not be necessary with the updated Bounds and subset + #_are_bounds_contained_by_dataset(target_dataset, subregion) # this boundary check is not necessary with the updated Bounds and subset if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2: start_time_index = np.where( http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/ocw/utils.py 
----------------------------------------------------------------------
diff --git a/ocw/utils.py b/ocw/utils.py
index 1820786..3de1776 100755
--- a/ocw/utils.py
+++ b/ocw/utils.py
@@ -483,33 +483,36 @@ def CORDEX_boundary(domain_name):
     '''
     if domain_name =='southamerica':
         return -57.61, 18.50, 254.28-360., 343.02-360.
-    if domain_name =='centralamerica':
+    elif domain_name =='centralamerica':
         return -19.46, 34.83, 235.74-360., 337.78-360.
-    if domain_name =='northamerica':
+    elif domain_name =='northamerica':
         return 12.55, 75.88, 189.26-360., 336.74-360.
-    if domain_name =='europe':
+    elif domain_name =='europe':
         return 22.20, 71.84, 338.23-360., 64.4
-    if domain_name =='africa':
+    elif domain_name =='africa':
         return -45.76, 42.24, 335.36-360., 60.28
-    if domain_name =='southasia':
+    elif domain_name =='southasia':
        return -15.23, 45.07, 19.88, 115.55
-    if domain_name =='eastasia':
+    elif domain_name =='eastasia':
        return -0.10, 61.90, 51.59, 179.99
-    if domain_name =='centralasia':
+    elif domain_name =='centralasia':
        return 18.34, 69.37, 11.05, 139.13
-    if domain_name =='australasia':
+    elif domain_name =='australasia':
        return -52.36, 12.21, 89.25, 179.99
-    if domain_name =='antartica':
+    elif domain_name =='antartica':
        return -89.48,-56.00, -179.00, 179.00
-    if domain_name =='artic':
+    elif domain_name =='artic':
        return 46.06, 89.50, -179.00, 179.00
-    if domain_name =='mediterranean':
+    elif domain_name =='mediterranean':
        return 25.63, 56.66, 339.79-360.00, 50.85
-    if domain_name =='middleeastnorthafrica':
+    elif domain_name =='middleeastnorthafrica':
        return -7.00, 45.00, 333.00-360.00, 76.00
-    if domain_name =='southeastasia':
+    elif domain_name =='southeastasia':
        return -15.14, 27.26, 89.26, 146.96
-
+    else:
+        err ="Invalid CORDEX domain name"
+        raise ValueError(err)
+
 def mask_using_shapefile_info(lons, lats, masked_regions, extract = True):
     if lons.ndim == 2 and lats.ndim == 2:
         lons_2d = lons
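
----------------------------------------------------------------------

Usage sketch: the snippet below mirrors how this commit's run_RCMES.py consumes the new 'boundary_type' option, resolving a CORDEX domain name to a lat/lon box via ocw.utils.CORDEX_boundary and building the subsetting Bounds directly from the domain name. It is an illustration, not part of the patch: the hard-coded space_info dict and the dates are placeholders standing in for the parsed 'space:' and 'time:' sections of a configuration file such as CMIP5_precipitation_evaluation.yaml, and Bounds(boundary_type=...) assumes the updated Bounds class that the patched run_RCMES.py relies on.

import ocw.utils as utils
from ocw.dataset import Bounds
from datetime import datetime

# Stand-in for the parsed 'space:' section of a configuration file, e.g.
# RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml.
space_info = {'boundary_type': 'CORDEX Central America'}

if 'boundary_type' not in space_info:
    # Rectangular domains are still given as explicit corners.
    min_lat = space_info['min_lat']
    max_lat = space_info['max_lat']
    min_lon = space_info['min_lon']
    max_lon = space_info['max_lon']
else:
    # 'CORDEX Central America' -> 'centralamerica': drop the leading 'CORDEX ',
    # remove spaces, lower-case, then look up the domain's lat/lon box.
    domain_name = space_info['boundary_type'][6:].replace(" ", "").lower()
    min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain_name)
    # For 'centralamerica' this is roughly (-19.46, 34.83, -124.26, -22.22).

# run_RCMES.py builds the subsetting Bounds straight from the domain name and
# the evaluation period (dates here taken from the example configuration).
bounds = Bounds(boundary_type=space_info['boundary_type'],
                start=datetime(1998, 1, 1),
                end=datetime(1999, 12, 31))

Every loaded dataset is then clipped with dsp.subset(dataset, bounds), so a single "boundary_type: CORDEX <domain>" entry replaces the four min_lat/max_lat/min_lon/max_lon keys that the 'space:' section previously required.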