Repository: climate
Updated Branches:
  refs/heads/master 07db0a240 -> 05519e16c

CLIMATE-848 - Support CORDEX boundary options in RCMES

- Now users can provide configuration files with the CORDEX domain name to spatially subset datasets using RCMES.

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/5622ff8b
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/5622ff8b
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/5622ff8b

Branch: refs/heads/master
Commit: 5622ff8bd595dfeaacbd29ab0176146dce490614
Parents: 07db0a2
Author: huikyole <huiky...@argo.jpl.nasa.gov>
Authored: Wed Aug 10 11:42:28 2016 -0700
Committer: huikyole <huiky...@argo.jpl.nasa.gov>
Committed: Wed Aug 10 11:42:28 2016 -0700

----------------------------------------------------------------------
 .../CMIP5_precipitation_evaluation.yaml |  43 ++
 RCMES/run_RCMES.py                      | 632 ++++++++++---------
 ocw/dataset_processor.py                |   3 +-
 ocw/utils.py                            |  31 +-
 4 files changed, 382 insertions(+), 327 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml
----------------------------------------------------------------------
diff --git a/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml b/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml
new file mode 100644
index 0000000..b970ebf
--- /dev/null
+++ b/RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml
@@ -0,0 +1,43 @@
+workdir: ./CMIP5_examples/Central_America
+output_netcdf_filename: CMIP5_prec.nc
+
+# (RCMES will temporally subset data between month_start and month_end.
+# If average_each_year is True (False), seasonal mean in each year is (not) calculated and used for metrics calculation.)
+time:
+    maximum_overlap_period: False
+    start_time: 1998-01-01
+    end_time: 1999-12-31
+    temporal_resolution: monthly
+    month_start: 1
+    month_end: 12
+    average_each_year: Yes
+
+space:
+    boundary_type: CORDEX Central America
+
+regrid:
+    regrid_on_reference: False
+    regrid_dlat: 0.44
+    regrid_dlon: 0.44
+
+datasets:
+    reference:
+        - loader_name: rcmed
+          name: TRMM
+          dataset_id: 3
+          parameter_id: 36
+
+    targets:
+        - loader_name: local
+          file_path: ./data/CMIP5/prec/CMIP5_*prec.nc
+          variable_name: prec
+
+number_of_metrics_and_plots: 1
+
+metrics1: Map_plot_bias_of_multiyear_climatology
+
+plots1:
+    file_name: CMIP5_precipitation_evaluation_using_TRMM
+    subplots_array: !!python/tuple [3,4]
+
+use_subregions: False

http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/RCMES/run_RCMES.py
----------------------------------------------------------------------
diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py
index 65d5e3c..7609865 100644
--- a/RCMES/run_RCMES.py
+++ b/RCMES/run_RCMES.py
@@ -1,315 +1,323 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.
See the License for the -# specific language governing permissions and limitations -# under the License. - -import os -import sys -import ssl -import yaml -import operator -from datetime import datetime -from glob import glob -from getpass import getpass -import numpy as np -import ocw.utils as utils -import ocw.dataset_processor as dsp -from ocw.dataset import Bounds -from ocw.dataset_loader import DatasetLoader -from metrics_and_plots import * - -def load_datasets_from_config(extra_opts, *loader_opts): - ''' - Generic dataset loading function. - ''' - for opt in loader_opts: - loader_name = opt['loader_name'] - if loader_name == 'esgf': - if extra_opts['password'] is None: - extra_opts['username'] = raw_input('Enter your ESGF OpenID:\n') - extra_opts['password'] = getpass( - prompt='Enter your ESGF password:\n') - - opt['esgf_username'] = extra_opts['username'] - opt['esgf_password'] = extra_opts['password'] - elif loader_name == 'rcmed': - opt['min_lat'] = extra_opts['min_lat'] - opt['max_lat'] = extra_opts['max_lat'] - opt['min_lon'] = extra_opts['min_lon'] - opt['max_lon'] = extra_opts['max_lon'] - opt['start_time'] = extra_opts['start_time'] - opt['end_time'] = extra_opts['end_time'] - - loader = DatasetLoader(*loader_opts) - loader.load_datasets() - return loader.datasets - -if hasattr(ssl, '_create_unverified_context'): - ssl._create_default_https_context = ssl._create_unverified_context - -config_file = str(sys.argv[1]) - -print 'Reading the configuration file ', config_file -config = yaml.load(open(config_file)) -time_info = config['time'] -temporal_resolution = time_info['temporal_resolution'] - -# Read time info -start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') -end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') - -# Read space info -space_info = config['space'] -min_lat = space_info['min_lat'] -max_lat = space_info['max_lat'] -min_lon = space_info['min_lon'] -max_lon = space_info['max_lon'] - -# Additional arguments for the DatasetLoader -extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, - 'max_lon': max_lon, 'start_time': start_time, - 'end_time': end_time, 'username': None, 'password': None} - -# Get the dataset loader options -obs_data_info = config['datasets']['reference'] -model_data_info = config['datasets']['targets'] - -# Extract info we don't want to put into the loader config -# Multiplying Factor to scale obs by -multiplying_factor = np.ones(len(obs_data_info)) -for i, info in enumerate(obs_data_info): - if 'multiplying_factor' in info: - multiplying_factor[i] = info.pop('multiplying_factor') - -# If models are GCMs we can skip boundary check. Probably need to find a more -# elegant way to express this in the config file API. 
-boundary_check = True -for i, info in enumerate(model_data_info): - if 'boundary_check' in info: - boundary_check = info.pop('boundary_check') - -""" Step 1: Load the observation data """ -print 'Loading observation datasets:\n',obs_data_info -obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info) -obs_names = [dataset.name for dataset in obs_datasets] -for i, dataset in enumerate(obs_datasets): - if temporal_resolution == 'daily' or temporal_resolution == 'monthly': - obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset, - temporal_resolution) - - if multiplying_factor[i] != 1: - obs_dataset.values *= multiplying_factor[i] - -""" Step 2: Load model NetCDF Files into OCW Dataset Objects """ -model_datasets = load_datasets_from_config(extra_opts, *model_data_info) -model_names = [dataset.name for dataset in model_datasets] -if temporal_resolution == 'daily' or temporal_resolution == 'monthly': - for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.normalize_dataset_datetimes(dataset, - temporal_resolution) - -""" Step 3: Subset the data for temporal and spatial domain """ -# Create a Bounds object to use for subsetting -if time_info['maximum_overlap_period']: - start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets) - print 'Maximum overlap period' - print 'start_time:', start_time - print 'end_time:', end_time - -if temporal_resolution == 'monthly' and end_time.day !=1: +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import sys +import ssl +import yaml +import operator +from datetime import datetime +from glob import glob +from getpass import getpass +import numpy as np +import ocw.utils as utils +import ocw.dataset_processor as dsp +from ocw.dataset import Bounds +from ocw.dataset_loader import DatasetLoader +from metrics_and_plots import * + +def load_datasets_from_config(extra_opts, *loader_opts): + ''' + Generic dataset loading function. 
+ ''' + for opt in loader_opts: + loader_name = opt['loader_name'] + if loader_name == 'esgf': + if extra_opts['password'] is None: + extra_opts['username'] = raw_input('Enter your ESGF OpenID:\n') + extra_opts['password'] = getpass( + prompt='Enter your ESGF password:\n') + + opt['esgf_username'] = extra_opts['username'] + opt['esgf_password'] = extra_opts['password'] + elif loader_name == 'rcmed': + opt['min_lat'] = extra_opts['min_lat'] + opt['max_lat'] = extra_opts['max_lat'] + opt['min_lon'] = extra_opts['min_lon'] + opt['max_lon'] = extra_opts['max_lon'] + opt['start_time'] = extra_opts['start_time'] + opt['end_time'] = extra_opts['end_time'] + + loader = DatasetLoader(*loader_opts) + loader.load_datasets() + return loader.datasets + +if hasattr(ssl, '_create_unverified_context'): + ssl._create_default_https_context = ssl._create_unverified_context + +config_file = str(sys.argv[1]) + +print 'Reading the configuration file ', config_file +config = yaml.load(open(config_file)) +time_info = config['time'] +temporal_resolution = time_info['temporal_resolution'] + +# Read time info +start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') +end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') + +# Read space info +space_info = config['space'] +if not 'boundary_type' in space_info: + min_lat = space_info['min_lat'] + max_lat = space_info['max_lat'] + min_lon = space_info['min_lon'] + max_lon = space_info['max_lon'] +else: + min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(space_info['boundary_type'][6:].replace(" ","").lower()) + +# Additional arguments for the DatasetLoader +extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, + 'max_lon': max_lon, 'start_time': start_time, + 'end_time': end_time, 'username': None, 'password': None} + +# Get the dataset loader options +obs_data_info = config['datasets']['reference'] +model_data_info = config['datasets']['targets'] + +# Extract info we don't want to put into the loader config +# Multiplying Factor to scale obs by +multiplying_factor = np.ones(len(obs_data_info)) +for i, info in enumerate(obs_data_info): + if 'multiplying_factor' in info: + multiplying_factor[i] = info.pop('multiplying_factor') + +# If models are GCMs we can skip boundary check. Probably need to find a more +# elegant way to express this in the config file API. 
+boundary_check = True +for i, info in enumerate(model_data_info): + if 'boundary_check' in info: + boundary_check = info.pop('boundary_check') + +""" Step 1: Load the observation data """ +print 'Loading observation datasets:\n',obs_data_info +obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info) +obs_names = [dataset.name for dataset in obs_datasets] +for i, dataset in enumerate(obs_datasets): + if temporal_resolution == 'daily' or temporal_resolution == 'monthly': + obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset, + temporal_resolution) + + if multiplying_factor[i] != 1: + obs_dataset.values *= multiplying_factor[i] + +""" Step 2: Load model NetCDF Files into OCW Dataset Objects """ +model_datasets = load_datasets_from_config(extra_opts, *model_data_info) +model_names = [dataset.name for dataset in model_datasets] +if temporal_resolution == 'daily' or temporal_resolution == 'monthly': + for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.normalize_dataset_datetimes(dataset, + temporal_resolution) + +""" Step 3: Subset the data for temporal and spatial domain """ +# Create a Bounds object to use for subsetting +if time_info['maximum_overlap_period']: + start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets) + print 'Maximum overlap period' + print 'start_time:', start_time + print 'end_time:', end_time + +if temporal_resolution == 'monthly' and end_time.day !=1: end_time = end_time.replace(day=1) - + for i, dataset in enumerate(obs_datasets): - min_lat = np.max([min_lat, dataset.lats.min()]) - max_lat = np.min([max_lat, dataset.lats.max()]) - min_lon = np.max([min_lon, dataset.lons.min()]) + min_lat = np.max([min_lat, dataset.lats.min()]) + max_lat = np.min([max_lat, dataset.lats.max()]) + min_lon = np.max([min_lon, dataset.lons.min()]) max_lon = np.min([max_lon, dataset.lons.max()]) - -bounds = Bounds(lat_min=min_lat, - lat_max=max_lat, - lon_min=min_lon, - lon_max=max_lon, - start=start_time, - end=end_time) - -for i, dataset in enumerate(obs_datasets): - obs_datasets[i] = dsp.subset(dataset, bounds) - if dataset.temporal_resolution() != temporal_resolution: - obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) - -for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.subset(dataset, bounds) - if dataset.temporal_resolution() != temporal_resolution: - model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) - -# Temporally subset both observation and model datasets -# for the user specified season -month_start = time_info['month_start'] -month_end = time_info['month_end'] -average_each_year = time_info['average_each_year'] - -# TODO: Fully support multiple observation / reference datasets. 
-# For now we will only use the first reference dataset listed in the config file -obs_dataset = obs_datasets[0] -obs_name = obs_names[0] -obs_dataset = dsp.temporal_subset(obs_dataset,month_start, month_end,average_each_year) -for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end, - average_each_year) - -# generate grid points for regridding -if config['regrid']['regrid_on_reference']: - new_lat = obs_dataset.lats - new_lon = obs_dataset.lons -else: - delta_lat = config['regrid']['regrid_dlat'] - delta_lon = config['regrid']['regrid_dlon'] - nlat = (max_lat - min_lat)/delta_lat+1 - nlon = (max_lon - min_lon)/delta_lon+1 - new_lat = np.linspace(min_lat, max_lat, nlat) - new_lon = np.linspace(min_lon, max_lon, nlon) - -# number of models -nmodel = len(model_datasets) -print 'Dataset loading completed' -print 'Observation data:', obs_name -print 'Number of model datasets:',nmodel -for model_name in model_names: - print model_name - -""" Step 4: Spatial regriding of the reference datasets """ -print 'Regridding datasets: ', config['regrid'] -if not config['regrid']['regrid_on_reference']: - obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon) - print 'Reference dataset has been regridded' -for i, dataset in enumerate(model_datasets): - model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon, - boundary_check=boundary_check) - print model_names[i]+' has been regridded' -print 'Propagating missing data information' -obs_dataset = dsp.mask_missing_data([obs_dataset]+model_datasets)[0] -model_datasets = dsp.mask_missing_data([obs_dataset]+model_datasets)[1:] - -""" Step 5: Checking and converting variable units """ -print 'Checking and converting variable units' -obs_dataset = dsp.variable_unit_conversion(obs_dataset) -for idata,dataset in enumerate(model_datasets): - model_datasets[idata] = dsp.variable_unit_conversion(dataset) - - -print 'Generating multi-model ensemble' -if len(model_datasets) >= 2.: - model_datasets.append(dsp.ensemble(model_datasets)) - model_names.append('ENS') - -""" Step 6: Generate subregion average and standard deviation """ -if config['use_subregions']: - # sort the subregion by region names and make a list - subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0)) - - # number of subregions - nsubregion = len(subregions) - - print ('Calculating spatial averages and standard deviations of ', - str(nsubregion),' subregions') - - obs_subregion_mean, obs_subregion_std, subregion_array = ( - utils.calc_subregion_area_mean_and_std([obs_dataset], subregions)) - model_subregion_mean, model_subregion_std, subregion_array = ( - utils.calc_subregion_area_mean_and_std(model_datasets, subregions)) - -""" Step 7: Write a netCDF file """ -workdir = config['workdir'] -if workdir[-1] != '/': - workdir = workdir+'/' -print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename'] -if not os.path.exists(workdir): - os.system("mkdir -p "+workdir) - -if config['use_subregions']: - dsp.write_netcdf_multiple_datasets_with_subregions( - obs_dataset, obs_name, model_datasets, model_names, - path=workdir+config['output_netcdf_filename'], - subregions=subregions, subregion_array=subregion_array, - ref_subregion_mean=obs_subregion_mean, - ref_subregion_std=obs_subregion_std, - model_subregion_mean=model_subregion_mean, - model_subregion_std=model_subregion_std) -else: - dsp.write_netcdf_multiple_datasets_with_subregions( - obs_dataset, obs_name, model_datasets, - model_names, - 
path=workdir+config['output_netcdf_filename']) - -""" Step 8: Calculate metrics and draw plots """ -nmetrics = config['number_of_metrics_and_plots'] -if config['use_subregions']: - Map_plot_subregion(subregions, obs_dataset, workdir) - -if nmetrics > 0: - print 'Calculating metrics and generating plots' - for imetric in np.arange(nmetrics)+1: - metrics_name = config['metrics'+'%1d' %imetric] - plot_info = config['plots'+'%1d' %imetric] - file_name = workdir+plot_info['file_name'] - - print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name - if metrics_name == 'Map_plot_bias_of_multiyear_climatology': - row, column = plot_info['subplots_array'] - if 'map_projection' in plot_info.keys(): - Map_plot_bias_of_multiyear_climatology( - obs_dataset, obs_name, model_datasets, model_names, - file_name, row, column, - map_projection=plot_info['map_projection']) - else: - Map_plot_bias_of_multiyear_climatology( - obs_dataset, obs_name, model_datasets, model_names, - file_name, row, column) - elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology': - Taylor_diagram_spatial_pattern_of_multiyear_climatology( - obs_dataset, obs_name, model_datasets, model_names, - file_name) - elif config['use_subregions']: - if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' - and average_each_year): - row, column = plot_info['subplots_array'] - Time_series_subregion( - obs_subregion_mean, obs_name, model_subregion_mean, - model_names, False, file_name, row, column, - x_tick=['Y'+str(i+1) - for i in np.arange(model_subregion_mean.shape[1])]) - - if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' - and not average_each_year and month_start==1 and month_end==12): - row, column = plot_info['subplots_array'] - Time_series_subregion( - obs_subregion_mean, obs_name, - model_subregion_mean, model_names, True, - file_name, row, column, - x_tick=['J','F','M','A','M','J','J','A','S','O','N','D']) - - if (metrics_name == 'Portrait_diagram_subregion_interannual_variability' - and average_each_year): - Portrait_diagram_subregion(obs_subregion_mean, obs_name, - model_subregion_mean, model_names, - False, file_name) - - if (metrics_name == 'Portrait_diagram_subregion_annual_cycle' - and not average_each_year and month_start==1 and month_end==12): - Portrait_diagram_subregion(obs_subregion_mean, obs_name, - model_subregion_mean, model_names, - True, file_name) - else: - print 'please check the currently supported metrics' + +if not 'boundary_type' in space_info: + bounds = Bounds(lat_min=min_lat, + lat_max=max_lat, + lon_min=min_lon, + lon_max=max_lon, + start=start_time, + end=end_time) +else: + bounds = Bounds(boundary_type=space_info['boundary_type'], + start=start_time, + end=end_time) + +for i, dataset in enumerate(obs_datasets): + obs_datasets[i] = dsp.subset(dataset, bounds) + if dataset.temporal_resolution() != temporal_resolution: + obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) + +for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.subset(dataset, bounds) + if dataset.temporal_resolution() != temporal_resolution: + model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) + +# Temporally subset both observation and model datasets +# for the user specified season +month_start = time_info['month_start'] +month_end = time_info['month_end'] +average_each_year = time_info['average_each_year'] + +# TODO: Fully support multiple observation / reference datasets. 
+# For now we will only use the first reference dataset listed in the config file +obs_dataset = obs_datasets[0] +obs_name = obs_names[0] +obs_dataset = dsp.temporal_subset(obs_dataset,month_start, month_end,average_each_year) +for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end, + average_each_year) + +# generate grid points for regridding +if config['regrid']['regrid_on_reference']: + new_lat = obs_dataset.lats + new_lon = obs_dataset.lons +else: + delta_lat = config['regrid']['regrid_dlat'] + delta_lon = config['regrid']['regrid_dlon'] + nlat = (max_lat - min_lat)/delta_lat+1 + nlon = (max_lon - min_lon)/delta_lon+1 + new_lat = np.linspace(min_lat, max_lat, nlat) + new_lon = np.linspace(min_lon, max_lon, nlon) + +# number of models +nmodel = len(model_datasets) +print 'Dataset loading completed' +print 'Observation data:', obs_name +print 'Number of model datasets:',nmodel +for model_name in model_names: + print model_name + +""" Step 4: Spatial regriding of the reference datasets """ +print 'Regridding datasets: ', config['regrid'] +if not config['regrid']['regrid_on_reference']: + obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon) + print 'Reference dataset has been regridded' +for i, dataset in enumerate(model_datasets): + model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon, + boundary_check=boundary_check) + print model_names[i]+' has been regridded' +print 'Propagating missing data information' +obs_dataset = dsp.mask_missing_data([obs_dataset]+model_datasets)[0] +model_datasets = dsp.mask_missing_data([obs_dataset]+model_datasets)[1:] + +""" Step 5: Checking and converting variable units """ +print 'Checking and converting variable units' +obs_dataset = dsp.variable_unit_conversion(obs_dataset) +for idata,dataset in enumerate(model_datasets): + model_datasets[idata] = dsp.variable_unit_conversion(dataset) + + +print 'Generating multi-model ensemble' +if len(model_datasets) >= 2.: + model_datasets.append(dsp.ensemble(model_datasets)) + model_names.append('ENS') + +""" Step 6: Generate subregion average and standard deviation """ +if config['use_subregions']: + # sort the subregion by region names and make a list + subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0)) + + # number of subregions + nsubregion = len(subregions) + + print ('Calculating spatial averages and standard deviations of ', + str(nsubregion),' subregions') + + obs_subregion_mean, obs_subregion_std, subregion_array = ( + utils.calc_subregion_area_mean_and_std([obs_dataset], subregions)) + model_subregion_mean, model_subregion_std, subregion_array = ( + utils.calc_subregion_area_mean_and_std(model_datasets, subregions)) + +""" Step 7: Write a netCDF file """ +workdir = config['workdir'] +if workdir[-1] != '/': + workdir = workdir+'/' +print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename'] +if not os.path.exists(workdir): + os.system("mkdir -p "+workdir) + +if config['use_subregions']: + dsp.write_netcdf_multiple_datasets_with_subregions( + obs_dataset, obs_name, model_datasets, model_names, + path=workdir+config['output_netcdf_filename'], + subregions=subregions, subregion_array=subregion_array, + ref_subregion_mean=obs_subregion_mean, + ref_subregion_std=obs_subregion_std, + model_subregion_mean=model_subregion_mean, + model_subregion_std=model_subregion_std) +else: + dsp.write_netcdf_multiple_datasets_with_subregions( + obs_dataset, obs_name, model_datasets, + model_names, + 
path=workdir+config['output_netcdf_filename']) + +""" Step 8: Calculate metrics and draw plots """ +nmetrics = config['number_of_metrics_and_plots'] +if config['use_subregions']: + Map_plot_subregion(subregions, obs_dataset, workdir) + +if nmetrics > 0: + print 'Calculating metrics and generating plots' + for imetric in np.arange(nmetrics)+1: + metrics_name = config['metrics'+'%1d' %imetric] + plot_info = config['plots'+'%1d' %imetric] + file_name = workdir+plot_info['file_name'] + + print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name + if metrics_name == 'Map_plot_bias_of_multiyear_climatology': + row, column = plot_info['subplots_array'] + if 'map_projection' in plot_info.keys(): + Map_plot_bias_of_multiyear_climatology( + obs_dataset, obs_name, model_datasets, model_names, + file_name, row, column, + map_projection=plot_info['map_projection']) + else: + Map_plot_bias_of_multiyear_climatology( + obs_dataset, obs_name, model_datasets, model_names, + file_name, row, column) + elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology': + Taylor_diagram_spatial_pattern_of_multiyear_climatology( + obs_dataset, obs_name, model_datasets, model_names, + file_name) + elif config['use_subregions']: + if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' + and average_each_year): + row, column = plot_info['subplots_array'] + Time_series_subregion( + obs_subregion_mean, obs_name, model_subregion_mean, + model_names, False, file_name, row, column, + x_tick=['Y'+str(i+1) + for i in np.arange(model_subregion_mean.shape[1])]) + + if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' + and not average_each_year and month_start==1 and month_end==12): + row, column = plot_info['subplots_array'] + Time_series_subregion( + obs_subregion_mean, obs_name, + model_subregion_mean, model_names, True, + file_name, row, column, + x_tick=['J','F','M','A','M','J','J','A','S','O','N','D']) + + if (metrics_name == 'Portrait_diagram_subregion_interannual_variability' + and average_each_year): + Portrait_diagram_subregion(obs_subregion_mean, obs_name, + model_subregion_mean, model_names, + False, file_name) + + if (metrics_name == 'Portrait_diagram_subregion_annual_cycle' + and not average_each_year and month_start==1 and month_end==12): + Portrait_diagram_subregion(obs_subregion_mean, obs_name, + model_subregion_mean, model_names, + True, file_name) + else: + print 'please check the currently supported metrics' http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/ocw/dataset_processor.py ---------------------------------------------------------------------- diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 7379ef4..79a7196 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -396,7 +396,8 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma if hasattr(subregion, 'lat_min'): # If boundary_type is 'rectangular' or 'CORDEX ***', ensure that the subregion information is well formed - _are_bounds_contained_by_dataset(target_dataset, subregion) + # this boundary check may not be necessary with the updated Bounds and subset + #_are_bounds_contained_by_dataset(target_dataset, subregion) # this boundary check is not necessary with the updated Bounds and subset if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2: start_time_index = np.where( http://git-wip-us.apache.org/repos/asf/climate/blob/5622ff8b/ocw/utils.py 
----------------------------------------------------------------------
diff --git a/ocw/utils.py b/ocw/utils.py
index 1820786..3de1776 100755
--- a/ocw/utils.py
+++ b/ocw/utils.py
@@ -483,33 +483,36 @@ def CORDEX_boundary(domain_name):
     '''
     if domain_name =='southamerica':
         return -57.61, 18.50, 254.28-360., 343.02-360.
-    if domain_name =='centralamerica':
+    elif domain_name =='centralamerica':
         return -19.46, 34.83, 235.74-360., 337.78-360.
-    if domain_name =='northamerica':
+    elif domain_name =='northamerica':
         return 12.55, 75.88, 189.26-360., 336.74-360.
-    if domain_name =='europe':
+    elif domain_name =='europe':
         return 22.20, 71.84, 338.23-360., 64.4
-    if domain_name =='africa':
+    elif domain_name =='africa':
         return -45.76, 42.24, 335.36-360., 60.28
-    if domain_name =='southasia':
+    elif domain_name =='southasia':
        return -15.23, 45.07, 19.88, 115.55
-    if domain_name =='eastasia':
+    elif domain_name =='eastasia':
        return -0.10, 61.90, 51.59, 179.99
-    if domain_name =='centralasia':
+    elif domain_name =='centralasia':
        return 18.34, 69.37, 11.05, 139.13
-    if domain_name =='australasia':
+    elif domain_name =='australasia':
        return -52.36, 12.21, 89.25, 179.99
-    if domain_name =='antartica':
+    elif domain_name =='antartica':
        return -89.48,-56.00, -179.00, 179.00
-    if domain_name =='artic':
+    elif domain_name =='artic':
        return 46.06, 89.50, -179.00, 179.00
-    if domain_name =='mediterranean':
+    elif domain_name =='mediterranean':
        return 25.63, 56.66, 339.79-360.00, 50.85
-    if domain_name =='middleeastnorthafrica':
+    elif domain_name =='middleeastnorthafrica':
        return -7.00, 45.00, 333.00-360.00, 76.00
-    if domain_name =='southeastasia':
+    elif domain_name =='southeastasia':
        return -15.14, 27.26, 89.26, 146.96
-
+    else:
+        err ="Invalid CORDEX domain name"
+        raise ValueError(err)
+
 def mask_using_shapefile_info(lons, lats, masked_regions, extract = True):
     if lons.ndim == 2 and lats.ndim == 2:
         lons_2d = lons
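
----------------------------------------------------------------------

Usage sketch: the snippet below mirrors how this commit's run_RCMES.py consumes the new 'boundary_type' option, resolving a CORDEX domain name to a lat/lon box via ocw.utils.CORDEX_boundary and building the subsetting Bounds directly from the domain name. It is an illustration, not part of the patch: the hard-coded space_info dict and the dates are placeholders standing in for the parsed 'space:' and 'time:' sections of a configuration file such as CMIP5_precipitation_evaluation.yaml, and Bounds(boundary_type=...) assumes the updated Bounds class that the patched run_RCMES.py relies on.

import ocw.utils as utils
from ocw.dataset import Bounds
from datetime import datetime

# Stand-in for the parsed 'space:' section of a configuration file, e.g.
# RCMES/configuration_files/CMIP5_precipitation_evaluation.yaml.
space_info = {'boundary_type': 'CORDEX Central America'}

if 'boundary_type' not in space_info:
    # Rectangular domains are still given as explicit corners.
    min_lat = space_info['min_lat']
    max_lat = space_info['max_lat']
    min_lon = space_info['min_lon']
    max_lon = space_info['max_lon']
else:
    # 'CORDEX Central America' -> 'centralamerica': drop the leading 'CORDEX ',
    # remove spaces, lower-case, then look up the domain's lat/lon box.
    domain_name = space_info['boundary_type'][6:].replace(" ", "").lower()
    min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain_name)
    # For 'centralamerica' this is roughly (-19.46, 34.83, -124.26, -22.22).

# run_RCMES.py builds the subsetting Bounds straight from the domain name and
# the evaluation period (dates here taken from the example configuration).
bounds = Bounds(boundary_type=space_info['boundary_type'],
                start=datetime(1998, 1, 1),
                end=datetime(1999, 12, 31))

Every loaded dataset is then clipped with dsp.subset(dataset, bounds), so a single "boundary_type: CORDEX <domain>" entry replaces the four min_lat/max_lat/min_lon/max_lon keys that the 'space:' section previously required.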