Rename the ocw-config-runner directory to ocw_config_runner
Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/e926a581 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/e926a581 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/e926a581 Branch: refs/heads/master Commit: e926a5815e97fff22ac3210cbd8afad49ee0b97e Parents: a63049b Author: Maziyar Boustani <[email protected]> Authored: Thu Sep 3 14:40:22 2015 -0700 Committer: Maziyar Boustani <[email protected]> Committed: Thu Sep 3 14:40:22 2015 -0700 ---------------------------------------------------------------------- ocw-config-runner/configuration_parsing.py | 285 ------- ocw-config-runner/configuration_writer.py | 302 ------- ocw-config-runner/evaluation_creation.py | 178 ---- ocw-config-runner/example/portrait_diagram.yaml | 30 - .../example/simple_model_to_model_bias.yaml | 35 - ocw-config-runner/example/subregion.yaml | 30 - .../example/taylor_diagram_example.yaml | 36 - .../example/time_series_plot_example.yaml | 31 - ocw-config-runner/ocw_evaluation_from_config.py | 72 -- ocw-config-runner/plot_generation.py | 204 ----- ocw-config-runner/tests/test_config_parsing.py | 806 ------------------- ocw-config-runner/tests/test_config_writer.py | 768 ------------------ .../tests/test_evaluation_creation.py | 49 -- ocw_config_runner/configuration_parsing.py | 285 +++++++ ocw_config_runner/configuration_writer.py | 302 +++++++ ocw_config_runner/evaluation_creation.py | 178 ++++ ocw_config_runner/example/portrait_diagram.yaml | 30 + .../example/simple_model_to_model_bias.yaml | 35 + ocw_config_runner/example/subregion.yaml | 30 + .../example/taylor_diagram_example.yaml | 36 + .../example/time_series_plot_example.yaml | 31 + ocw_config_runner/ocw_evaluation_from_config.py | 72 ++ ocw_config_runner/plot_generation.py | 204 +++++ ocw_config_runner/tests/test_config_parsing.py | 806 +++++++++++++++++++ ocw_config_runner/tests/test_config_writer.py | 768 ++++++++++++++++++ 
.../tests/test_evaluation_creation.py | 49 ++ 26 files changed, 2826 insertions(+), 2826 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/configuration_parsing.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/configuration_parsing.py b/ocw-config-runner/configuration_parsing.py deleted file mode 100644 index 5c28249..0000000 --- a/ocw-config-runner/configuration_parsing.py +++ /dev/null @@ -1,285 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import logging -import re -import sys - -import ocw.metrics as metrics - -import yaml - -logging.basicConfig() -logger = logging.getLogger(__name__) - -def is_config_valid(config_data): - """ Validate supplied evaluation configuration data. - - :param config_data: Dictionary of the data parsed from the supplied YAML - configuration file. - :type config_data: :func:`dict` - - :returns: True if the configuration data is sufficient for an evaluation and - seems to be well formed, False otherwise. 
- """ - if not _valid_minimal_config(config_data): - logger.error('Insufficient configuration file data for an evaluation') - return False - - if not _config_is_well_formed(config_data): - logger.error('Configuration data is not well formed') - return False - - return True - -def _valid_minimal_config(config_data): - """""" - if not 'datasets' in config_data.keys(): - logger.error('No datasets specified in configuration data.') - return False - - if not 'metrics' in config_data.keys(): - logger.error('No metrics specified in configuration data.') - return False - - if _contains_unary_metrics(config_data['metrics']): - if (not 'reference' in config_data['datasets'].keys() and - not 'targets' in config_data['datasets'].keys()): - err = ( - 'Unary metric in configuration data requires either a reference ' - 'or target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' - ) - logger.error(err) - return False - - if _contains_binary_metrics(config_data['metrics']): - if (not 'reference' in config_data['datasets'].keys() or - not 'targets' in config_data['datasets'].keys()): - logger.error( - 'Binary metric in configuration requires both a reference ' - 'and target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' 
- ) - return False - - return True - -def _config_is_well_formed(config_data): - """""" - is_well_formed = True - - if 'reference' in config_data['datasets']: - if not _valid_dataset_config_data(config_data['datasets']['reference']): - is_well_formed = False - - if 'targets' in config_data['datasets']: - targets = config_data['datasets']['targets'] - if type(targets) != type(list()): - err = ( - 'Expected to find list of target datasets but instead found ' - 'object of type {}' - ).format(type(targets)) - logger.error(err) - is_well_formed = False - else: - for t in targets: - if not _valid_dataset_config_data(t): - is_well_formed = False - - available_metrics = _fetch_built_in_metrics() - for metric in config_data['metrics']: - if metric not in available_metrics: - warning = ( - 'Unable to locate metric name {} in built-in metrics. If this ' - 'is not a user defined metric then please check for potential ' - 'misspellings.' - ).format(metric) - logger.warn(warning) - is_well_formed = False - - if 'subregions' in config_data: - for subregion in config_data['subregions']: - if not _valid_subregion_config_data(subregion): - is_well_formed = False - - if 'plots' in config_data: - for plot in config_data['plots']: - if not _valid_plot_config_data(plot): - is_well_formed = False - # Ensure that if we're trying to make a plot that require - # subregion info that the config has this present. - elif plot['type'] in ['subregion', 'portrait']: - if ('subregions' not in config_data or - len(config_data['subregions']) < 1): - logger.error( - 'Plot config that requires subregion information is present ' - 'in a config file without adequate subregion information ' - 'provided. Please ensure that you have properly supplied 1 or ' - 'more subregion config values.' 
- ) - is_well_formed = False - - - return is_well_formed - -def _contains_unary_metrics(config_metric_data): - """""" - unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] - return any(metric in unarys for metric in config_metric_data) - -def _contains_binary_metrics(config_metric_data): - """""" - binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] - return any(metric in binarys for metric in config_metric_data) - -def _fetch_built_in_metrics(): - """""" - unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] - binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] - return unarys + binarys - -def _valid_dataset_config_data(dataset_config_data): - """""" - try: - data_source = dataset_config_data['data_source'] - except KeyError: - logger.error('Dataset does not contain a data_source attribute.') - return False - - if data_source == 'local': - required_keys = set(['data_source', 'file_count', 'path', 'variable']) - elif data_source == 'rcmed': - required_keys = set([ - 'dataset_id', - 'parameter_id', - 'min_lat', - 'max_lat', - 'min_lon', - 'max_lon', - 'start_time', - 'end_time', - ]) - elif data_source == 'esgf': - required_keys = set([ - 'data_source', - 'dataset_id', - 'variable', - 'esgf_username', - 'esgf_password' - ]) - elif data_source == 'dap': - required_keys = set({'url', 'variable'}) - else: - logger.error('Dataset does not contain a valid data_source location.') - return False - - present_keys = set(dataset_config_data.keys()) - missing_keys = required_keys - present_keys - contains_required = len(missing_keys) == 0 - - if contains_required: - if data_source == 'local' and dataset_config_data['file_count'] > 1: - # If the dataset is a multi-file dataset then we need to make sure - # that the file glob pattern is included. 
- if not 'file_glob_pattern' in dataset_config_data: - logger.error( - 'Multi-file local dataset is missing key: file_glob_pattern' - ) - return False - return True - else: - missing = sorted(list(missing_keys)) - logger.error( - 'Dataset does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - return False - -def _valid_plot_config_data(plot_config_data): - """""" - try: - plot_type = plot_config_data['type'] - except KeyError: - logger.error('Plot config does not include a type attribute.') - return False - - if plot_type == 'contour': - required_keys = set([ - 'results_indices', - 'lats', - 'lons', - 'output_name' - ]) - elif plot_type == 'taylor': - required_keys = set([ - 'stddev_results_indices', - 'pattern_corr_results_indices', - 'output_name' - ]) - elif plot_type == 'subregion': - required_keys = set([ - 'lats', - 'lons', - 'output_name' - ]) - elif plot_type == 'time_series': - required_keys = set([ - 'time_range' - ]) - elif plot_type == 'portrait': - required_keys = set([ - 'metric_index', - 'output_name' - ]) - else: - logger.error('Invalid plot type specified.') - return False - - present_keys = set(plot_config_data.keys()) - missing_keys = required_keys - present_keys - contains_required = len(missing_keys) == 0 - - if not contains_required: - missing = sorted(list(missing_keys)) - logger.error( - 'Plot config does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - return False - - return True - -def _valid_subregion_config_data(subregion_config_data): - """""" - if type(subregion_config_data) != type([]): - logger.error( - 'Subregions should be passed as a list of lists where ' - 'each sub-list contains a bounding box of the form: ' - '[lat_min, lat_max, lon_min, lon_max].' 
- ) - return False - - if len(subregion_config_data) != 4: - logger.error( - 'Subregions should be passed as a list of lists where ' - 'each sub-list contains a bounding box of the form: ' - '[lat_min, lat_max, lon_min, lon_max].' - ) - return False - - return True http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/configuration_writer.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/configuration_writer.py b/ocw-config-runner/configuration_writer.py deleted file mode 100644 index 8fc9242..0000000 --- a/ocw-config-runner/configuration_writer.py +++ /dev/null @@ -1,302 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datetime as dt -import logging - -import yaml - -logging.basicConfig() -logger = logging.getLogger(__name__) - -def export_evaluation_to_config(evaluation, file_path='./exported_eval.yaml'): - ''' Export an evaluation to a config file - - :param evaluation: The evaluation object to export. - :type evaluation: :class:`evaluation.Evaluation` - - :param file_path: Optional file path where the config file should be saved. 
- :type file_path: :mod:`string` - ''' - config = {} - - config['evaluation'] = generate_evaluation_information(evaluation) - config['datasets'] = generate_dataset_information(evaluation) - config['metrics'] = generate_metric_information(evaluation) - config['subregions'] = generate_subregion_information(evaluation) - - yaml.dump(config, file(file_path, 'w')) - -def generate_dataset_information(evaluation): - ''' Generate dataset config file output for a given Evaluation object. - - :param evaluation: The evaluation object from which to extract metrics. - :type evaluation: :class:`evaluation.Evaluation` - - :returns: A :func:`dict` of dataset configuration information for export - to a configuration file. - :rtype: :func:`dict` - ''' - datasets = {} - - if evaluation.ref_dataset: - datasets['reference'] = generate_dataset_config(evaluation.ref_dataset) - - if len(evaluation.target_datasets) > 0: - datasets['targets'] = [ - generate_dataset_config(target) - for target in evaluation.target_datasets - ] - - return datasets - -def generate_dataset_config(dataset): - ''' Generate dataset config file output for a given Dataset object. - - :param dataset: The dataset from which to extract configuration - information. - :type dataset: :class:`dataset.Dataset` - - :returns: :func:`dict` containing necessary information for - dataset to be saved into a configuration object. - - :raises AttributeError: If dataset does not contain expected source data. - ''' - dataset_source = dataset.origin['source'] - - if dataset_source == 'local': - info = _extract_local_dataset_info(dataset) - elif dataset_source == 'rcmed': - info = _extract_rcmed_dataset_info(dataset) - elif dataset_source == 'esgf': - info = _extract_esgf_dataset_info(dataset) - elif dataset_source == 'dap': - info = _extract_dap_dataset_info(dataset) - else: - err = ( - "Unexpected source in dataset origin information." - "Found {}." 
- ).format(dataset_source) - logger.error(err) - raise AttributeError(err) - - info['optional_args']['name'] = dataset.name - info['optional_args']['units'] = dataset.units - - return info - -def generate_metric_information(evaluation): - ''' Generate metric config file output from a given Evaluation object. - - :param evaluation: The evaluation object from which to extract metrics. - :type evaluation: :class:`evaluation.Evaluation` - - :returns: A :func:`list` of :mod:`metrics` object names for output into - a configuration file. - :rtype: :func:`list` of :mod:`metrics` - ''' - unary_metrics = [x.__class__.__name__ for x in evaluation.unary_metrics] - binary_metrics = [x.__class__.__name__ for x in evaluation.metrics] - - return unary_metrics + binary_metrics - -def generate_evaluation_information(evaluation): - ''' Generate evaluation-related config file output. - - Attempts to parse out temporal and spatial rebinning/regridding information - from the supplied evaluation object. If no datasets can be found, values - are defaulted to sane defaults or (potentially) excluded entirely. - - It's important to note that this function does its best to extrapolate the - configuration information. It's possible that you will encounter a scenario - where the guessed values are not what you want/expect. Please double - check the output before blinding trusting what this generates. - - :param evaluation: The evaluation object from which to extract metrics. - :type evaluation: :class:`evaluation.Evaluation` - - :returns: A dictionary of valid `evaluation` section settings for export - to a configuration file. 
- :rtype: :func:`dict` - ''' - eval_config = { - 'temporal_time_delta': 999, - 'spatial_regrid_lats': (-90, 90, 1), - 'spatial_regrid_lons': (-180, 180, 1), - 'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"], - } - - datasets = [] - - if evaluation.ref_dataset: - datasets.append(evaluation.ref_dataset) - - if evaluation.target_datasets: - datasets += evaluation.target_datasets - - if len(datasets) > 0: - eval_config['temporal_time_delta'] = _calc_temporal_bin_size(datasets) - - lats, lons = _calc_spatial_lat_lon_grid(datasets) - eval_config['spatial_regrid_lats'] = lats - eval_config['spatial_regrid_lons'] = lons - - eval_config['subset'] = _calc_subset_config(datasets) - - return eval_config - -def generate_subregion_information(evaluation): - ''' Generate subregion config file output from a given Evaluation object. - - :param evaluation: The evaluation object from which to extract metrics. - :type evaluation: :class:`evaluation.Evaluation` - - :returns: A :func:`list` of :func:`list` objects containing bounding - box info for export into a configuration file - :rtype: :func:`list` of :func:`list` - ''' - subregions = [] - for s in evaluation.subregions: - subregions.append([s.lat_min, s.lat_max, s.lon_min, s.lon_max]) - - return subregions - -def _extract_local_dataset_info(dataset): - '''''' - dataset_info = {'optional_args': {}} - - dataset_info['data_source'] = 'local' - dataset_info['file_count'] = 1 - dataset_info['path'] = dataset.origin['path'] - dataset_info['variable'] = dataset.variable - - dataset_info['optional_args']['lat_name'] = dataset.origin['lat_name'] - dataset_info['optional_args']['lon_name'] = dataset.origin['lon_name'] - dataset_info['optional_args']['time_name'] = dataset.origin['time_name'] - - if 'elevation_index' in dataset.origin: - elev = dataset.origin['elevation_index'] - dataset_info['optional_args']['elevation_index'] = elev - - return dataset_info - -def _extract_rcmed_dataset_info(dataset): - '''''' - dataset_info = 
{'optional_args': {}} - - min_lat, max_lat, min_lon, max_lon = dataset.spatial_boundaries() - start_time, end_time = dataset.time_range() - - dataset_info['data_source'] = 'rcmed' - dataset_info['dataset_id'] = dataset.origin['dataset_id'] - dataset_info['parameter_id'] = dataset.origin['parameter_id'] - dataset_info['min_lat'] = min_lat - dataset_info['max_lat'] = max_lat - dataset_info['min_lon'] = min_lon - dataset_info['max_lon'] = max_lon - dataset_info['start_time'] = str(start_time) - dataset_info['end_time'] = str(end_time) - - return dataset_info - -def _extract_esgf_dataset_info(dataset): - '''''' - dataset_info = {'optional_args': {}} - - dataset_info['data_source'] = 'esgf' - dataset_info['dataset_id'] = dataset.origin['dataset_id'] - dataset_info['variable'] = dataset.origin['variable'] - dataset_info['esgf_username'] = 'Put your ESGF Username here' - dataset_info['esgf_password'] = 'Put your ESGF Password here' - - return dataset_info - -def _extract_dap_dataset_info(dataset): - '''''' - dataset_info = {'optional_args': {}} - - dataset_info['data_source'] = 'dap' - dataset_info['url'] = dataset.origin['url'] - dataset_info['variable'] = dataset.variable - - return dataset_info - -def _calc_temporal_bin_size(datasets): - '''''' - times = datasets[0].times - time_delta = times[1] - times[0] - - if time_delta.days == 0: - return 1 - elif time_delta.days <= 31: - return 31 - elif time_delta.days <= 366: - return 366 - else: - return 999 - -def _calc_spatial_lat_lon_grid(datasets): - '''''' - lat_min, lat_max, lon_min, lon_max = datasets[0].spatial_boundaries() - - lats = datasets[0].lats - lons = datasets[0].lons - # These explicit float casts are needed to ensure that the type of the - # lat/lon steps are not numpy values. PyYAML will choke on export if it - # encounters a Numpy value. 
- lat_step = float(abs(lats[1] - lats[0])) - lon_step = float(abs(lons[1] - lons[0])) - - # We need to add an extra step value onto the end so when we generate a - # range with these values we don't lose one that we're expecting. - if lat_max != 90: lat_max += lat_step - if lon_max != 180: lon_max += lon_step - - return ((lat_min, lat_max, lat_step), (lon_min, lon_max, lon_step)) - -def _calc_subset_config(datasets): - '''''' - lat_min = 90 - lat_max = -90 - lon_min = 180 - lon_max = -180 - start = dt.datetime(2500, 1, 1) - end = dt.datetime(1500, 1, 1) - - for ds in datasets: - ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = ds.spatial_boundaries() - ds_start, ds_end = ds.time_range() - - if ds_lat_min < lat_min: - lat_min = ds_lat_min - - if ds_lat_max > lat_max: - lat_max = ds_lat_max - - if ds_lon_min < lon_min: - lon_min = ds_lon_min - - if ds_lon_max > lon_max: - lon_max = ds_lon_max - - if ds_start < start: - start = ds_start - - if ds_end > end: - end = ds_end - - return [lat_min, lat_max, lon_min, lon_max, str(start), str(end)] http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/evaluation_creation.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/evaluation_creation.py b/ocw-config-runner/evaluation_creation.py deleted file mode 100644 index 88394de..0000000 --- a/ocw-config-runner/evaluation_creation.py +++ /dev/null @@ -1,178 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import dateutil.parser -from datetime import timedelta -import logging - -from ocw.dataset import Bounds -from ocw.evaluation import Evaluation -import ocw.dataset_processor as dsp -import ocw.data_source.local as local -import ocw.data_source.rcmed as rcmed -import ocw.data_source.esgf as esgf -import ocw.data_source.dap as dap -import ocw.metrics as metrics - -import numpy as np - -logging.basicConfig() -logger = logging.getLogger(__name__) - -def generate_evaluation_from_config(config_data): - """ Generate an Evaluation object from configuration data. - - :param config_data: Dictionary of the data parsed from the supplied YAML - configuration file. - :type config_data: :func:`dict` - - :returns: An Evaluation object containing the data specified in the - supplied configuration data. 
- """ - # Load datasets - reference = None - targets = [] - if config_data['datasets']: - if 'reference' in config_data['datasets']: - reference = _load_dataset(config_data['datasets']['reference']) - - if 'targets' in config_data['datasets']: - targets = [_load_dataset(t) for t in config_data['datasets']['targets']] - - reference, targets = _prepare_datasets_for_evaluation(reference, - targets, - config_data) - # Load metrics - eval_metrics = [] - if config_data['metrics']: - eval_metrics = [_load_metric(m)() for m in config_data['metrics']] - - # Load Subregions (if present) - subregions = None - if 'subregions' in config_data: - subregions = [_load_subregion(s) for s in config_data['subregions']] - - return Evaluation(reference, targets, eval_metrics, subregions=subregions) - -def _load_dataset(dataset_config_data): - """""" - if dataset_config_data['data_source'] == 'local': - if dataset_config_data['file_count'] > 1: - logger.error( - 'Multi-file datasets are currently not supported. 
Cancelling load ' - 'of the following dataset: {}'.format(dataset_config_data) - ) - return None - - return local.load_file(dataset_config_data['path'], - dataset_config_data['variable'], - **dataset_config_data.get('optional_args', {})) - elif dataset_config_data['data_source'] == 'rcmed': - return rcmed.parameter_dataset(dataset_config_data['dataset_id'], - dataset_config_data['parameter_id'], - dataset_config_data['min_lat'], - dataset_config_data['max_lat'], - dataset_config_data['min_lon'], - dataset_config_data['min_lon'], - dataset_config_data['start_time'], - dataset_config_data['end_time'], - **dataset_config_data.get('optional_args', {})) - elif dataset_config_data['data_source'] == 'esgf': - return esgf.load_dataset(dataset_config_data['dataset_id'], - dataset_config_data['variable'], - dataset_config_data['esgf_username'], - dataset_config_data['esgf_password'], - **dataset_config_data.get('optional_args', {})) - elif dataset_config_data['data_source'] == 'dap': - return dap.load(dataset_config_data['url'], - dataset_config_data['variable'], - **dataset_config_data('optional_args', {})) - -def _prepare_datasets_for_evaluation(reference, targets, config_data): - """""" - subset = config_data['evaluation'].get('subset', None) - temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None) - spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None) - spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None) - - # If we have a temporal time delta and it's daily (i.e., 1) we will - # normalize the data as daily data (which means we adjust the start times - # for each bucket of data to be consistent). By default we will normalize - # the data as monthly. Note that this will not break yearly data so it's - # safer to do this no matter what. This keeps us from ending up with 1-off - # errors in the resulting dataset shape post-temporal/spatial adjustments - # that break evaluations. 
- string_time_delta = 'monthly' - if temporal_time_delta and temporal_time_delta == 1: - string_time_delta = 'daily' - - reference = dsp.normalize_dataset_datetimes(reference, string_time_delta) - targets = [dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets] - - if subset: - start = dateutil.parser.parse(subset[4]) - end = dateutil.parser.parse(subset[5]) - bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end) - - if reference: - reference = dsp.safe_subset(bounds, reference) - - if targets: - targets = [dsp.safe_subset(bounds, t) for t in targets] - - if temporal_time_delta: - resolution = timedelta(temporal_time_delta) - - if reference: - reference = dsp.temporal_rebin(reference, resolution) - - if targets: - targets = [dsp.temporal_rebin(t, resolution) for t in targets] - - if spatial_regrid_lats and spatial_regrid_lons: - lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2]) - lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2]) - - if reference: - reference = dsp.spatial_regrid(reference, lats, lons) - - if targets: - targets = [dsp.spatial_regrid(t, lats, lons) for t in targets] - - return reference, targets - -def _load_metric(metric_config_data): - """""" - # If the dataset is user defined outside of ocw.metrics we won't currently - # handle loading it. - if '.' in metric_config_data: - logger.error( - 'User-defined metrics outside of the ocw.metrics module ' - 'cannot currently be loaded. If you just wanted a metric ' - 'found in ocw.metrics then do not specify the full ' - 'package and module names. See the documentation for examples.' 
- ) - return None - - return getattr(metrics, metric_config_data) - -def _load_subregion(subregion_config_data): - """""" - return Bounds(float(subregion_config_data[0]), - float(subregion_config_data[1]), - float(subregion_config_data[2]), - float(subregion_config_data[3])) http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/example/portrait_diagram.yaml ---------------------------------------------------------------------- diff --git a/ocw-config-runner/example/portrait_diagram.yaml b/ocw-config-runner/example/portrait_diagram.yaml deleted file mode 100644 index f3e45de..0000000 --- a/ocw-config-runner/example/portrait_diagram.yaml +++ /dev/null @@ -1,30 +0,0 @@ -evaluation: - temporal_time_delta: 365 - spatial_regrid_lats: !!python/tuple [-40, 40, 1] - spatial_regrid_lons: !!python/tuple [-40, 40, 1] - -datasets: - reference: - data_source: local - file_count: 1 - path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc - variable: tasmax - - targets: - - data_source: local - file_count: 1 - path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc - variable: tasmax - optional_args: - name: WRF -metrics: - - PatternCorrelation - -plots: - - type: portrait - metric_index: 0 - output_name: portrait_test - -subregions: - - [-10.0, 0.0, 29.0, 36.5] - - [0.0, 10.0, 29.0, 37.5] http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/example/simple_model_to_model_bias.yaml ---------------------------------------------------------------------- diff --git a/ocw-config-runner/example/simple_model_to_model_bias.yaml b/ocw-config-runner/example/simple_model_to_model_bias.yaml deleted file mode 100644 index bd59fbd..0000000 --- a/ocw-config-runner/example/simple_model_to_model_bias.yaml +++ /dev/null @@ -1,35 +0,0 @@ -evaluation: - temporal_time_delta: 365 - spatial_regrid_lats: !!python/tuple [-20, 20, 1] - spatial_regrid_lons: !!python/tuple [-20, 20, 1] - -datasets: - reference: - data_source: 
local - file_count: 1 - path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc - variable: tasmax - - targets: - - data_source: local - file_count: 1 - path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc - variable: tasmax -metrics: - - Bias - -plots: - - type: contour - results_indices: - - !!python/tuple [0, 0] - lats: - range_min: -20 - range_max: 20 - range_step: 1 - lons: - range_min: -20 - range_max: 20 - range_step: 1 - output_name: wrf_bias_compared_to_knmi - optional_args: - gridshape: !!python/tuple [6, 6] http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/example/subregion.yaml ---------------------------------------------------------------------- diff --git a/ocw-config-runner/example/subregion.yaml b/ocw-config-runner/example/subregion.yaml deleted file mode 100644 index d39e39b..0000000 --- a/ocw-config-runner/example/subregion.yaml +++ /dev/null @@ -1,30 +0,0 @@ -subregions: - - [-10.0, 0.0, 29.0, 36.5] - - [0.0, 10.0, 29.0, 37.5] - - [10.0, 20.0, 25.0, 32.5] - - [20.0, 33.0, 25.0, 32.5] - - [-19.3,-10.2,12.0, 20.0] - - [15.0, 30.0, 15.0, 25.0] - - [-10.0, 10.0, 7.3, 15.0] - - [-10.9, 10.0, 5.0, 7.3] - - [33.9, 40.0, 6.9, 15.0] - - [10.0, 25.0, 0.0, 10.0] - - [10.0, 25.0,-10.0, 0.0] - - [30.0, 40.0,-15.0, 0.0] - - [33.0, 40.0, 25.0, 35.0] - -datasets: - -metrics: - -plots: - - type: subregion - output_name: subregion_plot - lats: - range_min: -45.0 - range_max: 42.24 - range_step: 0.5 - lons: - range_min: -24.0 - range_max: 60.0 - range_step: 0.5 http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/example/taylor_diagram_example.yaml ---------------------------------------------------------------------- diff --git a/ocw-config-runner/example/taylor_diagram_example.yaml b/ocw-config-runner/example/taylor_diagram_example.yaml deleted file mode 100644 index 66eab3b..0000000 --- a/ocw-config-runner/example/taylor_diagram_example.yaml +++ /dev/null @@ -1,36 +0,0 @@ 
-evaluation: - temporal_time_delta: 30 - spatial_regrid_lats: !!python/tuple [-45, 42, 1] - spatial_regrid_lons: !!python/tuple [-24, 60, 1] - subset: [-45, 42, -24, 60, "1989-01-01", "1989-12-01"] - -datasets: - reference: - data_source: local - file_count: 1 - path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc - variable: tasmax - optional_args: - name: dataset1 - - targets: - - data_source: local - file_count: 1 - path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc - variable: tasmax - optional_args: - name: dataset2 -metrics: - - StdDevRatio - - PatternCorrelation - -plots: - - type: taylor - stddev_results_indices: - - !!python/tuple [0, 0] - pattern_corr_results_indices: - - !!python/tuple [0, 1] - output_name: taylor_plot - optional_args: - fmt: png - frameon: False http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/example/time_series_plot_example.yaml ---------------------------------------------------------------------- diff --git a/ocw-config-runner/example/time_series_plot_example.yaml b/ocw-config-runner/example/time_series_plot_example.yaml deleted file mode 100644 index b5599cc..0000000 --- a/ocw-config-runner/example/time_series_plot_example.yaml +++ /dev/null @@ -1,31 +0,0 @@ -evaluation: - temporal_time_delta: 30 - spatial_regrid_lats: !!python/tuple [-20, 20, 1] - spatial_regrid_lons: !!python/tuple [-20, 20, 1] - subset: [-180, 180, -90, 90, "1989-01-01", "1990-12-01"] - -datasets: - reference: - data_source: local - file_count: 1 - path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc - variable: tasmax - optional_args: - name: KNMI - - targets: - - data_source: local - file_count: 1 - path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc - variable: tasmax - optional_args: - name: WRF -metrics: - -plots: - - type: time_series - time_range: monthly - -subregions: - - [-10.0, 0.0, -19.0, 19.0] - - [0.0, 10.0, -10.0, 10.0] 
http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/ocw_evaluation_from_config.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/ocw_evaluation_from_config.py b/ocw-config-runner/ocw_evaluation_from_config.py deleted file mode 100644 index 93b411a..0000000 --- a/ocw-config-runner/ocw_evaluation_from_config.py +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import argparse -import logging - -from configuration_parsing import is_config_valid -from evaluation_creation import generate_evaluation_from_config -from plot_generation import plot_from_config - -import yaml - -logging.basicConfig() -logger = logging.getLogger(__name__) - -def run_evaluation_from_config(config_file_path, ignore_config_errors=False): - """ Run an OCW evaluation specified by a config file. - - :param config_file_path: The file path to a OCW compliant YAML file - specifying how to run the evaluation. For additional information on - the valid options that you can set in the config please check the - project wiki https://cwiki.apache.org/confluence/display/climate/home#'. 
- :type config_file_path: :mod:`string` - - :param ignore_config_errors: When this is true configuration parsing errors - will NOT interrupt the evaluation run. Note, it is very unlikely that - you will want this value set. However it is possible that you will want - to graph something that doesn't require a full evaluation run. This is - provided for that situation. - :type ignore_config_errors: :func:`bool` - """ - config = yaml.load(open(config_file_path, 'r')) - - if not ignore_config_errors and not is_config_valid(config): - logger.warning( - 'Unable to validate configuration file. Exiting evaluation. ' - 'Please check documentation for config information.' - ) - - sys.exit(1) - - evaluation = generate_evaluation_from_config(config) - - if evaluation._evaluation_is_valid(): - evaluation.run() - - plot_from_config(evaluation, config) - -if __name__ == '__main__': - description = 'OCW Config Based Evaluation' - epilog = 'Additional information at https://cwiki.apache.org/confluence/display/climate/home#' - - parser = argparse.ArgumentParser(description=description, epilog=epilog) - parser.add_argument('config', help='Path to YAML config file for the evaluation') - parser.add_argument('ignore_config_errors', nargs='?', default=False, type=bool) - args = parser.parse_args() - - run_evaluation_from_config(args.config, args.ignore_config_errors) http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/plot_generation.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/plot_generation.py b/ocw-config-runner/plot_generation.py deleted file mode 100644 index 392331d..0000000 --- a/ocw-config-runner/plot_generation.py +++ /dev/null @@ -1,204 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import logging - -import ocw.dataset_processor as dsp -import ocw.plotter as plots -import ocw.utils as utils - -import numpy as np - -logging.basicConfig() -logger = logging.getLogger(__name__) - -def plot_from_config(evaluation, config_data): - """ Generate plots for an evaluation from configuration data. - - :param evaluation: The Evaluation for which to generate plots. - :type evaluation: :class:`ocw.evaluation.Evaluation` - :param config_data: Dictionary of the data parsed from the supplied YAML - configuration file. 
- :type: :func:`dict` - """ - for plot in config_data['plots']: - if plot['type'] == 'contour': - _draw_contour_plot(evaluation, plot) - elif plot['type'] == 'subregion': - _draw_subregion_diagram(evaluation, plot) - elif plot['type'] == 'taylor': - _draw_taylor_diagram(evaluation, plot) - elif plot['type'] == 'time_series': - _draw_time_series_plot(evaluation, plot) - elif plot['type'] == 'portrait': - _draw_portrait_diagram(evaluation, plot) - else: - logger.error('Unrecognized plot type requested: {}'.format(plot['type'])) - -def _draw_contour_plot(evaluation, plot_config): - """""" - lats = plot_config['lats'] - if type(lats) != type(list): - lats = np.arange(lats['range_min'], lats['range_max'], lats['range_step']) - - lons = plot_config['lons'] - if type(lons) != type(list): - lons = np.arange(lons['range_min'], lons['range_max'], lons['range_step']) - - for i, index in enumerate(plot_config['results_indices']): - if len(index) == 2: - target, metric = index - vals = evaluation.results[target][metric] - elif len(index) == 3: - target, metric, subregion = index - vals = evaluation.results[target][metric][subregion] - - plot_name = plot_config['output_name'] + '_{}'.format(i) - plots.draw_contour_map(vals, - np.array(lats), - np.array(lons), - plot_name, - **plot_config.get('optional_args', {})) - -def _draw_taylor_diagram(evaluation, plot_config): - """""" - plot_name = plot_config['output_name'] - ref_dataset_name = evaluation.ref_dataset.name - target_dataset_names = [t.name for t in evaluation.target_datasets] - - if len(plot_config['stddev_results_indices'][0]) == 2: - stddev_results = [ - evaluation.results[tar][met] - for (tar, met) in plot_config['stddev_results_indices'] - ] - - pattern_corr_results = [ - evaluation.results[tar][met] - for (tar, met) in plot_config['pattern_corr_results_indices'] - ] - elif len(plot_config['stddev_results_indices'][0]) == 3: - stddev_results = [ - evaluation.results[tar][met][sub] - for (tar, met, sub) in 
plot_config['stddev_results_indices'] - ] - - pattern_corr_results = [ - evaluation.results[tar][met][sub] - for (tar, met, sub) in plot_config['pattern_corr_results_indices'] - ] - - plot_data = np.array([stddev_results, pattern_corr_results]).transpose() - - plots.draw_taylor_diagram(plot_data, - target_dataset_names, - ref_dataset_name, - fname=plot_name, - **plot_config.get('optional_args', {})) - -def _draw_subregion_diagram(evaluation, plot_config): - """""" - lats = plot_config['lats'] - if type(lats) != type(list): - lats = np.arange(lats['range_min'], lats['range_max'], lats['range_step']) - - lons = plot_config['lons'] - if type(lons) != type(list): - lons = np.arange(lons['range_min'], lons['range_max'], lons['range_step']) - - plots.draw_subregions(evaluation.subregions, - lats, - lons, - plot_config['output_name'], - **plot_config.get('optional_args', {})) - -def _draw_portrait_diagram(evaluation, plot_config): - """""" - metric_index = plot_config['metric_index'] - - diagram_data = np.array(evaluation.results[:][metric_index][:]) - subregion_names = ["R{}".format(i) for i in range(len(evaluation.subregions))] - target_names = [t.name for t in evaluation.target_datasets] - - plots.draw_portrait_diagram(diagram_data, - target_names, - subregion_names, - fname=plot_config['output_name'], - **plot_config.get('optional_args', {})) - -def _draw_time_series_plot(evaluation, plot_config): - """""" - time_range_info = plot_config['time_range'] - ref_ds = evaluation.ref_dataset - target_ds = evaluation.target_datasets - - if time_range_info == 'monthly': - ref_ds.values, ref_ds.times = utils.calc_climatology_monthly(ref_ds) - - for t in target_ds: - t.values, t.times = utils.calc_climatology_monthly(t) - else: - logger.error( - 'Invalid time range provided. 
Only monthly is supported ' - 'at the moment' - ) - return - - if evaluation.subregions: - for bound_count, bound in enumerate(evaluation.subregions): - results = [] - labels = [] - - subset = dsp.subset( - bound, - ref_ds, - subregion_name="R{}_{}".format(bound_count, ref_ds.name) - ) - - results.append(utils.calc_time_series(subset)) - labels.append(subset.name) - - for t in target_ds: - subset = dsp.subset( - bound, - t, - subregion_name="R{}_{}".format(bound_count, t.name) - ) - results.append(utils.calc_time_series(subset)) - labels.append(subset.name) - - plots.draw_time_series(np.array(results), - ref_ds.times, - labels, - 'R{}'.format(bound_count), - **plot_config.get('optional_args', {})) - - else: - results = [] - labels = [] - - results.append(utils.calc_time_series(ref_ds)) - labels.append(ref_ds.name) - - for t in target_ds: - results.append(utils.calc_time_series(t)) - labels.append(t.name) - - plots.draw_time_series(np.array(results), - ref_ds.times, - labels, - 'time_series', - **plot_config.get('optional_args', {})) http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/tests/test_config_parsing.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/tests/test_config_parsing.py b/ocw-config-runner/tests/test_config_parsing.py deleted file mode 100644 index e918405..0000000 --- a/ocw-config-runner/tests/test_config_parsing.py +++ /dev/null @@ -1,806 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from mock import patch -import unittest - -import configuration_parsing as parser -import ocw.metrics as metrics - -import yaml - - -class TestIsConfigValid(unittest.TestCase): - @classmethod - def setUpClass(self): - not_minimal_config = """ - datasets: - """ - self.not_minimal = yaml.load(not_minimal_config) - - not_well_formed_config = """ - datasets: - reference: - data_source: local - file_count: 1 - path: /a/fake/path/file.py - variable: pr - - targets: - - data_source: local - file_count: 5 - file_glob_pattern: something for globbing files here - variable: pr - optional_args: - name: Target1 - - - data_source: esgf - dataset_id: fake dataset id - variable: pr - esgf_username: my esgf username - esgf_password: my esgf password - - metrics: - - Bias - - TemporalStdDev - """ - self.not_well_formed = yaml.load(not_well_formed_config) - - @patch('configuration_parsing.logger') - def test_not_minimal_config(self, mock_logger): - ret = parser.is_config_valid(self.not_minimal) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Insufficient configuration file data for an evaluation' - ) - - @patch('configuration_parsing.logger') - def test_not_valid_config(self, mock_logger): - ret = parser.is_config_valid(self.not_well_formed) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Configuration data is not well formed' - ) - - -class TestValidMinimalConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - no_datasets_config = """ - metrics: - - Bias - """ - self.no_datasets = yaml.load(no_datasets_config) - - 
no_metrics_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - """ - self.no_metrics = yaml.load(no_metrics_config) - - unary_with_reference_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - TemporalStdDev - """ - self.unary_with_reference = yaml.load(unary_with_reference_config) - - unary_with_target_config = """ - datasets: - targets: - - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - TemporalStdDev - """ - self.unary_with_target = yaml.load(unary_with_target_config) - - unary_no_reference_or_target = """ - datasets: - not_ref_or_target: - - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - TemporalStdDev - """ - self.unary_no_ref_or_target = yaml.load(unary_no_reference_or_target) - - binary_valid_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - targets: - - data_source: dap - url: afakeurl.com - variable: pr - metrics: - - Bias - """ - self.binary_valid = yaml.load(binary_valid_config) - - binary_no_reference_config = """ - datasets: - targets: - - data_source: dap - url: afakeurl.com - variable: pr - metrics: - - Bias - """ - self.binary_no_reference = yaml.load(binary_no_reference_config) - - binary_no_target_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - Bias - """ - self.binary_no_target = yaml.load(binary_no_target_config) - - @patch('configuration_parsing.logger') - def test_no_datasets(self, mock_logger): - ret = parser._valid_minimal_config(self.no_datasets) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'No datasets specified in configuration data.' 
- ) - - @patch('configuration_parsing.logger') - def test_no_metrics(self, mock_logger): - ret = parser._valid_minimal_config(self.no_metrics) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'No metrics specified in configuration data.' - ) - - def test_unary_with_reference(self): - ret = parser._valid_minimal_config(self.unary_with_reference) - self.assertTrue(ret) - - def test_unary_with_target(self): - ret = parser._valid_minimal_config(self.unary_with_target) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_unary_no_datasets(self, mock_logger): - ret = parser._valid_minimal_config(self.unary_no_ref_or_target) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Unary metric in configuration data requires either a reference ' - 'or target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' - ) - - def test_valid_binary(self): - ret = parser._valid_minimal_config(self.binary_valid) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_binary_no_reference(self, mock_logger): - ret = parser._valid_minimal_config(self.binary_no_reference) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Binary metric in configuration requires both a reference ' - 'and target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' - ) - - @patch('configuration_parsing.logger') - def test_binary_no_target(self, mock_logger): - ret = parser._valid_minimal_config(self.binary_no_target) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Binary metric in configuration requires both a reference ' - 'and target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' 
- ) - - -class TestConfigIsWellFormed(unittest.TestCase): - @classmethod - def setUpClass(self): - malformed_reference_config = """ - datasets: - reference: - data_source: notavalidlocation - - metrics: - - Bias - """ - self.malformed_reference_conf = yaml.load(malformed_reference_config) - - malformed_target_list_config = """ - datasets: - targets: - notalist: - a_key: a_value - - alsonotalist: - a_key: a_value - - metrics: - - Bias - """ - self.malformed_target_list = yaml.load(malformed_target_list_config) - - missing_metric_name_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - NotABuiltInMetric - """ - self.missing_metric_name = yaml.load(missing_metric_name_config) - - bad_plot_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - Bias - - plots: - - type: NotARealPlotName - """ - bad_plot = yaml.load(bad_plot_config) - - bad_subregion_config_type = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - Bias - - subregions: - - this is a string instead of a list - """ - self.bad_subregion_type = yaml.load(bad_subregion_config_type) - - bad_subregion_config_length = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - Bias - - subregions: - - [1, 2, 3, 4, 5] - """ - self.bad_subregion_length = yaml.load(bad_subregion_config_length) - - def test_malformed_reference_config(self): - ret = parser._config_is_well_formed(self.malformed_reference_conf) - self.assertFalse(ret) - - @patch('configuration_parsing.logger') - def test_malformed_target_dataset_list(self, mock_logger): - ret = parser._config_is_well_formed(self.malformed_target_list) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - "Expected to find list of target datasets but instead found " - "object of type <type 'dict'>" - ) - - def test_not_builtin_metric(self): - ret = 
parser._config_is_well_formed(self.missing_metric_name) - self.assertFalse(ret) - - @patch('configuration_parsing.logger') - def test_warns_regarding_not_builtin_metric(self, mock_logger): - ret = parser._config_is_well_formed(self.missing_metric_name) - mock_logger.warn.assert_called_with( - 'Unable to locate metric name NotABuiltInMetric in built-in ' - 'metrics. If this is not a user defined metric then please check ' - 'for potential misspellings.' - ) - - def test_bad_plot_config(self): - ret = parser._config_is_well_formed(self.missing_metric_name) - self.assertFalse(ret) - - def test_bad_subregion_type(self): - ret = parser._config_is_well_formed(self.bad_subregion_type) - self.assertFalse(ret) - - def test_bad_subregion_length(self): - ret = parser._config_is_well_formed(self.bad_subregion_length) - self.assertFalse(ret) - - -class MetricFetchTest(unittest.TestCase): - @classmethod - def setUpClass(self): - binary_config = """ - metrics: - - Bias - - StdDevRatio - """ - unary_config = """ - metrics: - - TemporalStdDev - """ - self.unary_conf = yaml.load(unary_config) - self.binary_conf = yaml.load(binary_config) - - def test_contains_binary_metric(self): - ret = parser._contains_binary_metrics(self.binary_conf['metrics']) - self.assertTrue(ret) - - def test_does_not_contain_binary_metric(self): - ret = parser._contains_binary_metrics(self.unary_conf['metrics']) - self.assertFalse(ret) - - def test_contains_unary_metric(self): - ret = parser._contains_unary_metrics(self.unary_conf['metrics']) - self.assertTrue(ret) - - def test_does_not_contain_unary_metric(self): - ret = parser._contains_unary_metrics(self.binary_conf['metrics']) - self.assertFalse(ret) - - -class InvalidDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - example_config_yaml = """ - - file_count: 1 - path: /a/fake/path - variable: pr - - - data_source: invalid_location_identifier - """ - conf = yaml.load(example_config_yaml) - self.missing_data_source = conf[0] - 
self.invalid_data_source = conf[1] - - @patch('configuration_parsing.logger') - def test_missing_data_source_config(self, mock_logger): - parser._valid_dataset_config_data(self.missing_data_source) - mock_logger.error.assert_called_with( - 'Dataset does not contain a data_source attribute.' - ) - - @patch('configuration_parsing.logger') - def test_invalid_data_source(self, mock_logger): - parser._valid_dataset_config_data(self.invalid_data_source) - mock_logger.error.assert_called_with( - 'Dataset does not contain a valid data_source location.' - ) - - -class TestLocalDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_local_keys = set(['data_source', 'file_count', 'path', 'variable']) - example_config_yaml = """ - - data_source: local - file_count: 1 - path: /a/fake/path - variable: pr - optional_args: - name: Target1 - - - data_source: local - - - data_source: local - file_count: 5 - file_glob_pattern: something for globbing files here - variable: pr - path: /a/fake/path - optional_args: - name: Target1 - - - data_source: local - file_count: 5 - variable: pr - path: /a/fake/path - """ - - conf = yaml.load(example_config_yaml) - self.valid_local_single = conf[0] - self.invalid_local_single = conf[1] - self.valid_local_multi = conf[2] - self.invalid_local_multi = conf[1] - self.invalid_local_multi_file_glob = conf[3] - - def test_valid_local_config_single_file(self): - ret = parser._valid_dataset_config_data(self.valid_local_single) - self.assertTrue(ret) - - def test_valid_local_config_multi_file(self): - ret = parser._valid_dataset_config_data(self.valid_local_multi) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_invalid_local_config(self, mock_logger): - parser._valid_dataset_config_data(self.invalid_local_single) - - present_keys = set(self.invalid_local_single.keys()) - missing_keys = self.required_local_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does 
not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - @patch('configuration_parsing.logger') - def test_invalid_local_config_multi_file(self, mock_logger): - # mutlifile config is handled slightly differently. We should see the - # same missing keys in this situation as we would on the single file - # local config. We will test for a missing file_glob_pattern in a - # different test. - parser._valid_dataset_config_data(self.invalid_local_multi) - - present_keys = set(self.invalid_local_multi.keys()) - missing_keys = self.required_local_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - @patch('configuration_parsing.logger') - def test_invalid_local_config_multi_file_missing_file_glob(self, mock_logger): - # We can't check for the file_glob_pattern pattern until after we have - # verified that the single local file config has been met. 
- parser._valid_dataset_config_data(self.invalid_local_multi_file_glob) - - mock_logger.error.assert_called_with( - 'Multi-file local dataset is missing key: file_glob_pattern' - ) - - -class TestRCMEDDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_rcmed_keys = set([ - 'dataset_id', - 'parameter_id', - 'min_lat', - 'max_lat', - 'min_lon', - 'max_lon', - 'start_time', - 'end_time' - ]) - example_config_yaml = """ - - data_source: rcmed - dataset_id: 4 - parameter_id: 4 - min_lat: -40 - max_lat: 40 - min_lon: -50 - max_lon: 50 - start_time: YYYY-MM-DDThh:mm:ss - end_time: YYYY-MM-DDThh:mm:ss - - - data_source: rcmed - """ - conf = yaml.load(example_config_yaml) - self.valid_rcmed = conf[0] - self.invalid_rcmed = conf[1] - - def test_valid_rcmed_config(self): - ret = parser._valid_dataset_config_data(self.valid_rcmed) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_invalid_rcmed_config(self, mock_logger): - parser._valid_dataset_config_data(self.invalid_rcmed) - - present_keys = set(self.invalid_rcmed.keys()) - missing_keys = self.required_rcmed_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. 
' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - -class TestESGFDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_esgf_keys = set([ - 'data_source', - 'dataset_id', - 'variable', - 'esgf_username', - 'esgf_password' - ]) - example_config_yaml = """ - - data_source: esgf - dataset_id: fake dataset id - variable: pr - esgf_username: my esgf username - esgf_password: my esgf password - - - data_source: esgf - """ - conf = yaml.load(example_config_yaml) - self.valid_esgf = conf[0] - self.invalid_esgf = conf[1] - - def test_valid_esgf_conf(self): - ret = parser._valid_dataset_config_data(self.valid_esgf) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_invalid_esgf_conf(self, mock_logger): - parser._valid_dataset_config_data(self.invalid_esgf) - - present_keys = set(self.invalid_esgf.keys()) - missing_keys = self.required_esgf_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. 
' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - -class TestDAPDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_dap_keys = set(['url', 'variable']) - example_config_yaml = """ - - data_source: dap - url: afakeurl.com - variable: pr - - - data_source: dap - """ - conf = yaml.load(example_config_yaml) - self.valid_dap = conf[0] - self.invalid_dap = conf[1] - - def test_valid_dap_config(self): - ret = parser._valid_dataset_config_data(self.valid_dap) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_invalid_dap_config(self, mock_logger): - parser._valid_dataset_config_data(self.invalid_dap) - - present_keys = set(self.invalid_dap.keys()) - missing_keys = self.required_dap_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - -class ContourMapConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - valid_contour_config = """ - type: contour - results_indices: - - !!python/tuple [0, 0] - lats: - range_min: -20 - range_max: 20 - range_step: 1 - lons: - range_min: -20 - range_max: 20 - range_step: 1 - output_name: wrf_bias_compared_to_knmi - """ - self.valid_contour = yaml.load(valid_contour_config) - - missing_keys_contour_config = """ - type: contour - """ - self.missing_keys_contour = yaml.load(missing_keys_contour_config) - - self.required_contour_keys = set([ - 'results_indices', - 'lats', - 'lons', - 'output_name' - ]) - - def test_valid_contour(self): - ret = parser._valid_plot_config_data(self.valid_contour) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_missing_keys_contour(self, mock_logger): - ret = parser._valid_plot_config_data(self.missing_keys_contour) - - present_keys = 
set(self.missing_keys_contour.keys()) - missing_keys = self.required_contour_keys - present_keys - missing = sorted(list(missing_keys)) - - err = ( - 'Plot config does not contain required keys. ' - 'The following keys are missing: {}' - ).format(', '.join(missing)) - mock_logger.error.assert_called_with(err) - - -class TestSubregionPlotConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - valid_subregion_config = """ - type: subregion - lats: - range_min: -20 - range_max: 20 - range_step: 1 - lons: - range_min: -20 - range_max: 20 - range_step: 1 - output_name: fake_plot_name - """ - self.valid_subregion = yaml.load(valid_subregion_config) - - missing_keys_subregion_config = """ - type: subregion - """ - self.missing_keys_subregion = yaml.load(missing_keys_subregion_config) - - self.required_subregion_keys = set([ - 'lats', - 'lons', - 'output_name' - ]) - - def test_valid_subregion(self): - ret = parser._valid_plot_config_data(self.valid_subregion) - self.assertTrue(ret) - - @patch('configuration_parsing.logger') - def test_missing_keys_subregion(self, mock_logger): - ret = parser._valid_plot_config_data(self.missing_keys_subregion) - - present_keys = set(self.missing_keys_subregion.keys()) - missing_keys = self.required_subregion_keys - present_keys - missing = sorted(list(missing_keys)) - - err = ( - 'Plot config does not contain required keys. 
' - 'The following keys are missing: {}' - ).format(', '.join(missing)) - mock_logger.error.assert_called_with(err) - - -class TestInvalidPlotConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - bad_plot_type_config = """ - type: NotAPlotType - """ - self.bad_plot_type = yaml.load(bad_plot_type_config) - - missing_plot_type_config = """ - results_indices: - - !!python/tuple [0, 0] - lats: - range_min: -20 - range_max: 20 - range_step: 1 - lons: - range_min: -20 - range_max: 20 - range_step: 1 - output_name: wrf_bias_compared_to_knmi - """ - self.missing_plot_type = yaml.load(missing_plot_type_config) - - missing_subregions_for_plot_type = """ - datasets: - - blah - - metrics: - - blah - - plots: - - type: subregion - results_indices: - - !!python/tuple [0, 0] - lats: - range_min: -20 - range_max: 20 - range_step: 1 - lons: - range_min: -20 - range_max: 20 - range_step: 1 - output_name: wrf_bias_compared_to_knmi - """ - self.missing_subregions = yaml.load(missing_subregions_for_plot_type) - - @patch('configuration_parsing.logger') - def test_invalid_plot_type(self, mock_logger): - ret = parser._valid_plot_config_data(self.bad_plot_type) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Invalid plot type specified.' - ) - - @patch('configuration_parsing.logger') - def test_missing_plot_type(self, mock_logger): - ret = parser._valid_plot_config_data(self.missing_plot_type) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Plot config does not include a type attribute.' - ) - - @patch('configuration_parsing.logger') - def test_missing_subregion(self, mock_logger): - ret = parser._config_is_well_formed(self.missing_subregions) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Plot config that requires subregion information is present ' - 'in a config file without adequate subregion information ' - 'provided. Please ensure that you have properly supplied 1 or ' - 'more subregion config values.' - )
