Repository: climate Updated Branches: refs/heads/master 58703e0cb -> 6e73c8a51
CLIMATE-588 - Refactor config based evaluation Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/c90440b2 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/c90440b2 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/c90440b2 Branch: refs/heads/master Commit: c90440b2cd792ff74407468e951c99b091d4de14 Parents: 58703e0 Author: Michael Joyce <[email protected]> Authored: Fri Feb 27 09:08:41 2015 -0800 Committer: Michael Joyce <[email protected]> Committed: Fri Feb 27 09:11:26 2015 -0800 ---------------------------------------------------------------------- ocw-config-runner/configuration_parsing.py | 218 ++++++ ocw-config-runner/evaluation_creation.py | 132 ++++ ocw-config-runner/ocw_evaluation_from_config.py | 386 +--------- ocw-config-runner/plot_generation.py | 54 ++ ocw-config-runner/tests/test_config_parsing.py | 669 +++++++++++++++++ ocw-config-runner/tests/test_config_runner.py | 711 ------------------- .../tests/test_evaluation_creation.py | 33 + 7 files changed, 1109 insertions(+), 1094 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/configuration_parsing.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/configuration_parsing.py b/ocw-config-runner/configuration_parsing.py new file mode 100644 index 0000000..600d7ac --- /dev/null +++ b/ocw-config-runner/configuration_parsing.py @@ -0,0 +1,218 @@ +import logging +import re +import sys + +import ocw.metrics as metrics + +import yaml + +logging.basicConfig() +logger = logging.getLogger(__name__) + +def is_config_valid(config_data): + """ Validate supplied evaluation configuration data. + + :param config_data: Dictionary of the data parsed from the supplied YAML + configuration file. 
+ :type config_data: :func:`dict` + + :returns: True if the configuration data is sufficient for an evaluation and + seems to be well formed, False otherwise. + """ + if not _valid_minimal_config(config_data): + logger.error('Insufficient configuration file data for an evaluation') + return False + + if not _config_is_well_formed(config_data): + logger.error('Configuration data is not well formed') + return False + + return True + +def _valid_minimal_config(config_data): + """""" + if not 'datasets' in config_data.keys(): + logger.error('No datasets specified in configuration data.') + return False + + if not 'metrics' in config_data.keys(): + logger.error('No metrics specified in configuration data.') + return False + + if _contains_unary_metrics(config_data['metrics']): + if (not 'reference' in config_data['datasets'].keys() and + not 'targets' in config_data['datasets'].keys()): + err = ( + 'Unary metric in configuration data requires either a reference ' + 'or target dataset to be present for evaluation. Please ensure ' + 'that your config is well formed.' + ) + logger.error(err) + return False + + if _contains_binary_metrics(config_data['metrics']): + if (not 'reference' in config_data['datasets'].keys() or + not 'targets' in config_data['datasets'].keys()): + logger.error( + 'Binary metric in configuration requires both a reference ' + 'and target dataset to be present for evaluation. Please ensure ' + 'that your config is well formed.' 
+ ) + return False + + return True + +def _config_is_well_formed(config_data): + """""" + is_well_formed = True + + if 'reference' in config_data['datasets']: + if not _valid_dataset_config_data(config_data['datasets']['reference']): + is_well_formed = False + + if 'targets' in config_data['datasets']: + targets = config_data['datasets']['targets'] + if type(targets) != type(list()): + err = ( + 'Expected to find list of target datasets but instead found ' + 'object of type {}' + ).format(type(targets)) + logger.error(err) + is_well_formed = False + else: + for t in targets: + if not _valid_dataset_config_data(t): + is_well_formed = False + + available_metrics = _fetch_built_in_metrics() + for metric in config_data['metrics']: + if metric not in available_metrics: + warning = ( + 'Unable to locate metric name {} in built-in metrics. If this ' + 'is not a user defined metric then please check for potential ' + 'misspellings.' + ).format(metric) + logger.warn(warning) + is_well_formed = False + + if 'plots' in config_data: + for plot in config_data['plots']: + if not _valid_plot_config_data(plot): + is_well_formed = False + + return is_well_formed + +def _contains_unary_metrics(config_metric_data): + """""" + unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] + return any(metric in unarys for metric in config_metric_data) + +def _contains_binary_metrics(config_metric_data): + """""" + binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] + return any(metric in binarys for metric in config_metric_data) + +def _fetch_built_in_metrics(): + """""" + unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] + binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] + return unarys + binarys + +def _valid_dataset_config_data(dataset_config_data): + """""" + try: + data_source = dataset_config_data['data_source'] + except KeyError: + logger.error('Dataset does not contain a data_source attribute.') + 
return False + + if data_source == 'local': + required_keys = set(['data_source', 'file_count', 'path', 'variable']) + elif data_source == 'rcmed': + required_keys = set([ + 'dataset_id', + 'parameter_id', + 'min_lat', + 'max_lat', + 'min_lon', + 'max_lon', + 'start_time', + 'end_time', + ]) + elif data_source == 'esgf': + required_keys = set([ + 'data_source', + 'dataset_id', + 'variable', + 'esgf_username', + 'esgf_password' + ]) + elif data_source == 'dap': + required_keys = set({'url', 'variable'}) + else: + logger.error('Dataset does not contain a valid data_source location.') + return False + + present_keys = set(dataset_config_data.keys()) + missing_keys = required_keys - present_keys + contains_required = len(missing_keys) == 0 + + if contains_required: + if data_source == 'local' and dataset_config_data['file_count'] > 1: + # If the dataset is a multi-file dataset then we need to make sure + # that the file glob pattern is included. + if not 'file_glob_pattern' in dataset_config_data: + logger.error( + 'Multi-file local dataset is missing key: file_glob_pattern' + ) + return False + return True + else: + missing = sorted(list(missing_keys)) + logger.error( + 'Dataset does not contain required keys. ' + 'The following keys are missing: {}'.format(', '.join(missing)) + ) + return False + +def _valid_plot_config_data(plot_config_data): + """""" + try: + plot_type = plot_config_data['type'] + except KeyError: + logger.error('Plot config does not include a type attribute.') + return False + + if plot_type == 'contour': + required_keys = set([ + 'results_indeces', + 'lats', + 'lons', + 'output_name' + ]) + elif plot_type == 'taylor': + logger.warn('Taylor diagrams are currently unsupported. Skipping validation') + elif plot_type == 'subregion': + logger.warn('Subregion plots are currently unsupported. Skipping validation') + elif plot_type == 'time_series': + logger.warn('Time series plots are currently unsupported. 
Skipping validation') + elif plot_type == 'portrait': + logger.warn('Portrait diagrams are currently unsupported. Skipping validation') + else: + logger.error('Invalid plot type specified.') + return False + + present_keys = set(plot_config_data.keys()) + missing_keys = required_keys - present_keys + contains_required = len(missing_keys) == 0 + + if not contains_required: + missing = sorted(list(missing_keys)) + logger.error( + 'Plot config does not contain required keys. ' + 'The following keys are missing: {}'.format(', '.join(missing)) + ) + return False + + return True + http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/evaluation_creation.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/evaluation_creation.py b/ocw-config-runner/evaluation_creation.py new file mode 100644 index 0000000..66794d9 --- /dev/null +++ b/ocw-config-runner/evaluation_creation.py @@ -0,0 +1,132 @@ +import dateutil.parser +from datetime import timedelta +import logging + +from ocw.dataset import Bounds +from ocw.evaluation import Evaluation +import ocw.dataset_processor as dsp +import ocw.data_source.local as local +import ocw.data_source.rcmed as rcmed +import ocw.data_source.esgf as esgf +import ocw.data_source.dap as dap +import ocw.metrics as metrics + +import numpy as np + +logging.basicConfig() +logger = logging.getLogger(__name__) + +def generate_evaluation_from_config(config_data): + """ Generate an Evaluation object from configuration data. + + :param config_data: Dictionary of the data parsed from the supplied YAML + configuration file. + :type config_data: :func:`dict` + + :returns: An Evaluation object containing the data specified in the + supplied configuration data. 
+ """ + # Load datasets + reference = None + targets = None + if 'reference' in config_data['datasets']: + reference = _load_dataset(config_data['datasets']['reference']) + + if 'targets' in config_data['datasets']: + targets = [_load_dataset(t) for t in config_data['datasets']['targets']] + + reference, targets = _prepare_datasets_for_evaluation(reference, + targets, + config_data) + # Load metrics + eval_metrics = [_load_metric(m)() for m in config_data['metrics']] + + return Evaluation(reference, targets, eval_metrics) + +def _load_dataset(dataset_config_data): + """""" + if dataset_config_data['data_source'] == 'local': + if dataset_config_data['file_count'] > 1: + logger.error( + 'Multi-file datasets are currently not supported. Cancelling load ' + 'of the following dataset: {}'.format(dataset_config_data) + ) + return None + + return local.load_file(dataset_config_data['path'], + dataset_config_data['variable'], + **dataset_config_data.get('optional_args', {})) + elif dataset_config_data['data_source'] == 'rcmed': + return rcmed.parameter_dataset(dataset_config_data['dataset_id'], + dataset_config_data['parameter_id'], + dataset_config_data['min_lat'], + dataset_config_data['max_lat'], + dataset_config_data['min_lon'], + dataset_config_data['min_lon'], + dataset_config_data['start_time'], + dataset_config_data['end_time'], + **dataset_config_data.get('optional_args', {})) + elif dataset_config_data['data_source'] == 'esgf': + return esgf.load_dataset(dataset_config_data['dataset_id'], + dataset_config_data['variable'], + dataset_config_data['esgf_username'], + dataset_config_data['esgf_password'], + **dataset_config_data.get('optional_args', {})) + elif dataset_config_data['data_source'] == 'dap': + return dap.load(dataset_config_data['url'], + dataset_config_data['variable'], + **dataset_config_data('optional_args', {})) + +def _prepare_datasets_for_evaluation(reference, targets, config_data): + """""" + subset = config_data['evaluation'].get('subset', None) 
+ temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None) + spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None) + spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None) + + if subset: + start = dateutil.parser.parse(subset[4]) + end = dateutil.parser.parse(subset[5]) + bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end) + + if reference: + reference = dsp.safe_subset(bounds, reference) + + if targets: + targets = [dsp.safe_subset(bounds, t) for t in targets] + + if temporal_time_delta: + resolution = timedelta(temporal_time_delta) + + if reference: + reference = dsp.temporal_rebin(reference, resolution) + + if targets: + targets = [dsp.temporal_rebin(t, resolution) for t in targets] + + if spatial_regrid_lats and spatial_regrid_lons: + lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2]) + lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2]) + + if reference: + reference = dsp.spatial_regrid(reference, lats, lons) + + if targets: + targets = [dsp.spatial_regrid(t, lats, lons) for t in targets] + + return reference, targets + +def _load_metric(metric_config_data): + """""" + # If the dataset is user defined outside of ocw.metrics we won't currently + # handle loading it. + if '.' in metric_config_data: + logger.error( + 'User-defined metrics outside of the ocw.metrics module ' + 'cannot currently be loaded. If you just wanted a metric ' + 'found in ocw.metrics then do not specify the full ' + 'package and module names. See the documentation for examples.' 
+ ) + return None + + return getattr(metrics, metric_config_data) http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/ocw_evaluation_from_config.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/ocw_evaluation_from_config.py b/ocw-config-runner/ocw_evaluation_from_config.py index aba6fef..e27acc2 100644 --- a/ocw-config-runner/ocw_evaluation_from_config.py +++ b/ocw-config-runner/ocw_evaluation_from_config.py @@ -16,23 +16,12 @@ # under the License. import argparse -import dateutil.parser -from datetime import timedelta import logging -import re -import sys -from ocw.dataset import Bounds -from ocw.evaluation import Evaluation -import ocw.metrics as metrics -import ocw.plotter as plots -import ocw.dataset_processor as dsp -import ocw.data_source.local as local -import ocw.data_source.rcmed as rcmed -import ocw.data_source.esgf as esgf -import ocw.data_source.dap as dap +from configuration_parsing import is_config_valid +from evaluation_creation import generate_evaluation_from_config +from plot_generation import plot_from_config -import numpy as np import yaml logging.basicConfig() @@ -61,375 +50,6 @@ def run_evaluation_from_config(config_file_path): plot_from_config(evaluation, config) -def is_config_valid(config_data): - """ Validate supplied evaluation configuration data. - - :param config_data: Dictionary of the data parsed from the supplied YAML - configuration file. - :type config_data: :func:`dict` - - :returns: True if the configuration data is sufficient for an evaluation and - seems to be well formed, False otherwise. 
- """ - if not _valid_minimal_config(config_data): - logger.error('Insufficient configuration file data for an evaluation') - return False - - if not _config_is_well_formed(config_data): - logger.error('Configuration data is not well formed') - return False - - return True - -def generate_evaluation_from_config(config_data): - """ Generate an Evaluation object from configuration data. - - :param config_data: Dictionary of the data parsed from the supplied YAML - configuration file. - :type config_data: :func:`dict` - - :returns: An Evaluation object containing the data specified in the - supplied configuration data. - """ - # Load datasets - reference = None - targets = None - if 'reference' in config_data['datasets']: - reference = _load_dataset(config_data['datasets']['reference']) - - if 'targets' in config_data['datasets']: - targets = [_load_dataset(t) for t in config_data['datasets']['targets']] - - reference, targets = _prepare_datasets_for_evaluation(reference, - targets, - config_data) - # Load metrics - eval_metrics = [_load_metric(m)() for m in config_data['metrics']] - - return Evaluation(reference, targets, eval_metrics) - -def plot_from_config(evaluation, config_data): - """ Generate plots for an evaluation from configuration data. - - :param evaluation: The Evaluation for which to generate plots. - :type evaluation: :class:`ocw.evaluation.Evaluation` - :param config_data: Dictionary of the data parsed from the supplied YAML - configuration file. - :type: :func:`dict` - """ - for plot in config_data['plots']: - if plot['type'] == 'contour': - _draw_contour_plot(evaluation, plot) - elif plot['type'] == 'subregion': - logger.warn('Subregion plots are currently unsupported. Skipping ...') - continue - elif plot['type'] == 'taylor': - logger.warn('Taylor diagrams are currently unsupported. Skipping ...') - continue - elif plot['type'] == 'time_series': - logger.warn('Time series plots are currently unsupported. 
Skipping ...') - continue - elif plot['type'] == 'portrait': - logger.warn('Portrait diagrams are currently unsupported. Skipping ...') - continue - else: - logger.error('Unrecognized plot type requested: {}'.format(plot['type'])) - continue - -def _valid_minimal_config(config_data): - """""" - if not 'datasets' in config_data.keys(): - logger.error('No datasets specified in configuration data.') - return False - - if not 'metrics' in config_data.keys(): - logger.error('No metrics specified in configuration data.') - return False - - if _contains_unary_metrics(config_data['metrics']): - if (not 'reference' in config_data['datasets'].keys() and - not 'targets' in config_data['datasets'].keys()): - err = ( - 'Unary metric in configuration data requires either a reference ' - 'or target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' - ) - logger.error(err) - return False - - if _contains_binary_metrics(config_data['metrics']): - if (not 'reference' in config_data['datasets'].keys() or - not 'targets' in config_data['datasets'].keys()): - logger.error( - 'Binary metric in configuration requires both a reference ' - 'and target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' 
- ) - return False - - return True - -def _config_is_well_formed(config_data): - """""" - is_well_formed = True - - if 'reference' in config_data['datasets']: - if not _valid_dataset_config_data(config_data['datasets']['reference']): - is_well_formed = False - - if 'targets' in config_data['datasets']: - targets = config_data['datasets']['targets'] - if type(targets) != type(list()): - err = ( - 'Expected to find list of target datasets but instead found ' - 'object of type {}' - ).format(type(targets)) - logger.error(err) - is_well_formed = False - else: - for t in targets: - if not _valid_dataset_config_data(t): - is_well_formed = False - - available_metrics = _fetch_built_in_metrics() - for metric in config_data['metrics']: - if metric not in available_metrics: - warning = ( - 'Unable to locate metric name {} in built-in metrics. If this ' - 'is not a user defined metric then please check for potential ' - 'misspellings.' - ).format(metric) - logger.warn(warning) - is_well_formed = False - - if 'plots' in config_data: - for plot in config_data['plots']: - if not _valid_plot_config_data(plot): - is_well_formed = False - - return is_well_formed - -def _contains_unary_metrics(config_metric_data): - """""" - unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] - return any(metric in unarys for metric in config_metric_data) - -def _contains_binary_metrics(config_metric_data): - """""" - binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] - return any(metric in binarys for metric in config_metric_data) - -def _fetch_built_in_metrics(): - """""" - unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] - binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] - return unarys + binarys - -def _valid_dataset_config_data(dataset_config_data): - """""" - try: - data_source = dataset_config_data['data_source'] - except KeyError: - logger.error('Dataset does not contain a data_source attribute.') - 
return False - - if data_source == 'local': - required_keys = set(['data_source', 'file_count', 'path', 'variable']) - elif data_source == 'rcmed': - required_keys = set([ - 'dataset_id', - 'parameter_id', - 'min_lat', - 'max_lat', - 'min_lon', - 'max_lon', - 'start_time', - 'end_time', - ]) - elif data_source == 'esgf': - required_keys = set([ - 'data_source', - 'dataset_id', - 'variable', - 'esgf_username', - 'esgf_password' - ]) - elif data_source == 'dap': - required_keys = set({'url', 'variable'}) - else: - logger.error('Dataset does not contain a valid data_source location.') - return False - - present_keys = set(dataset_config_data.keys()) - missing_keys = required_keys - present_keys - contains_required = len(missing_keys) == 0 - - if contains_required: - if data_source == 'local' and dataset_config_data['file_count'] > 1: - # If the dataset is a multi-file dataset then we need to make sure - # that the file glob pattern is included. - if not 'file_glob_pattern' in dataset_config_data: - logger.error( - 'Multi-file local dataset is missing key: file_glob_pattern' - ) - return False - return True - else: - missing = sorted(list(missing_keys)) - logger.error( - 'Dataset does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - return False - -def _valid_plot_config_data(plot_config_data): - """""" - try: - plot_type = plot_config_data['type'] - except KeyError: - logger.error('Plot config does not include a type attribute.') - return False - - if plot_type == 'contour': - required_keys = set([ - 'results_indeces', - 'lats', - 'lons', - 'output_name' - ]) - elif plot_type == 'taylor': - logger.warn('Taylor diagrams are currently unsupported. Skipping validation') - elif plot_type == 'subregion': - logger.warn('Subregion plots are currently unsupported. Skipping validation') - elif plot_type == 'time_series': - logger.warn('Time series plots are currently unsupported. 
Skipping validation') - elif plot_type == 'portrait': - logger.warn('Portrait diagrams are currently unsupported. Skipping validation') - else: - logger.error('Invalid plot type specified.') - return False - - present_keys = set(plot_config_data.keys()) - missing_keys = required_keys - present_keys - contains_required = len(missing_keys) == 0 - - if not contains_required: - missing = sorted(list(missing_keys)) - logger.error( - 'Plot config does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - return False - - return True - -def _load_dataset(dataset_config_data): - """""" - if dataset_config_data['data_source'] == 'local': - if dataset_config_data['file_count'] > 1: - logger.error( - 'Multi-file datasets are currently not supported. Cancelling load ' - 'of the following dataset: {}'.format(dataset_config_data) - ) - return None - - return local.load_file(dataset_config_data['path'], - dataset_config_data['variable'], - **dataset_config_data.get('optional_args', {})) - elif dataset_config_data['data_source'] == 'rcmed': - return rcmed.parameter_dataset(dataset_config_data['dataset_id'], - dataset_config_data['parameter_id'], - dataset_config_data['min_lat'], - dataset_config_data['max_lat'], - dataset_config_data['min_lon'], - dataset_config_data['min_lon'], - dataset_config_data['start_time'], - dataset_config_data['end_time'], - **dataset_config_data.get('optional_args', {})) - elif dataset_config_data['data_source'] == 'esgf': - return esgf.load_dataset(dataset_config_data['dataset_id'], - dataset_config_data['variable'], - dataset_config_data['esgf_username'], - dataset_config_data['esgf_password'], - **dataset_config_data.get('optional_args', {})) - elif dataset_config_data['data_source'] == 'dap': - return dap.load(dataset_config_data['url'], - dataset_config_data['variable'], - **dataset_config_data('optional_args', {})) - -def _prepare_datasets_for_evaluation(reference, target, config_data): - """""" - 
subset = config_data['evaluation'].get('subset', None) - temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None) - spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None) - spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None) - - if subset: - start = dateutil.parser.parse(subset[4]) - end = dateutil.parser.parse(subset[5]) - bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end) - - if reference: - reference = dsp.safe_subset(bounds, reference) - - if targets: - targets = [dsp.safe_subset(bounds, t) for t in targets] - - if temporal_time_delta: - resolution = timedelta(temporal_time_delta) - - if reference: - reference = dsp.temporal_rebin(reference, resolution) - - if targets: - targets = [dsp.temporal_rebin(t, resolution) for t in targets] - - if spatial_regrid_lats and spatial_regrid_lons: - lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2]) - lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2]) - - if reference: - reference = dsp.spatial_regrid(reference, lats, lons) - - if targets: - targets = [dsp.spatial_regrid(t, lats, lons) for t in targets] - - return reference, target - - -def _load_metric(metric_config_data): - """""" - # If the dataset is user defined outside of ocw.metrics we won't currently - # handle loading it. - if '.' in metric_config_data: - logger.error( - 'User-defined metrics outside of the ocw.metrics module ' - 'cannot currently be loaded. If you just wanted a metric ' - 'found in ocw.metrics then do not specify the full ' - 'package and module names. See the documentation for examples.' 
- ) - return None - - return getattr(metrics, metric_config_data) - -def _draw_contour_plot(evaluation, plot_config): - """""" - row, col = plot_config['results_indeces'][0] - - lats = plot_config['lats'] - if type(lats) != type(list): - lats = range(lats['range_min'], lats['range_max'], lats['range_step']) - - lons = plot_config['lons'] - if type(lons) != type(list): - lons = range(lons['range_min'], lons['range_max'], lons['range_step']) - - plots.draw_contour_map(evaluation.results[row][col], - np.array(lats), - np.array(lons), - plot_config['output_name'], - **plot_config.get('optional_args', {})) - if __name__ == '__main__': description = 'OCW Config Based Evaluation' epilog = 'Additional information at https://cwiki.apache.org/confluence/display/climate/home#' http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/plot_generation.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/plot_generation.py b/ocw-config-runner/plot_generation.py new file mode 100644 index 0000000..c802af2 --- /dev/null +++ b/ocw-config-runner/plot_generation.py @@ -0,0 +1,54 @@ +import logging + +import ocw.plotter as plots + +import numpy as np + +logging.basicConfig() +logger = logging.getLogger(__name__) + +def plot_from_config(evaluation, config_data): + """ Generate plots for an evaluation from configuration data. + + :param evaluation: The Evaluation for which to generate plots. + :type evaluation: :class:`ocw.evaluation.Evaluation` + :param config_data: Dictionary of the data parsed from the supplied YAML + configuration file. + :type: :func:`dict` + """ + for plot in config_data['plots']: + if plot['type'] == 'contour': + _draw_contour_plot(evaluation, plot) + elif plot['type'] == 'subregion': + logger.warn('Subregion plots are currently unsupported. Skipping ...') + continue + elif plot['type'] == 'taylor': + logger.warn('Taylor diagrams are currently unsupported. 
Skipping ...') + continue + elif plot['type'] == 'time_series': + logger.warn('Time series plots are currently unsupported. Skipping ...') + continue + elif plot['type'] == 'portrait': + logger.warn('Portrait diagrams are currently unsupported. Skipping ...') + continue + else: + logger.error('Unrecognized plot type requested: {}'.format(plot['type'])) + continue + +def _draw_contour_plot(evaluation, plot_config): + """""" + row, col = plot_config['results_indeces'][0] + + lats = plot_config['lats'] + if type(lats) != type(list): + lats = range(lats['range_min'], lats['range_max'], lats['range_step']) + + lons = plot_config['lons'] + if type(lons) != type(list): + lons = range(lons['range_min'], lons['range_max'], lons['range_step']) + + plots.draw_contour_map(evaluation.results[row][col], + np.array(lats), + np.array(lons), + plot_config['output_name'], + **plot_config.get('optional_args', {})) http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/tests/test_config_parsing.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/tests/test_config_parsing.py b/ocw-config-runner/tests/test_config_parsing.py new file mode 100644 index 0000000..d51bf43 --- /dev/null +++ b/ocw-config-runner/tests/test_config_parsing.py @@ -0,0 +1,669 @@ +from mock import patch +import unittest + +import configuration_parsing as parser +import ocw.metrics as metrics + +import yaml + + +class TestIsConfigValid(unittest.TestCase): + @classmethod + def setUpClass(self): + not_minimal_config = """ + datasets: + """ + self.not_minimal = yaml.load(not_minimal_config) + + not_well_formed_config = """ + datasets: + reference: + data_source: local + file_count: 1 + path: /a/fake/path/file.py + variable: pr + + targets: + - data_source: local + file_count: 5 + file_glob_pattern: something for globbing files here + variable: pr + optional_args: + name: Target1 + + - data_source: esgf + dataset_id: fake dataset id + variable: 
pr + esgf_username: my esgf username + esgf_password: my esgf password + + metrics: + - Bias + - TemporalStdDev + """ + self.not_well_formed = yaml.load(not_well_formed_config) + + @patch('configuration_parsing.logger') + def test_not_minimal_config(self, mock_logger): + ret = parser.is_config_valid(self.not_minimal) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'Insufficient configuration file data for an evaluation' + ) + + @patch('configuration_parsing.logger') + def test_not_valid_config(self, mock_logger): + ret = parser.is_config_valid(self.not_well_formed) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'Configuration data is not well formed' + ) + + +class TestValidMinimalConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + no_datasets_config = """ + metrics: + - Bias + """ + self.no_datasets = yaml.load(no_datasets_config) + + no_metrics_config = """ + datasets: + reference: + data_source: dap + url: afakeurl.com + variable: pr + """ + self.no_metrics = yaml.load(no_metrics_config) + + unary_with_reference_config = """ + datasets: + reference: + data_source: dap + url: afakeurl.com + variable: pr + + metrics: + - TemporalStdDev + """ + self.unary_with_reference = yaml.load(unary_with_reference_config) + + unary_with_target_config = """ + datasets: + targets: + - data_source: dap + url: afakeurl.com + variable: pr + + metrics: + - TemporalStdDev + """ + self.unary_with_target = yaml.load(unary_with_target_config) + + unary_no_reference_or_target = """ + datasets: + not_ref_or_target: + - data_source: dap + url: afakeurl.com + variable: pr + + metrics: + - TemporalStdDev + """ + self.unary_no_ref_or_target = yaml.load(unary_no_reference_or_target) + + binary_valid_config = """ + datasets: + reference: + data_source: dap + url: afakeurl.com + variable: pr + + targets: + - data_source: dap + url: afakeurl.com + variable: pr + metrics: + - Bias + """ + self.binary_valid = yaml.load(binary_valid_config) + 
+ binary_no_reference_config = """ + datasets: + targets: + - data_source: dap + url: afakeurl.com + variable: pr + metrics: + - Bias + """ + self.binary_no_reference = yaml.load(binary_no_reference_config) + + binary_no_target_config = """ + datasets: + reference: + data_source: dap + url: afakeurl.com + variable: pr + + metrics: + - Bias + """ + self.binary_no_target = yaml.load(binary_no_target_config) + + @patch('configuration_parsing.logger') + def test_no_datasets(self, mock_logger): + ret = parser._valid_minimal_config(self.no_datasets) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'No datasets specified in configuration data.' + ) + + @patch('configuration_parsing.logger') + def test_no_metrics(self, mock_logger): + ret = parser._valid_minimal_config(self.no_metrics) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'No metrics specified in configuration data.' + ) + + def test_unary_with_reference(self): + ret = parser._valid_minimal_config(self.unary_with_reference) + self.assertTrue(ret) + + def test_unary_with_target(self): + ret = parser._valid_minimal_config(self.unary_with_target) + self.assertTrue(ret) + + @patch('configuration_parsing.logger') + def test_unary_no_datasets(self, mock_logger): + ret = parser._valid_minimal_config(self.unary_no_ref_or_target) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'Unary metric in configuration data requires either a reference ' + 'or target dataset to be present for evaluation. Please ensure ' + 'that your config is well formed.' 
+ ) + + def test_valid_binary(self): + ret = parser._valid_minimal_config(self.binary_valid) + self.assertTrue(ret) + + @patch('configuration_parsing.logger') + def test_binary_no_reference(self, mock_logger): + ret = parser._valid_minimal_config(self.binary_no_reference) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'Binary metric in configuration requires both a reference ' + 'and target dataset to be present for evaluation. Please ensure ' + 'that your config is well formed.' + ) + + @patch('configuration_parsing.logger') + def test_binary_no_target(self, mock_logger): + ret = parser._valid_minimal_config(self.binary_no_target) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'Binary metric in configuration requires both a reference ' + 'and target dataset to be present for evaluation. Please ensure ' + 'that your config is well formed.' + ) + + +class TestConfigIsWellFormed(unittest.TestCase): + @classmethod + def setUpClass(self): + malformed_reference_config = """ + datasets: + reference: + data_source: notavalidlocation + + metrics: + - Bias + """ + self.malformed_reference_conf = yaml.load(malformed_reference_config) + + malformed_target_list_config = """ + datasets: + targets: + notalist: + a_key: a_value + + alsonotalist: + a_key: a_value + + metrics: + - Bias + """ + self.malformed_target_list = yaml.load(malformed_target_list_config) + + missing_metric_name_config = """ + datasets: + reference: + data_source: dap + url: afakeurl.com + variable: pr + + metrics: + - NotABuiltInMetric + """ + self.missing_metric_name = yaml.load(missing_metric_name_config) + + bad_plot_config = """ + datasets: + reference: + data_source: dap + url: afakeurl.com + variable: pr + + metrics: + - Bias + + plots: + - type: NotARealPlotName + """ + bad_plot = yaml.load(bad_plot_config) + + def test_malformed_reference_config(self): + ret = parser._config_is_well_formed(self.malformed_reference_conf) + self.assertFalse(ret) + + 
@patch('configuration_parsing.logger') + def test_malformed_target_dataset_list(self, mock_logger): + ret = parser._config_is_well_formed(self.malformed_target_list) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + "Expected to find list of target datasets but instead found " + "object of type <type 'dict'>" + ) + + def test_not_builtin_metric(self): + ret = parser._config_is_well_formed(self.missing_metric_name) + self.assertFalse(ret) + + @patch('configuration_parsing.logger') + def test_warns_regarding_not_builtin_metric(self, mock_logger): + ret = parser._config_is_well_formed(self.missing_metric_name) + mock_logger.warn.assert_called_with( + 'Unable to locate metric name NotABuiltInMetric in built-in ' + 'metrics. If this is not a user defined metric then please check ' + 'for potential misspellings.' + ) + + def test_bad_plot_config(self): + ret = parser._config_is_well_formed(self.missing_metric_name) + self.assertFalse(ret) + + +class MetricFetchTest(unittest.TestCase): + @classmethod + def setUpClass(self): + binary_config = """ + metrics: + - Bias + - StdDevRatio + """ + unary_config = """ + metrics: + - TemporalStdDev + """ + self.unary_conf = yaml.load(unary_config) + self.binary_conf = yaml.load(binary_config) + + def test_contains_binary_metric(self): + ret = parser._contains_binary_metrics(self.binary_conf['metrics']) + self.assertTrue(ret) + + def test_does_not_contain_binary_metric(self): + ret = parser._contains_binary_metrics(self.unary_conf['metrics']) + self.assertFalse(ret) + + def test_contains_unary_metric(self): + ret = parser._contains_unary_metrics(self.unary_conf['metrics']) + self.assertTrue(ret) + + def test_does_not_contain_unary_metric(self): + ret = parser._contains_unary_metrics(self.binary_conf['metrics']) + self.assertFalse(ret) + + +class InvalidDatasetConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + example_config_yaml = """ + - file_count: 1 + path: /a/fake/path + variable: pr + + - 
data_source: invalid_location_identifier + """ + conf = yaml.load(example_config_yaml) + self.missing_data_source = conf[0] + self.invalid_data_source = conf[1] + + @patch('configuration_parsing.logger') + def test_missing_data_source_config(self, mock_logger): + parser._valid_dataset_config_data(self.missing_data_source) + mock_logger.error.assert_called_with( + 'Dataset does not contain a data_source attribute.' + ) + + @patch('configuration_parsing.logger') + def test_invalid_data_source(self, mock_logger): + parser._valid_dataset_config_data(self.invalid_data_source) + mock_logger.error.assert_called_with( + 'Dataset does not contain a valid data_source location.' + ) + + +class TestLocalDatasetConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + self.required_local_keys = set(['data_source', 'file_count', 'path', 'variable']) + example_config_yaml = """ + - data_source: local + file_count: 1 + path: /a/fake/path + variable: pr + optional_args: + name: Target1 + + - data_source: local + + - data_source: local + file_count: 5 + file_glob_pattern: something for globbing files here + variable: pr + path: /a/fake/path + optional_args: + name: Target1 + + - data_source: local + file_count: 5 + variable: pr + path: /a/fake/path + """ + + conf = yaml.load(example_config_yaml) + self.valid_local_single = conf[0] + self.invalid_local_single = conf[1] + self.valid_local_multi = conf[2] + self.invalid_local_multi = conf[1] + self.invalid_local_multi_file_glob = conf[3] + + def test_valid_local_config_single_file(self): + ret = parser._valid_dataset_config_data(self.valid_local_single) + self.assertTrue(ret) + + def test_valid_local_config_multi_file(self): + ret = parser._valid_dataset_config_data(self.valid_local_multi) + self.assertTrue(ret) + + @patch('configuration_parsing.logger') + def test_invalid_local_config(self, mock_logger): + parser._valid_dataset_config_data(self.invalid_local_single) + + present_keys = set(self.invalid_local_single.keys()) + 
missing_keys = self.required_local_keys - present_keys + missing = sorted(list(missing_keys)) + + error = ( + 'Dataset does not contain required keys. ' + 'The following keys are missing: {}'.format(', '.join(missing)) + ) + mock_logger.error.assert_called_with(error) + + @patch('configuration_parsing.logger') + def test_invalid_local_config_multi_file(self, mock_logger): + # multifile config is handled slightly differently. We should see the + # same missing keys in this situation as we would on the single file + # local config. We will test for a missing file_glob_pattern in a + # different test. + parser._valid_dataset_config_data(self.invalid_local_multi) + + present_keys = set(self.invalid_local_multi.keys()) + missing_keys = self.required_local_keys - present_keys + missing = sorted(list(missing_keys)) + + error = ( + 'Dataset does not contain required keys. ' + 'The following keys are missing: {}'.format(', '.join(missing)) + ) + mock_logger.error.assert_called_with(error) + + @patch('configuration_parsing.logger') + def test_invalid_local_config_multi_file_missing_file_glob(self, mock_logger): + # We can't check for the file_glob_pattern pattern until after we have + # verified that the single local file config has been met.
+ parser._valid_dataset_config_data(self.invalid_local_multi_file_glob) + + mock_logger.error.assert_called_with( + 'Multi-file local dataset is missing key: file_glob_pattern' + ) + + +class TestRCMEDDatasetConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + self.required_rcmed_keys = set([ + 'dataset_id', + 'parameter_id', + 'min_lat', + 'max_lat', + 'min_lon', + 'max_lon', + 'start_time', + 'end_time' + ]) + example_config_yaml = """ + - data_source: rcmed + dataset_id: 4 + parameter_id: 4 + min_lat: -40 + max_lat: 40 + min_lon: -50 + max_lon: 50 + start_time: YYYY-MM-DDThh:mm:ss + end_time: YYYY-MM-DDThh:mm:ss + + - data_source: rcmed + """ + conf = yaml.load(example_config_yaml) + self.valid_rcmed = conf[0] + self.invalid_rcmed = conf[1] + + def test_valid_rcmed_config(self): + ret = config_runner._valid_dataset_config_data(self.valid_rcmed) + self.assertTrue(ret) + + @patch('ocw_evaluation_from_config.logger') + def test_invalid_rcmed_config(self, mock_logger): + config_runner._valid_dataset_config_data(self.invalid_rcmed) + + present_keys = set(self.invalid_rcmed.keys()) + missing_keys = self.required_rcmed_keys - present_keys + missing = sorted(list(missing_keys)) + + error = ( + 'Dataset does not contain required keys. 
' + 'The following keys are missing: {}'.format(', '.join(missing)) + ) + mock_logger.error.assert_called_with(error) + + +class TestESGFDatasetConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + self.required_esgf_keys = set([ + 'data_source', + 'dataset_id', + 'variable', + 'esgf_username', + 'esgf_password' + ]) + example_config_yaml = """ + - data_source: esgf + dataset_id: fake dataset id + variable: pr + esgf_username: my esgf username + esgf_password: my esgf password + + - data_source: esgf + """ + conf = yaml.load(example_config_yaml) + self.valid_esgf = conf[0] + self.invalid_esgf = conf[1] + + def test_valid_esgf_conf(self): + ret = parser._valid_dataset_config_data(self.valid_esgf) + self.assertTrue(ret) + + @patch('configuration_parsing.logger') + def test_invalid_esgf_conf(self, mock_logger): + parser._valid_dataset_config_data(self.invalid_esgf) + + present_keys = set(self.invalid_esgf.keys()) + missing_keys = self.required_esgf_keys - present_keys + missing = sorted(list(missing_keys)) + + error = ( + 'Dataset does not contain required keys. 
' + 'The following keys are missing: {}'.format(', '.join(missing)) + ) + mock_logger.error.assert_called_with(error) + + +class TestDAPDatasetConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + self.required_dap_keys = set(['url', 'variable']) + example_config_yaml = """ + - data_source: dap + url: afakeurl.com + variable: pr + + - data_source: dap + """ + conf = yaml.load(example_config_yaml) + self.valid_dap = conf[0] + self.invalid_dap = conf[1] + + def test_valid_dap_config(self): + ret = parser._valid_dataset_config_data(self.valid_dap) + self.assertTrue(ret) + + @patch('configuration_parsing.logger') + def test_invalid_dap_config(self, mock_logger): + parser._valid_dataset_config_data(self.invalid_dap) + + present_keys = set(self.invalid_dap.keys()) + missing_keys = self.required_dap_keys - present_keys + missing = sorted(list(missing_keys)) + + error = ( + 'Dataset does not contain required keys. ' + 'The following keys are missing: {}'.format(', '.join(missing)) + ) + mock_logger.error.assert_called_with(error) + + +class ContourMapConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + valid_contour_config = """ + type: contour + results_indeces: + - !!python/tuple [0, 0] + lats: + range_min: -20 + range_max: 20 + range_step: 1 + lons: + range_min: -20 + range_max: 20 + range_step: 1 + output_name: wrf_bias_compared_to_knmi + """ + self.valid_contour = yaml.load(valid_contour_config) + + missing_keys_contour_config = """ + type: contour + """ + self.missing_keys_contour = yaml.load(missing_keys_contour_config) + + self.required_contour_keys = set([ + 'results_indeces', + 'lats', + 'lons', + 'output_name' + ]) + + def test_valid_contour(self): + ret = parser._valid_plot_config_data(self.valid_contour) + self.assertTrue(ret) + + @patch('configuration_parsing.logger') + def test_missing_keys_contour(self, mock_logger): + ret = parser._valid_plot_config_data(self.missing_keys_contour) + + present_keys = 
set(self.missing_keys_contour.keys()) + missing_keys = self.required_contour_keys - present_keys + missing = sorted(list(missing_keys)) + + err = ( + 'Plot config does not contain required keys. ' + 'The following keys are missing: {}' + ).format(', '.join(missing)) + mock_logger.error.assert_called_with(err) + + +class TestInvalidPlotConfig(unittest.TestCase): + @classmethod + def setUpClass(self): + bad_plot_type_config = """ + type: NotAPlotType + """ + self.bad_plot_type = yaml.load(bad_plot_type_config) + + missing_plot_type_config = """ + results_indeces: + - !!python/tuple [0, 0] + lats: + range_min: -20 + range_max: 20 + range_step: 1 + lons: + range_min: -20 + range_max: 20 + range_step: 1 + output_name: wrf_bias_compared_to_knmi + """ + self.missing_plot_type = yaml.load(missing_plot_type_config) + + @patch('configuration_parsing.logger') + def test_invalid_plot_type(self, mock_logger): + ret = parser._valid_plot_config_data(self.bad_plot_type) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'Invalid plot type specified.' + ) + + @patch('configuration_parsing.logger') + def test_missing_plot_type(self, mock_logger): + ret = parser._valid_plot_config_data(self.missing_plot_type) + self.assertFalse(ret) + + mock_logger.error.assert_called_with( + 'Plot config does not include a type attribute.' + ) http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/tests/test_config_runner.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/tests/test_config_runner.py b/ocw-config-runner/tests/test_config_runner.py deleted file mode 100644 index 993971e..0000000 --- a/ocw-config-runner/tests/test_config_runner.py +++ /dev/null @@ -1,711 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from mock import patch -import unittest - -import ocw_evaluation_from_config as config_runner -import ocw.metrics - -import yaml - -class TestMetricLoad(unittest.TestCase): - def test_valid_metric_load(self): - config = yaml.load(""" - metrics: - - Bias - """) - loaded_metrics = [config_runner._load_metric(m)() - for m in config['metrics']] - self.assertTrue(isinstance(loaded_metrics[0], ocw.metrics.Bias)) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_metric_load(self, mock_logger): - config = yaml.load(""" - metrics: - - ocw.metrics.Bias - """) - config_runner._load_metric(config['metrics'][0]) - error = ( - 'User-defined metrics outside of the ocw.metrics module ' - 'cannot currently be loaded. If you just wanted a metric ' - 'found in ocw.metrics then do not specify the full ' - 'package and module names. See the documentation for examples.' 
- ) - mock_logger.error.assert_called_with(error) - - -class TestRCMEDDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_rcmed_keys = set([ - 'dataset_id', - 'parameter_id', - 'min_lat', - 'max_lat', - 'min_lon', - 'max_lon', - 'start_time', - 'end_time' - ]) - example_config_yaml = """ - - data_source: rcmed - dataset_id: 4 - parameter_id: 4 - min_lat: -40 - max_lat: 40 - min_lon: -50 - max_lon: 50 - start_time: YYYY-MM-DDThh:mm:ss - end_time: YYYY-MM-DDThh:mm:ss - - - data_source: rcmed - """ - conf = yaml.load(example_config_yaml) - self.valid_rcmed = conf[0] - self.invalid_rcmed = conf[1] - - def test_valid_rcmed_config(self): - ret = config_runner._valid_dataset_config_data(self.valid_rcmed) - self.assertTrue(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_rcmed_config(self, mock_logger): - config_runner._valid_dataset_config_data(self.invalid_rcmed) - - present_keys = set(self.invalid_rcmed.keys()) - missing_keys = self.required_rcmed_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. 
' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - -class TestLocalDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_local_keys = set(['data_source', 'file_count', 'path', 'variable']) - example_config_yaml = """ - - data_source: local - file_count: 1 - path: /a/fake/path - variable: pr - optional_args: - name: Target1 - - - data_source: local - - - data_source: local - file_count: 5 - file_glob_pattern: something for globbing files here - variable: pr - path: /a/fake/path - optional_args: - name: Target1 - - - data_source: local - file_count: 5 - variable: pr - path: /a/fake/path - """ - - conf = yaml.load(example_config_yaml) - self.valid_local_single = conf[0] - self.invalid_local_single = conf[1] - self.valid_local_multi = conf[2] - self.invalid_local_multi = conf[1] - self.invalid_local_multi_file_glob = conf[3] - - def test_valid_local_config_single_file(self): - ret = config_runner._valid_dataset_config_data(self.valid_local_single) - self.assertTrue(ret) - - def test_valid_local_config_multi_file(self): - ret = config_runner._valid_dataset_config_data(self.valid_local_multi) - self.assertTrue(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_local_config(self, mock_logger): - config_runner._valid_dataset_config_data(self.invalid_local_single) - - present_keys = set(self.invalid_local_single.keys()) - missing_keys = self.required_local_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_local_config_multi_file(self, mock_logger): - # mutlifile config is handled slightly differently. 
We should see the - # same missing keys in this situation as we would on the single file - # local config. We will test for a missing file_glob_pattern in a - # different test. - config_runner._valid_dataset_config_data(self.invalid_local_multi) - - present_keys = set(self.invalid_local_multi.keys()) - missing_keys = self.required_local_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_local_config_multi_file_missing_file_glob(self, mock_logger): - # We can't check for the file_glob_pattern pattern until after we have - # verified that the single local file config has been met. - config_runner._valid_dataset_config_data(self.invalid_local_multi_file_glob) - - mock_logger.error.assert_called_with( - 'Multi-file local dataset is missing key: file_glob_pattern' - ) - - -class TestESGFDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_esgf_keys = set([ - 'data_source', - 'dataset_id', - 'variable', - 'esgf_username', - 'esgf_password' - ]) - example_config_yaml = """ - - data_source: esgf - dataset_id: fake dataset id - variable: pr - esgf_username: my esgf username - esgf_password: my esgf password - - - data_source: esgf - """ - conf = yaml.load(example_config_yaml) - self.valid_esgf = conf[0] - self.invalid_esgf = conf[1] - - def test_valid_esgf_conf(self): - ret = config_runner._valid_dataset_config_data(self.valid_esgf) - self.assertTrue(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_esgf_conf(self, mock_logger): - config_runner._valid_dataset_config_data(self.invalid_esgf) - - present_keys = set(self.invalid_esgf.keys()) - missing_keys = self.required_esgf_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain 
required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - -class TestDAPDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - self.required_dap_keys = set(['url', 'variable']) - example_config_yaml = """ - - data_source: dap - url: afakeurl.com - variable: pr - - - data_source: dap - """ - conf = yaml.load(example_config_yaml) - self.valid_dap = conf[0] - self.invalid_dap = conf[1] - - def test_valid_dap_config(self): - ret = config_runner._valid_dataset_config_data(self.valid_dap) - self.assertTrue(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_dap_config(self, mock_logger): - config_runner._valid_dataset_config_data(self.invalid_dap) - - present_keys = set(self.invalid_dap.keys()) - missing_keys = self.required_dap_keys - present_keys - missing = sorted(list(missing_keys)) - - error = ( - 'Dataset does not contain required keys. ' - 'The following keys are missing: {}'.format(', '.join(missing)) - ) - mock_logger.error.assert_called_with(error) - - -class InvalidDatasetConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - example_config_yaml = """ - - file_count: 1 - path: /a/fake/path - variable: pr - - - data_source: invalid_location_identifier - """ - conf = yaml.load(example_config_yaml) - self.missing_data_source = conf[0] - self.invalid_data_source = conf[1] - - @patch('ocw_evaluation_from_config.logger') - def test_missing_data_source_config(self, mock_logger): - config_runner._valid_dataset_config_data(self.missing_data_source) - mock_logger.error.assert_called_with( - 'Dataset does not contain a data_source attribute.' - ) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_data_source(self, mock_logger): - config_runner._valid_dataset_config_data(self.invalid_data_source) - mock_logger.error.assert_called_with( - 'Dataset does not contain a valid data_source location.' 
- ) - - -class MetricFetchTest(unittest.TestCase): - @classmethod - def setUpClass(self): - binary_config = """ - metrics: - - Bias - - StdDevRatio - """ - unary_config = """ - metrics: - - TemporalStdDev - """ - self.unary_conf = yaml.load(unary_config) - self.binary_conf = yaml.load(binary_config) - - def test_contains_binary_metric(self): - ret = config_runner._contains_binary_metrics(self.binary_conf['metrics']) - self.assertTrue(ret) - - def test_does_not_contain_binary_metric(self): - ret = config_runner._contains_binary_metrics(self.unary_conf['metrics']) - self.assertFalse(ret) - - def test_contains_unary_metric(self): - ret = config_runner._contains_unary_metrics(self.unary_conf['metrics']) - self.assertTrue(ret) - - def test_does_not_contain_unary_metric(self): - ret = config_runner._contains_unary_metrics(self.binary_conf['metrics']) - self.assertFalse(ret) - - -class ContourMapConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - valid_contour_config = """ - type: contour - results_indeces: - - !!python/tuple [0, 0] - lats: - range_min: -20 - range_max: 20 - range_step: 1 - lons: - range_min: -20 - range_max: 20 - range_step: 1 - output_name: wrf_bias_compared_to_knmi - """ - self.valid_contour = yaml.load(valid_contour_config) - - missing_keys_contour_config = """ - type: contour - """ - self.missing_keys_contour = yaml.load(missing_keys_contour_config) - - self.required_contour_keys = set([ - 'results_indeces', - 'lats', - 'lons', - 'output_name' - ]) - - def test_valid_contour(self): - ret = config_runner._valid_plot_config_data(self.valid_contour) - self.assertTrue(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_missing_keys_contour(self, mock_logger): - ret = config_runner._valid_plot_config_data(self.missing_keys_contour) - - present_keys = set(self.missing_keys_contour.keys()) - missing_keys = self.required_contour_keys - present_keys - missing = sorted(list(missing_keys)) - - err = ( - 'Plot config does not contain 
required keys. ' - 'The following keys are missing: {}' - ).format(', '.join(missing)) - mock_logger.error.assert_called_with(err) - - -class TestInvalidPlotConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - bad_plot_type_config = """ - type: NotAPlotType - """ - self.bad_plot_type = yaml.load(bad_plot_type_config) - - missing_plot_type_config = """ - results_indeces: - - !!python/tuple [0, 0] - lats: - range_min: -20 - range_max: 20 - range_step: 1 - lons: - range_min: -20 - range_max: 20 - range_step: 1 - output_name: wrf_bias_compared_to_knmi - """ - self.missing_plot_type = yaml.load(missing_plot_type_config) - - @patch('ocw_evaluation_from_config.logger') - def test_invalid_plot_type(self, mock_logger): - ret = config_runner._valid_plot_config_data(self.bad_plot_type) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Invalid plot type specified.' - ) - - @patch('ocw_evaluation_from_config.logger') - def test_missing_plot_type(self, mock_logger): - ret = config_runner._valid_plot_config_data(self.missing_plot_type) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Plot config does not include a type attribute.' 
- ) - - -class TestValidMinimalConfig(unittest.TestCase): - @classmethod - def setUpClass(self): - no_datasets_config = """ - metrics: - - Bias - """ - self.no_datasets = yaml.load(no_datasets_config) - - no_metrics_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - """ - self.no_metrics = yaml.load(no_metrics_config) - - unary_with_reference_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - TemporalStdDev - """ - self.unary_with_reference = yaml.load(unary_with_reference_config) - - unary_with_target_config = """ - datasets: - targets: - - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - TemporalStdDev - """ - self.unary_with_target = yaml.load(unary_with_target_config) - - unary_no_reference_or_target = """ - datasets: - not_ref_or_target: - - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - TemporalStdDev - """ - self.unary_no_ref_or_target = yaml.load(unary_no_reference_or_target) - - binary_valid_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - targets: - - data_source: dap - url: afakeurl.com - variable: pr - metrics: - - Bias - """ - self.binary_valid = yaml.load(binary_valid_config) - - binary_no_reference_config = """ - datasets: - targets: - - data_source: dap - url: afakeurl.com - variable: pr - metrics: - - Bias - """ - self.binary_no_reference = yaml.load(binary_no_reference_config) - - binary_no_target_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - Bias - """ - self.binary_no_target = yaml.load(binary_no_target_config) - - @patch('ocw_evaluation_from_config.logger') - def test_no_datasets(self, mock_logger): - ret = config_runner._valid_minimal_config(self.no_datasets) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'No datasets specified in configuration data.' 
- ) - - @patch('ocw_evaluation_from_config.logger') - def test_no_metrics(self, mock_logger): - ret = config_runner._valid_minimal_config(self.no_metrics) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'No metrics specified in configuration data.' - ) - - def test_unary_with_reference(self): - ret = config_runner._valid_minimal_config(self.unary_with_reference) - self.assertTrue(ret) - - def test_unary_with_target(self): - ret = config_runner._valid_minimal_config(self.unary_with_target) - self.assertTrue(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_unary_no_datasets(self, mock_logger): - ret = config_runner._valid_minimal_config(self.unary_no_ref_or_target) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Unary metric in configuration data requires either a reference ' - 'or target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' - ) - - def test_valid_binary(self): - ret = config_runner._valid_minimal_config(self.binary_valid) - self.assertTrue(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_binary_no_reference(self, mock_logger): - ret = config_runner._valid_minimal_config(self.binary_no_reference) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Binary metric in configuration requires both a reference ' - 'and target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' - ) - - @patch('ocw_evaluation_from_config.logger') - def test_binary_no_target(self, mock_logger): - ret = config_runner._valid_minimal_config(self.binary_no_target) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Binary metric in configuration requires both a reference ' - 'and target dataset to be present for evaluation. Please ensure ' - 'that your config is well formed.' 
- ) - - -class TestIsConfigValid(unittest.TestCase): - @classmethod - def setUpClass(self): - not_minimal_config = """ - datasets: - """ - self.not_minimal = yaml.load(not_minimal_config) - - not_well_formed_config = """ - datasets: - reference: - data_source: local - file_count: 1 - path: /a/fake/path/file.py - variable: pr - - targets: - - data_source: local - file_count: 5 - file_glob_pattern: something for globbing files here - variable: pr - optional_args: - name: Target1 - - - data_source: esgf - dataset_id: fake dataset id - variable: pr - esgf_username: my esgf username - esgf_password: my esgf password - - metrics: - - Bias - - TemporalStdDev - """ - self.not_well_formed = yaml.load(not_well_formed_config) - - @patch('ocw_evaluation_from_config.logger') - def test_not_minimal_config(self, mock_logger): - ret = config_runner.is_config_valid(self.not_minimal) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Insufficient configuration file data for an evaluation' - ) - - @patch('ocw_evaluation_from_config.logger') - def test_not_valid_config(self, mock_logger): - ret = config_runner.is_config_valid(self.not_well_formed) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - 'Configuration data is not well formed' - ) - - -class TestConfigIsWellFormed(unittest.TestCase): - @classmethod - def setUpClass(self): - malformed_reference_config = """ - datasets: - reference: - data_source: notavalidlocation - - metrics: - - Bias - """ - self.malformed_reference_conf = yaml.load(malformed_reference_config) - - malformed_target_list_config = """ - datasets: - targets: - notalist: - a_key: a_value - - alsonotalist: - a_key: a_value - - metrics: - - Bias - """ - self.malformed_target_list = yaml.load(malformed_target_list_config) - - missing_metric_name_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - NotABuiltInMetric - """ - self.missing_metric_name = 
yaml.load(missing_metric_name_config) - - bad_plot_config = """ - datasets: - reference: - data_source: dap - url: afakeurl.com - variable: pr - - metrics: - - Bias - - plots: - - type: NotARealPlotName - """ - bad_plot = yaml.load(bad_plot_config) - - def test_malformed_reference_config(self): - ret = config_runner._config_is_well_formed(self.malformed_reference_conf) - self.assertFalse(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_malformed_target_dataset_list(self, mock_logger): - ret = config_runner._config_is_well_formed(self.malformed_target_list) - self.assertFalse(ret) - - mock_logger.error.assert_called_with( - "Expected to find list of target datasets but instead found " - "object of type <type 'dict'>" - ) - - def test_not_builtin_metric(self): - ret = config_runner._config_is_well_formed(self.missing_metric_name) - self.assertFalse(ret) - - @patch('ocw_evaluation_from_config.logger') - def test_warns_regarding_not_builtin_metric(self, mock_logger): - ret = config_runner._config_is_well_formed(self.missing_metric_name) - mock_logger.warn.assert_called_with( - 'Unable to locate metric name NotABuiltInMetric in built-in ' - 'metrics. If this is not a user defined metric then please check ' - 'for potential misspellings.' 
- ) - - def test_bad_plot_config(self): - ret = config_runner._config_is_well_formed(self.missing_metric_name) - self.assertFalse(ret) http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/tests/test_evaluation_creation.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/tests/test_evaluation_creation.py b/ocw-config-runner/tests/test_evaluation_creation.py new file mode 100644 index 0000000..6fc79f8 --- /dev/null +++ b/ocw-config-runner/tests/test_evaluation_creation.py @@ -0,0 +1,33 @@ +from mock import patch +import unittest + +import evaluation_creation as eval_create +import ocw.metrics + +import yaml + + +class TestMetricLoad(unittest.TestCase): + def test_valid_metric_load(self): + config = yaml.load(""" + metrics: + - Bias + """) + loaded_metrics = [eval_create._load_metric(m)() + for m in config['metrics']] + self.assertTrue(isinstance(loaded_metrics[0], ocw.metrics.Bias)) + + @patch('evaluation_creation.logger') + def test_invalid_metric_load(self, mock_logger): + config = yaml.load(""" + metrics: + - ocw.metrics.Bias + """) + eval_create._load_metric(config['metrics'][0]) + error = ( + 'User-defined metrics outside of the ocw.metrics module ' + 'cannot currently be loaded. If you just wanted a metric ' + 'found in ocw.metrics then do not specify the full ' + 'package and module names. See the documentation for examples.' + ) + mock_logger.error.assert_called_with(error)
