http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/tests/test_config_writer.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/tests/test_config_writer.py b/ocw-config-runner/tests/test_config_writer.py deleted file mode 100644 index f163989..0000000 --- a/ocw-config-runner/tests/test_config_writer.py +++ /dev/null @@ -1,768 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from mock import patch -import os -import unittest - -from ocw.dataset import Dataset, Bounds -from ocw.evaluation import Evaluation -import ocw.metrics as metrics -import configuration_writer as writer - -import datetime as dt -import numpy as np -import yaml - - -class TestLocalDatasetExportGeneration(unittest.TestCase): - @classmethod - def setUpClass(self): - self.lats = np.array([10, 12, 14, 16, 18]) - self.lons = np.array([100, 102, 104, 106, 108]) - self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]) - flat_array = np.array(range(300)) - self.values = flat_array.reshape(12, 5, 5) - self.variable = 'var' - self.units = 'units' - self.origin = { - 'source': 'local', - 'path': '/a/fake/path.nc', - 'lat_name': 'a lat name', - 'lon_name': 'a lon name', - 'time_name': 'a time name', - 'elevation_index': 2 - } - self.name = 'name' - - self.dataset = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - origin=self.origin, - name=self.name - ) - - self.exported_info = writer.generate_dataset_config(self.dataset) - - def test_proper_data_source_export(self): - self.assertTrue('data_source' in self.exported_info) - self.assertEqual(self.exported_info['data_source'], - self.origin['source']) - - def test_proper_path_export(self): - self.assertEqual(self.exported_info['path'], self.origin['path']) - - def test_proper_variable_name_export(self): - self.assertEqual(self.exported_info['variable'], self.variable) - - def test_proper_units_name_export(self): - self.assertEqual(self.exported_info['optional_args']['units'], - self.units) - - def test_proper_lats_name_export(self): - self.assertEqual(self.exported_info['optional_args']['lat_name'], - self.origin['lat_name']) - - def test_proper_lons_name_export(self): - self.assertEqual(self.exported_info['optional_args']['lon_name'], - self.origin['lon_name']) - - def test_proper_times_name_export(self): - 
self.assertEqual(self.exported_info['optional_args']['time_name'], - self.origin['time_name']) - - def test_proper_dataset_name_export(self): - self.assertEqual(self.exported_info['optional_args']['name'], - self.name) - - def test_proper_elevation_index_export(self): - self.assertEqual(self.exported_info['optional_args']['elevation_index'], - self.origin['elevation_index']) - - -class TestRCMEDDatasetExportGeneration(unittest.TestCase): - @classmethod - def setUpClass(self): - self.lats = np.array([10, 12, 14, 16, 18]) - self.lons = np.array([100, 102, 104, 106, 108]) - self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]) - flat_array = np.array(range(300)) - self.values = flat_array.reshape(12, 5, 5) - self.variable = 'var' - self.units = 'units' - self.origin = { - 'source': 'rcmed', - 'dataset_id': 4, - 'parameter_id': 14 - } - self.name = 'name' - - self.dataset = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - origin=self.origin, - name=self.name - ) - - self.exported_info = writer.generate_dataset_config(self.dataset) - - def test_proper_data_source_export(self): - self.assertTrue('data_source' in self.exported_info) - self.assertEqual(self.exported_info['data_source'], - self.origin['source']) - - def test_proper_dataset_id_export(self): - self.assertEqual(self.exported_info['dataset_id'], - self.origin['dataset_id']) - - def test_proper_parameter_id_export(self): - self.assertEqual(self.exported_info['parameter_id'], - self.origin['parameter_id']) - - def test_proper_min_lat_export(self): - self.assertEqual(self.exported_info['min_lat'], min(self.lats)) - - def test_proper_max_lat_export(self): - self.assertEqual(self.exported_info['max_lat'], max(self.lats)) - - def test_proper_min_lon_export(self): - self.assertEqual(self.exported_info['min_lon'], min(self.lons)) - - def test_proper_max_lon_export(self): - self.assertEqual(self.exported_info['max_lon'], max(self.lons)) - - 
def test_proper_min_time_export(self): - self.assertEqual(self.exported_info['start_time'], str(min(self.times))) - - def test_proper_max_time_export(self): - self.assertEqual(self.exported_info['end_time'], str(max(self.times))) - - def test_proper_dataset_name_export(self): - self.assertEqual(self.exported_info['optional_args']['name'], - self.name) - - def test_proper_units_name_export(self): - self.assertEqual(self.exported_info['optional_args']['units'], - self.units) - - -class TestESGFDatasetExportGeneration(unittest.TestCase): - @classmethod - def setUpClass(self): - self.lats = np.array([10, 12, 14, 16, 18]) - self.lons = np.array([100, 102, 104, 106, 108]) - self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]) - flat_array = np.array(range(300)) - self.values = flat_array.reshape(12, 5, 5) - self.variable = 'var' - self.units = 'units' - self.origin = { - 'source': 'esgf', - 'dataset_id': 'esgf dataset id', - 'variable': 'var' - } - self.name = 'name' - - self.dataset = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - origin=self.origin, - name=self.name - ) - - self.exported_info = writer.generate_dataset_config(self.dataset) - - def test_proper_data_source_export(self): - self.assertTrue('data_source' in self.exported_info) - self.assertEqual(self.exported_info['data_source'], - self.origin['source']) - - def test_proper_dataset_id_export(self): - self.assertEqual(self.exported_info['dataset_id'], - self.origin['dataset_id']) - - def test_proper_variable_export(self): - self.assertEqual(self.exported_info['variable'], - self.origin['variable']) - - def test_proper_dummy_username_export(self): - self.assertTrue('esgf_username' in self.exported_info) - - def test_proper_dummy_password_export(self): - self.assertTrue('esgf_password' in self.exported_info) - - def test_proper_dataset_name_export(self): - self.assertEqual(self.exported_info['optional_args']['name'], - self.name) 
- - def test_proper_units_name_export(self): - self.assertEqual(self.exported_info['optional_args']['units'], - self.units) - - -class TestDAPDatasetExportGeneration(unittest.TestCase): - @classmethod - def setUpClass(self): - self.lats = np.array([10, 12, 14, 16, 18]) - self.lons = np.array([100, 102, 104, 106, 108]) - self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]) - flat_array = np.array(range(300)) - self.values = flat_array.reshape(12, 5, 5) - self.variable = 'var' - self.units = 'units' - self.origin = { - 'source': 'dap', - 'url': 'a fake url', - } - self.name = 'name' - - self.dataset = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - origin=self.origin, - name=self.name - ) - - self.exported_info = writer.generate_dataset_config(self.dataset) - - def test_proper_data_source_export(self): - self.assertTrue('data_source' in self.exported_info) - self.assertEqual(self.exported_info['data_source'], - self.origin['source']) - - def test_proper_url_export(self): - self.assertEqual(self.exported_info['url'], - self.origin['url']) - - def test_proper_dataset_name_export(self): - self.assertEqual(self.exported_info['optional_args']['name'], - self.name) - - def test_proper_units_name_export(self): - self.assertEqual(self.exported_info['optional_args']['units'], - self.units) - - -class TestDatasetExportFromEvaluation(unittest.TestCase): - @classmethod - def setUpClass(self): - self.lats = np.array([10, 12, 14, 16, 18]) - self.lons = np.array([100, 102, 104, 106, 108]) - self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]) - flat_array = np.array(range(300)) - self.values = flat_array.reshape(12, 5, 5) - self.variable = 'var' - self.units = 'units' - self.name = 'name' - - self.local_origin = { - 'source': 'local', - 'path': '/a/fake/path.nc', - 'lat_name': 'a lat name', - 'lon_name': 'a lon name', - 'time_name': 'a time name', - 'elevation_index': 2 - } - - 
self.rcmed_origin = { - 'source': 'rcmed', - 'dataset_id': 4, - 'parameter_id': 14 - } - - self.esgf_origin = { - 'source': 'esgf', - 'dataset_id': 'esgf dataset id', - 'variable': 'var' - } - - self.dap_origin = { - 'source': 'dap', - 'url': 'a fake url', - } - - self.local_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.local_origin - ) - - self.rcmed_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.rcmed_origin - ) - - self.esgf_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.esgf_origin - ) - - self.dap_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.dap_origin - ) - - self.evaluation = Evaluation( - self.local_ds, - [self.rcmed_ds, self.esgf_ds, self.dap_ds], - [] - ) - - def test_contains_only_reference_dataset(self): - new_eval = Evaluation(self.local_ds, [], []) - out = writer.generate_dataset_information(new_eval) - - self.assertTrue('reference' in out) - self.assertTrue('targets' not in out) - - def test_contains_only_target_datasets(self): - new_eval = Evaluation(None, [self.local_ds], []) - out = writer.generate_dataset_information(new_eval) - - self.assertTrue('reference' not in out) - self.assertTrue('targets' in out) - - def test_proper_reference_dataset_export(self): - out = writer.generate_dataset_information(self.evaluation) - - self.assertTrue('reference' in out) - self.assertTrue(out['reference']['data_source'] == 'local') - - def test_proper_target_datasets_export(self): - out = writer.generate_dataset_information(self.evaluation) - - self.assertTrue('targets' in out) - self.assertTrue(type(out['targets']) == type(list())) - 
self.assertTrue(len(out['targets']) == 3) - - -class TestMetricExportGeneration(unittest.TestCase): - @classmethod - def setUpClass(self): - self.bias = metrics.Bias() - self.tmp_std_dev = metrics.TemporalStdDev() - loaded_metrics = [self.bias, self.tmp_std_dev] - - self.evaluation = Evaluation(None, [], loaded_metrics) - - def test_proper_export_format(self): - out = writer.generate_metric_information(self.evaluation) - - self.assertTrue(type(out) == type(list())) - - for name in out: - self.assertTrue(type(name) == type(str())) - - def test_proper_metric_name_export(self): - out = writer.generate_metric_information(self.evaluation) - - self.assertTrue(self.bias.__class__.__name__ in out) - self.assertTrue(self.tmp_std_dev.__class__.__name__ in out) - - def test_empty_metrics_in_evaluation(self): - new_eval = Evaluation(None, [], []) - out = writer.generate_metric_information(new_eval) - - self.assertTrue(type(out) == type(list())) - self.assertTrue(len(out) == 0) - - -class TestEvaluationSettingsGeneration(unittest.TestCase): - @classmethod - def setUpClass(self): - self.lats = np.array(range(-10, 10, 1)) - self.lons = np.array(range(-20, 20, 1)) - self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]) - flat_array = np.array(range(9600)) - self.values = flat_array.reshape(12, 20, 40) - - self.dataset = Dataset( - self.lats, - self.lons, - self.times, - self.values, - ) - - self.evaluation = Evaluation(self.dataset, [], []) - - def test_default_data_return(self): - new_eval = Evaluation(None, [], []) - default_output = { - 'temporal_time_delta': 999, - 'spatial_regrid_lats': (-90, 90, 1), - 'spatial_regrid_lons': (-180, 180, 1), - 'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"], - } - - out = writer.generate_evaluation_information(new_eval) - - self.assertEquals(default_output, out) - - def test_handles_only_reference_dataset(self): - new_eval = Evaluation(self.dataset, [], []) - - default_output = { - 'temporal_time_delta': 999, - 
'spatial_regrid_lats': (-90, 90, 1), - 'spatial_regrid_lons': (-180, 180, 1), - 'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"], - } - - out = writer.generate_evaluation_information(new_eval) - - self.assertNotEquals(default_output, out) - - def test_handles_only_target_dataset(self): - new_eval = Evaluation(None, [self.dataset], []) - - default_output = { - 'temporal_time_delta': 999, - 'spatial_regrid_lats': (-90, 90, 1), - 'spatial_regrid_lons': (-180, 180, 1), - 'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"], - } - - out = writer.generate_evaluation_information(new_eval) - - self.assertNotEquals(default_output, out) - - def test_daily_temporal_bin(self): - new_times = np.array([dt.datetime(2000, 1, 1, x) for x in range(1, 13)]) - - dataset = Dataset( - self.lats, - self.lons, - new_times, - self.values, - ) - new_eval = Evaluation(dataset, [], []) - - out = writer.generate_evaluation_information(new_eval) - - self.assertEquals(out['temporal_time_delta'], 1) - - def test_monthly_temporal_bin(self): - out = writer.generate_evaluation_information(self.evaluation) - - self.assertEquals(out['temporal_time_delta'], 31) - - def test_yearly_temporal_bin(self): - new_times = np.array([dt.datetime(2000 + x, 1, 1) for x in range(1, 13)]) - - dataset = Dataset( - self.lats, - self.lons, - new_times, - self.values, - ) - new_eval = Evaluation(dataset, [], []) - - out = writer.generate_evaluation_information(new_eval) - - self.assertEquals(out['temporal_time_delta'], 366) - - def test_spatial_regrid_lats(self): - out = writer.generate_evaluation_information(self.evaluation) - - lats = out['spatial_regrid_lats'] - lat_range = np.arange(lats[0], lats[1], lats[2]) - - self.assertTrue(np.array_equal(lat_range, self.lats)) - - def test_spatial_regrid_lons(self): - out = writer.generate_evaluation_information(self.evaluation) - - lons = out['spatial_regrid_lons'] - lat_range = np.arange(lons[0], lons[1], lons[2]) - - self.assertTrue(np.array_equal(lat_range, 
self.lons)) - - def test_subset_with_single_dataset(self): - out = writer.generate_evaluation_information(self.evaluation) - subset = out['subset'] - - ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = self.dataset.spatial_boundaries() - start, end = self.dataset.time_range() - - self.assertEqual(ds_lat_min, subset[0]) - self.assertEqual(ds_lat_max, subset[1]) - self.assertEqual(ds_lon_min, subset[2]) - self.assertEqual(ds_lon_max, subset[3]) - self.assertEquals(str(start), subset[4]) - self.assertEquals(str(end), subset[5]) - - def test_subset_with_multiple_datasets(self): - new_ds = Dataset( - np.arange(0, 20, 1), - self.lons, - self.times, - self.values - ) - new_eval = Evaluation(self.dataset, [new_ds], []) - - out = writer.generate_evaluation_information(new_eval) - subset = out['subset'] - - ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = self.dataset.spatial_boundaries() - start, end = self.dataset.time_range() - - self.assertEqual(ds_lat_min, subset[0]) - # Check that we actually used the different max lat value that we - # created by adding 'new_ds'. 
- self.assertEqual(max(new_ds.lats), subset[1]) - self.assertEqual(ds_lon_min, subset[2]) - self.assertEqual(ds_lon_max, subset[3]) - self.assertEquals(str(start), subset[4]) - self.assertEquals(str(end), subset[5]) - - -class FullExportTest(unittest.TestCase): - @classmethod - def setUpClass(self): - self.lats = np.array([10, 12, 14, 16, 18]) - self.lons = np.array([100, 102, 104, 106, 108]) - self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)]) - flat_array = np.array(range(300)) - self.values = flat_array.reshape(12, 5, 5) - self.variable = 'var' - self.units = 'units' - self.name = 'name' - - self.local_origin = { - 'source': 'local', - 'path': '/a/fake/path.nc', - 'lat_name': 'a lat name', - 'lon_name': 'a lon name', - 'time_name': 'a time name', - 'elevation_index': 2 - } - - self.rcmed_origin = { - 'source': 'rcmed', - 'dataset_id': 4, - 'parameter_id': 14 - } - - self.esgf_origin = { - 'source': 'esgf', - 'dataset_id': 'esgf dataset id', - 'variable': 'var' - } - - self.dap_origin = { - 'source': 'dap', - 'url': 'a fake url', - } - - self.local_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.local_origin - ) - - self.rcmed_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.rcmed_origin - ) - - self.esgf_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.esgf_origin - ) - - self.dap_ds = Dataset( - self.lats, - self.lons, - self.times, - self.values, - variable=self.variable, - units=self.units, - name=self.name, - origin=self.dap_origin - ) - - self.subregions = [ - Bounds(-10, 10, -20, 20), - Bounds(-5, 5, -15, 15) - ] - - self.evaluation = Evaluation( - self.local_ds, - [self.rcmed_ds, self.esgf_ds, self.dap_ds], - [metrics.Bias(), 
metrics.TemporalStdDev()], - subregions=self.subregions - ) - - @classmethod - def tearDownClass(self): - if os.path.isfile('/tmp/test_config.yaml'): - os.remove('/tmp/test_config.yaml') - - def test_full_export(self): - file_path = '/tmp/test_config.yaml' - writer.export_evaluation_to_config( - self.evaluation, - file_path=file_path - ) - - self.assertTrue(os.path.isfile(file_path)) - - def test_proper_metric_export(self): - file_path = '/tmp/test_config.yaml' - writer.export_evaluation_to_config( - self.evaluation, - file_path=file_path - ) - - data = yaml.load(open(file_path, 'r')) - - self.assertTrue('metrics' in data) - self.assertTrue(type(data['metrics']) == type(list())) - - for metric in self.evaluation.metrics: - self.assertTrue(metric.__class__.__name__ in data['metrics']) - - for metric in self.evaluation.unary_metrics: - self.assertTrue(metric.__class__.__name__ in data['metrics']) - - total_eval_metrics = ( - len(self.evaluation.metrics) + - len(self.evaluation.unary_metrics) - ) - - self.assertTrue(total_eval_metrics, len(data['metrics'])) - - def test_proper_dataset_export(self): - file_path = '/tmp/test_config.yaml' - writer.export_evaluation_to_config( - self.evaluation, - file_path=file_path - ) - - data = yaml.load(open(file_path, 'r')) - - self.assertTrue('datasets' in data) - self.assertTrue('reference' in data['datasets']) - self.assertTrue('targets' in data['datasets']) - - self.assertAlmostEqual( - writer.generate_dataset_information(self.evaluation), - data['datasets'] - ) - - def test_proper_evaluation_setting_export(self): - file_path = '/tmp/test_config.yaml' - writer.export_evaluation_to_config( - self.evaluation, - file_path=file_path - ) - - data = yaml.load(open(file_path, 'r')) - - self.assertTrue('evaluation' in data) - self.assertTrue('temporal_time_delta' in data['evaluation']) - self.assertTrue('spatial_regrid_lats' in data['evaluation']) - self.assertTrue('spatial_regrid_lons' in data['evaluation']) - self.assertTrue('subset' 
in data['evaluation']) - - self.assertAlmostEqual( - writer.generate_evaluation_information(self.evaluation), - data['evaluation'] - ) - - def test_proper_subregion_export(self): - file_path = '/tmp/test_config.yaml' - writer.export_evaluation_to_config( - self.evaluation, - file_path=file_path - ) - - data = yaml.load(open(file_path, 'r')) - - self.assertTrue('subregions' in data) - - first_bounds = [ - self.subregions[0].lat_min, - self.subregions[0].lat_max, - self.subregions[0].lon_min, - self.subregions[0].lon_max, - ] - second_bounds = [ - self.subregions[1].lat_min, - self.subregions[1].lat_max, - self.subregions[1].lon_min, - self.subregions[1].lon_max, - ] - - self.assertEqual(first_bounds, data['subregions'][0]) - self.assertEqual(second_bounds, data['subregions'][1])
http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/tests/test_evaluation_creation.py ---------------------------------------------------------------------- diff --git a/ocw-config-runner/tests/test_evaluation_creation.py b/ocw-config-runner/tests/test_evaluation_creation.py deleted file mode 100644 index 41f998e..0000000 --- a/ocw-config-runner/tests/test_evaluation_creation.py +++ /dev/null @@ -1,49 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from mock import patch -import unittest - -import evaluation_creation as eval_create -import ocw.metrics - -import yaml - -class TestMetricLoad(unittest.TestCase): - def test_valid_metric_load(self): - config = yaml.load(""" - metrics: - - Bias - """) - loaded_metrics = [eval_create._load_metric(m)() - for m in config['metrics']] - self.assertTrue(isinstance(loaded_metrics[0], ocw.metrics.Bias)) - - @patch('evaluation_creation.logger') - def test_invalid_metric_load(self, mock_logger): - config = yaml.load(""" - metrics: - - ocw.metrics.Bias - """) - eval_create._load_metric(config['metrics'][0]) - error = ( - 'User-defined metrics outside of the ocw.metrics module ' - 'cannot currently be loaded. 
If you just wanted a metric ' - 'found in ocw.metrics then do not specify the full ' - 'package and module names. See the documentation for examples.' - ) - mock_logger.error.assert_called_with(error) http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/configuration_parsing.py ---------------------------------------------------------------------- diff --git a/ocw_config_runner/configuration_parsing.py b/ocw_config_runner/configuration_parsing.py new file mode 100644 index 0000000..5c28249 --- /dev/null +++ b/ocw_config_runner/configuration_parsing.py @@ -0,0 +1,285 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import logging +import re +import sys + +import ocw.metrics as metrics + +import yaml + +logging.basicConfig() +logger = logging.getLogger(__name__) + +def is_config_valid(config_data): + """ Validate supplied evaluation configuration data. + + :param config_data: Dictionary of the data parsed from the supplied YAML + configuration file. + :type config_data: :func:`dict` + + :returns: True if the configuration data is sufficient for an evaluation and + seems to be well formed, False otherwise. 
+ """ + if not _valid_minimal_config(config_data): + logger.error('Insufficient configuration file data for an evaluation') + return False + + if not _config_is_well_formed(config_data): + logger.error('Configuration data is not well formed') + return False + + return True + +def _valid_minimal_config(config_data): + """""" + if not 'datasets' in config_data.keys(): + logger.error('No datasets specified in configuration data.') + return False + + if not 'metrics' in config_data.keys(): + logger.error('No metrics specified in configuration data.') + return False + + if _contains_unary_metrics(config_data['metrics']): + if (not 'reference' in config_data['datasets'].keys() and + not 'targets' in config_data['datasets'].keys()): + err = ( + 'Unary metric in configuration data requires either a reference ' + 'or target dataset to be present for evaluation. Please ensure ' + 'that your config is well formed.' + ) + logger.error(err) + return False + + if _contains_binary_metrics(config_data['metrics']): + if (not 'reference' in config_data['datasets'].keys() or + not 'targets' in config_data['datasets'].keys()): + logger.error( + 'Binary metric in configuration requires both a reference ' + 'and target dataset to be present for evaluation. Please ensure ' + 'that your config is well formed.' 
+ ) + return False + + return True + +def _config_is_well_formed(config_data): + """""" + is_well_formed = True + + if 'reference' in config_data['datasets']: + if not _valid_dataset_config_data(config_data['datasets']['reference']): + is_well_formed = False + + if 'targets' in config_data['datasets']: + targets = config_data['datasets']['targets'] + if type(targets) != type(list()): + err = ( + 'Expected to find list of target datasets but instead found ' + 'object of type {}' + ).format(type(targets)) + logger.error(err) + is_well_formed = False + else: + for t in targets: + if not _valid_dataset_config_data(t): + is_well_formed = False + + available_metrics = _fetch_built_in_metrics() + for metric in config_data['metrics']: + if metric not in available_metrics: + warning = ( + 'Unable to locate metric name {} in built-in metrics. If this ' + 'is not a user defined metric then please check for potential ' + 'misspellings.' + ).format(metric) + logger.warn(warning) + is_well_formed = False + + if 'subregions' in config_data: + for subregion in config_data['subregions']: + if not _valid_subregion_config_data(subregion): + is_well_formed = False + + if 'plots' in config_data: + for plot in config_data['plots']: + if not _valid_plot_config_data(plot): + is_well_formed = False + # Ensure that if we're trying to make a plot that require + # subregion info that the config has this present. + elif plot['type'] in ['subregion', 'portrait']: + if ('subregions' not in config_data or + len(config_data['subregions']) < 1): + logger.error( + 'Plot config that requires subregion information is present ' + 'in a config file without adequate subregion information ' + 'provided. Please ensure that you have properly supplied 1 or ' + 'more subregion config values.' 
+ ) + is_well_formed = False + + + return is_well_formed + +def _contains_unary_metrics(config_metric_data): + """""" + unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] + return any(metric in unarys for metric in config_metric_data) + +def _contains_binary_metrics(config_metric_data): + """""" + binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] + return any(metric in binarys for metric in config_metric_data) + +def _fetch_built_in_metrics(): + """""" + unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()] + binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()] + return unarys + binarys + +def _valid_dataset_config_data(dataset_config_data): + """""" + try: + data_source = dataset_config_data['data_source'] + except KeyError: + logger.error('Dataset does not contain a data_source attribute.') + return False + + if data_source == 'local': + required_keys = set(['data_source', 'file_count', 'path', 'variable']) + elif data_source == 'rcmed': + required_keys = set([ + 'dataset_id', + 'parameter_id', + 'min_lat', + 'max_lat', + 'min_lon', + 'max_lon', + 'start_time', + 'end_time', + ]) + elif data_source == 'esgf': + required_keys = set([ + 'data_source', + 'dataset_id', + 'variable', + 'esgf_username', + 'esgf_password' + ]) + elif data_source == 'dap': + required_keys = set({'url', 'variable'}) + else: + logger.error('Dataset does not contain a valid data_source location.') + return False + + present_keys = set(dataset_config_data.keys()) + missing_keys = required_keys - present_keys + contains_required = len(missing_keys) == 0 + + if contains_required: + if data_source == 'local' and dataset_config_data['file_count'] > 1: + # If the dataset is a multi-file dataset then we need to make sure + # that the file glob pattern is included. 
# NOTE(review): this region was recovered from a flattened commit diff; the
# '+' diff markers were dropped and indentation was rebuilt from syntax.
#
# The fragment below is the tail of a function whose definition starts before
# this region, so it is preserved as-is (commented out) rather than rewritten.
# Its original indentation could not be recovered.
#
#     if not 'file_glob_pattern' in dataset_config_data:
#         logger.error(
#             'Multi-file local dataset is missing key: file_glob_pattern'
#         )
#         return False
#     return True
# else:
#     missing = sorted(list(missing_keys))
#     logger.error(
#         'Dataset does not contain required keys. '
#         'The following keys are missing: {}'.format(', '.join(missing))
#     )
#     return False


def _valid_plot_config_data(plot_config_data):
    """ Check that a single plot configuration has the keys its type needs.

    Relies on a module-level ``logger``.

    :param plot_config_data: One entry of the ``plots`` config section.
    :type plot_config_data: :func:`dict`

    :returns: True if the plot type is recognized and every key required for
        that type is present, False otherwise (an error is logged).
    :rtype: :func:`bool`
    """
    try:
        plot_type = plot_config_data['type']
    except KeyError:
        logger.error('Plot config does not include a type attribute.')
        return False

    if plot_type == 'contour':
        required_keys = set([
            'results_indices',
            'lats',
            'lons',
            'output_name'
        ])
    elif plot_type == 'taylor':
        required_keys = set([
            'stddev_results_indices',
            'pattern_corr_results_indices',
            'output_name'
        ])
    elif plot_type == 'subregion':
        required_keys = set([
            'lats',
            'lons',
            'output_name'
        ])
    elif plot_type == 'time_series':
        required_keys = set([
            'time_range'
        ])
    elif plot_type == 'portrait':
        required_keys = set([
            'metric_index',
            'output_name'
        ])
    else:
        logger.error('Invalid plot type specified.')
        return False

    missing_keys = required_keys - set(plot_config_data.keys())
    if missing_keys:
        # Sorted so the message is stable regardless of set ordering.
        missing = sorted(missing_keys)
        logger.error(
            'Plot config does not contain required keys. '
            'The following keys are missing: {}'.format(', '.join(missing))
        )
        return False

    return True


def _valid_subregion_config_data(subregion_config_data):
    """ Check that a subregion entry is a list of exactly four values.

    Relies on a module-level ``logger``.

    :param subregion_config_data: One entry of the ``subregions`` section.

    :returns: True if the entry is a list of length 4, False otherwise (an
        error is logged).
    :rtype: :func:`bool`
    """
    # isinstance replaces the original type(x) != type([]) comparison; both
    # failure cases log the same message, so they are checked together.
    is_list = isinstance(subregion_config_data, list)
    if not is_list or len(subregion_config_data) != 4:
        logger.error(
            'Subregions should be passed as a list of lists where '
            'each sub-list contains a bounding box of the form: '
            '[lat_min, lat_max, lon_min, lon_max].'
        )
        return False

    return True


# ----------------------------------------------------------------------
# http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/configuration_writer.py
# ocw_config_runner/configuration_writer.py (new file in this commit)
# ----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import datetime as dt
import logging

import yaml

logging.basicConfig()
logger = logging.getLogger(__name__)


def export_evaluation_to_config(evaluation, file_path='./exported_eval.yaml'):
    ''' Export an evaluation to a config file

    :param evaluation: The evaluation object to export.
    :type evaluation: :class:`evaluation.Evaluation`

    :param file_path: Optional file path where the config file should be saved.
    :type file_path: :mod:`string`
    '''
    config = {
        'evaluation': generate_evaluation_information(evaluation),
        'datasets': generate_dataset_information(evaluation),
        'metrics': generate_metric_information(evaluation),
        'subregions': generate_subregion_information(evaluation),
    }

    # open() instead of the Python 2-only file() builtin, and a context
    # manager so the handle is flushed and closed even if dumping fails.
    with open(file_path, 'w') as config_file:
        yaml.dump(config, config_file)


def generate_dataset_information(evaluation):
    ''' Generate dataset config file output for a given Evaluation object.

    :param evaluation: The evaluation object from which to extract metrics.
    :type evaluation: :class:`evaluation.Evaluation`

    :returns: A :func:`dict` of dataset configuration information for export
        to a configuration file. The ``reference`` and ``targets`` keys are
        only present when the evaluation has the corresponding datasets.
    :rtype: :func:`dict`
    '''
    datasets = {}

    if evaluation.ref_dataset:
        datasets['reference'] = generate_dataset_config(evaluation.ref_dataset)

    if len(evaluation.target_datasets) > 0:
        datasets['targets'] = [
            generate_dataset_config(target)
            for target in evaluation.target_datasets
        ]

    return datasets


def generate_dataset_config(dataset):
    ''' Generate dataset config file output for a given Dataset object.

    :param dataset: The dataset from which to extract configuration
        information.
    :type dataset: :class:`dataset.Dataset`

    :returns: :func:`dict` containing necessary information for
        dataset to be saved into a configuration object.

    :raises AttributeError: If dataset does not contain expected source data.
    '''
    dataset_source = dataset.origin['source']

    if dataset_source == 'local':
        info = _extract_local_dataset_info(dataset)
    elif dataset_source == 'rcmed':
        info = _extract_rcmed_dataset_info(dataset)
    elif dataset_source == 'esgf':
        info = _extract_esgf_dataset_info(dataset)
    elif dataset_source == 'dap':
        info = _extract_dap_dataset_info(dataset)
    else:
        err = (
            "Unexpected source in dataset origin information."
            "Found {}."
        ).format(dataset_source)
        logger.error(err)
        raise AttributeError(err)

    # Name and units are exported for every source type.
    info['optional_args']['name'] = dataset.name
    info['optional_args']['units'] = dataset.units

    return info


def generate_metric_information(evaluation):
    ''' Generate metric config file output from a given Evaluation object.

    :param evaluation: The evaluation object from which to extract metrics.
    :type evaluation: :class:`evaluation.Evaluation`

    :returns: A :func:`list` of metric class names (unary metrics first,
        then binary metrics) for output into a configuration file.
    :rtype: :func:`list` of :mod:`string`
    '''
    unary_metrics = [x.__class__.__name__ for x in evaluation.unary_metrics]
    binary_metrics = [x.__class__.__name__ for x in evaluation.metrics]

    return unary_metrics + binary_metrics


def generate_evaluation_information(evaluation):
    ''' Generate evaluation-related config file output.

    Attempts to parse out temporal and spatial rebinning/regridding information
    from the supplied evaluation object. If no datasets can be found, values
    are defaulted to sane defaults or (potentially) excluded entirely.

    It's important to note that this function does its best to extrapolate the
    configuration information. It's possible that you will encounter a scenario
    where the guessed values are not what you want/expect. Please double
    check the output before blindly trusting what this generates.

    :param evaluation: The evaluation object from which to extract metrics.
    :type evaluation: :class:`evaluation.Evaluation`

    :returns: A dictionary of valid `evaluation` section settings for export
        to a configuration file.
    :rtype: :func:`dict`
    '''
    # Defaults used when the evaluation carries no datasets at all.
    eval_config = {
        'temporal_time_delta': 999,
        'spatial_regrid_lats': (-90, 90, 1),
        'spatial_regrid_lons': (-180, 180, 1),
        'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
    }

    datasets = []

    if evaluation.ref_dataset:
        datasets.append(evaluation.ref_dataset)

    if evaluation.target_datasets:
        datasets += evaluation.target_datasets

    if len(datasets) > 0:
        eval_config['temporal_time_delta'] = _calc_temporal_bin_size(datasets)

        lats, lons = _calc_spatial_lat_lon_grid(datasets)
        eval_config['spatial_regrid_lats'] = lats
        eval_config['spatial_regrid_lons'] = lons

        eval_config['subset'] = _calc_subset_config(datasets)

    return eval_config


def generate_subregion_information(evaluation):
    ''' Generate subregion config file output from a given Evaluation object.

    :param evaluation: The evaluation object from which to extract metrics.
    :type evaluation: :class:`evaluation.Evaluation`

    :returns: A :func:`list` of :func:`list` objects containing bounding
        box info for export into a configuration file
    :rtype: :func:`list` of :func:`list`
    '''
    return [
        [s.lat_min, s.lat_max, s.lon_min, s.lon_max]
        for s in evaluation.subregions
    ]


def _extract_local_dataset_info(dataset):
    ''' Build the config dict for a dataset whose origin source is 'local'.

    Reads ``path``, ``lat_name``, ``lon_name``, ``time_name`` and, when
    present, ``elevation_index`` from ``dataset.origin``.
    '''
    origin = dataset.origin
    dataset_info = {
        'data_source': 'local',
        'file_count': 1,
        'path': origin['path'],
        'variable': dataset.variable,
        'optional_args': {
            'lat_name': origin['lat_name'],
            'lon_name': origin['lon_name'],
            'time_name': origin['time_name'],
        },
    }

    if 'elevation_index' in origin:
        elev = origin['elevation_index']
        dataset_info['optional_args']['elevation_index'] = elev

    return dataset_info


def _extract_rcmed_dataset_info(dataset):
    ''' Build the config dict for a dataset whose origin source is 'rcmed'.

    The spatial and temporal bounds are taken from the dataset itself; the
    time bounds are exported as strings.
    '''
    min_lat, max_lat, min_lon, max_lon = dataset.spatial_boundaries()
    start_time, end_time = dataset.time_range()

    return {
        'optional_args': {},
        'data_source': 'rcmed',
        'dataset_id': dataset.origin['dataset_id'],
        'parameter_id': dataset.origin['parameter_id'],
        'min_lat': min_lat,
        'max_lat': max_lat,
        'min_lon': min_lon,
        'max_lon': max_lon,
        'start_time': str(start_time),
        'end_time': str(end_time),
    }


def _extract_esgf_dataset_info(dataset):
    ''' Build the config dict for a dataset whose origin source is 'esgf'.

    Credentials are never exported; placeholder strings are written instead.
    '''
    return {
        'optional_args': {},
        'data_source': 'esgf',
        'dataset_id': dataset.origin['dataset_id'],
        'variable': dataset.origin['variable'],
        'esgf_username': 'Put your ESGF Username here',
        'esgf_password': 'Put your ESGF Password here',
    }


def _extract_dap_dataset_info(dataset):
    ''' Build the config dict for a dataset whose origin source is 'dap'. '''
    return {
        'optional_args': {},
        'data_source': 'dap',
        'url': dataset.origin['url'],
        'variable': dataset.variable,
    }


def _calc_temporal_bin_size(datasets):
    ''' Guess a temporal rebinning size (in days) from the first dataset.

    The gap between the first two time values is bucketed into 1 (sub-daily),
    31 (up to monthly), 366 (up to yearly) or 999 (anything longer).

    :returns: One of 1, 31, 366 or 999. 999 (the same value
        generate_evaluation_information uses as its default) is returned when
        the dataset has fewer than two time steps, since no gap can be
        measured.
    '''
    times = datasets[0].times
    if len(times) < 2:
        return 999

    time_delta = times[1] - times[0]

    if time_delta.days == 0:
        return 1
    elif time_delta.days <= 31:
        return 31
    elif time_delta.days <= 366:
        return 366
    else:
        return 999


def _calc_spatial_lat_lon_grid(datasets):
    ''' Derive (min, max, step) regridding tuples from the first dataset.

    :returns: ``((lat_min, lat_max, lat_step), (lon_min, lon_max, lon_step))``
    '''
    lat_min, lat_max, lon_min, lon_max = datasets[0].spatial_boundaries()

    lats = datasets[0].lats
    lons = datasets[0].lons
    # These explicit float casts are needed to ensure that the type of the
    # lat/lon steps are not numpy values. PyYAML will choke on export if it
    # encounters a Numpy value.
    lat_step = float(abs(lats[1] - lats[0]))
    lon_step = float(abs(lons[1] - lons[0]))

    # We need to add an extra step value onto the end so when we generate a
    # range with these values we don't lose one that we're expecting.
    if lat_max != 90:
        lat_max += lat_step
    if lon_max != 180:
        lon_max += lon_step

    return ((lat_min, lat_max, lat_step), (lon_min, lon_max, lon_step))


def _calc_subset_config(datasets):
    ''' Compute the bounding subset that covers every supplied dataset.

    :returns: ``[lat_min, lat_max, lon_min, lon_max, start, end]`` where the
        start and end times are exported as strings.
    '''
    # Start from inverted extremes so the first dataset always replaces them.
    lat_min, lat_max = 90, -90
    lon_min, lon_max = 180, -180
    start = dt.datetime(2500, 1, 1)
    end = dt.datetime(1500, 1, 1)

    for ds in datasets:
        ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = ds.spatial_boundaries()
        ds_start, ds_end = ds.time_range()

        lat_min = min(lat_min, ds_lat_min)
        lat_max = max(lat_max, ds_lat_max)
        lon_min = min(lon_min, ds_lon_min)
        lon_max = max(lon_max, ds_lon_max)
        start = min(start, ds_start)
        end = max(end, ds_end)

    return [lat_min, lat_max, lon_min, lon_max, str(start), str(end)]


# ----------------------------------------------------------------------
# http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/evaluation_creation.py
# ocw_config_runner/evaluation_creation.py (new file in this commit)
# ----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import dateutil.parser
from datetime import timedelta
import logging

from ocw.dataset import Bounds
from ocw.evaluation import Evaluation
import ocw.dataset_processor as dsp
import ocw.data_source.local as local
import ocw.data_source.rcmed as rcmed
import ocw.data_source.esgf as esgf
import ocw.data_source.dap as dap
import ocw.metrics as metrics

import numpy as np

logging.basicConfig()
logger = logging.getLogger(__name__)


def generate_evaluation_from_config(config_data):
    """ Generate an Evaluation object from configuration data.

    :param config_data: Dictionary of the data parsed from the supplied YAML
        configuration file.
    :type config_data: :func:`dict`

    :returns: An Evaluation object containing the data specified in the
        supplied configuration data.
    """
    # Load datasets
    reference = None
    targets = []
    if config_data['datasets']:
        dataset_config = config_data['datasets']

        if 'reference' in dataset_config:
            reference = _load_dataset(dataset_config['reference'])

        if 'targets' in dataset_config:
            loaded = [_load_dataset(t) for t in dataset_config['targets']]
            # _load_dataset logs and returns None for datasets it cannot
            # load; drop those instead of handing None to the evaluation.
            targets = [t for t in loaded if t is not None]

        # NOTE(review): the flattened source does not show whether this call
        # is inside the `if`. It is placed here because configs with an empty
        # `datasets` section and no `evaluation` section (subregion-only
        # plots) would otherwise fail -- confirm against the repository.
        reference, targets = _prepare_datasets_for_evaluation(reference,
                                                              targets,
                                                              config_data)
    # Load metrics
    eval_metrics = []
    if config_data['metrics']:
        metric_classes = [_load_metric(m) for m in config_data['metrics']]
        # _load_metric returns None (after logging) for names it refuses to
        # load; calling None would raise a TypeError, so skip those.
        eval_metrics = [cls() for cls in metric_classes if cls is not None]

    # Load Subregions (if present)
    subregions = None
    if 'subregions' in config_data:
        subregions = [_load_subregion(s) for s in config_data['subregions']]

    return Evaluation(reference, targets, eval_metrics, subregions=subregions)


def _load_dataset(dataset_config_data):
    """ Load a single dataset described by one ``datasets`` config entry.

    :param dataset_config_data: Config entry with a ``data_source`` key
        ('local', 'rcmed', 'esgf' or 'dap') plus the keys that source needs.
    :type dataset_config_data: :func:`dict`

    :returns: The loaded dataset, or None (after logging an error) when the
        entry is a multi-file local dataset or names an unknown data source.
    """
    data_source = dataset_config_data['data_source']
    optional_args = dataset_config_data.get('optional_args', {})

    if data_source == 'local':
        if dataset_config_data['file_count'] > 1:
            logger.error(
                'Multi-file datasets are currently not supported. Cancelling load '
                'of the following dataset: {}'.format(dataset_config_data)
            )
            return None

        return local.load_file(dataset_config_data['path'],
                               dataset_config_data['variable'],
                               **optional_args)
    elif data_source == 'rcmed':
        # The original passed min_lon twice; the second longitude argument
        # is the maximum longitude.
        return rcmed.parameter_dataset(dataset_config_data['dataset_id'],
                                       dataset_config_data['parameter_id'],
                                       dataset_config_data['min_lat'],
                                       dataset_config_data['max_lat'],
                                       dataset_config_data['min_lon'],
                                       dataset_config_data['max_lon'],
                                       dataset_config_data['start_time'],
                                       dataset_config_data['end_time'],
                                       **optional_args)
    elif data_source == 'esgf':
        return esgf.load_dataset(dataset_config_data['dataset_id'],
                                 dataset_config_data['variable'],
                                 dataset_config_data['esgf_username'],
                                 dataset_config_data['esgf_password'],
                                 **optional_args)
    elif data_source == 'dap':
        # The original called the config dict itself instead of its .get().
        return dap.load(dataset_config_data['url'],
                        dataset_config_data['variable'],
                        **optional_args)

    logger.error(
        'Unsupported data source requested: {}'.format(data_source)
    )
    return None


def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """ Normalize, subset, rebin and regrid datasets per the config.

    :param reference: The reference dataset, or None.
    :param targets: List of target datasets (may be empty).
    :param config_data: Full parsed configuration; only its ``evaluation``
        section (``subset``, ``temporal_time_delta``, ``spatial_regrid_lats``,
        ``spatial_regrid_lons``) is read here.
    :type config_data: :func:`dict`

    :returns: Tuple of the processed ``(reference, targets)``.
    """
    # A missing or empty `evaluation` section means "no optional processing".
    evaluation_config = config_data.get('evaluation') or {}
    subset = evaluation_config.get('subset', None)
    temporal_time_delta = evaluation_config.get('temporal_time_delta', None)
    spatial_regrid_lats = evaluation_config.get('spatial_regrid_lats', None)
    spatial_regrid_lons = evaluation_config.get('spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with 1-off
    # errors in the resulting dataset shape post-temporal/spatial adjustments
    # that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta == 1:
        string_time_delta = 'daily'

    # Guarded like every later step: the reference may legitimately be None.
    if reference:
        reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        # temporal_time_delta is a number of days.
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        # Each setting is a (min, max, step) triple.
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets


def _load_metric(metric_config_data):
    """ Look up a metric class in :mod:`ocw.metrics` by name.

    :param metric_config_data: Bare metric class name from the config.
    :type metric_config_data: :mod:`string`

    :returns: The attribute of :mod:`ocw.metrics` with that name, or None
        (after logging an error) when the name contains a '.'.
    """
    # If the metric is user defined outside of ocw.metrics we won't currently
    # handle loading it.
    if '.' in metric_config_data:
        logger.error(
            'User-defined metrics outside of the ocw.metrics module '
            'cannot currently be loaded. If you just wanted a metric '
            'found in ocw.metrics then do not specify the full '
            'package and module names. See the documentation for examples.'
        )
        return None

    return getattr(metrics, metric_config_data)


def _load_subregion(subregion_config_data):
    """ Convert a four-element subregion config entry into a Bounds object.

    The four values are cast to float and passed to Bounds in config order.
    """
    return Bounds(float(subregion_config_data[0]),
                  float(subregion_config_data[1]),
                  float(subregion_config_data[2]),
                  float(subregion_config_data[3]))


# ----------------------------------------------------------------------
# The remainder of this region is YAML example content from the same commit.
# It is kept verbatim as comments; nesting was rebuilt from the key structure
# because the flattened source lost the original indentation.
#
# http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/portrait_diagram.yaml
# ocw_config_runner/example/portrait_diagram.yaml (new file in this commit)
#
# evaluation:
#     temporal_time_delta: 365
#     spatial_regrid_lats: !!python/tuple [-40, 40, 1]
#     spatial_regrid_lons: !!python/tuple [-40, 40, 1]
#
# datasets:
#     reference:
#         data_source: local
#         file_count: 1
#         path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc
#         variable: tasmax
#
#     targets:
#         - data_source: local
#           file_count: 1
#           path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc
#           variable: tasmax
#           optional_args:
#               name: WRF
# metrics:
#     - PatternCorrelation
#
# plots:
#     - type: portrait
#       metric_index: 0
#       output_name: portrait_test
#
# subregions:
#     - [-10.0, 0.0, 29.0, 36.5]
#     - [0.0, 10.0, 29.0, 37.5]
#
# http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/simple_model_to_model_bias.yaml
# ocw_config_runner/example/simple_model_to_model_bias.yaml (new file in this
# commit; its content continues past the end of this region)
#
# evaluation:
#     temporal_time_delta: 365
#     spatial_regrid_lats: !!python/tuple [-20, 20, 1]
#     spatial_regrid_lons: !!python/tuple [-20, 20, 1]
#
# datasets:
#     reference:
#         data_source: local
file_count: 1 + path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc + variable: tasmax + + targets: + - data_source: local + file_count: 1 + path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc + variable: tasmax +metrics: + - Bias + +plots: + - type: contour + results_indices: + - !!python/tuple [0, 0] + lats: + range_min: -20 + range_max: 20 + range_step: 1 + lons: + range_min: -20 + range_max: 20 + range_step: 1 + output_name: wrf_bias_compared_to_knmi + optional_args: + gridshape: !!python/tuple [6, 6] http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/subregion.yaml ---------------------------------------------------------------------- diff --git a/ocw_config_runner/example/subregion.yaml b/ocw_config_runner/example/subregion.yaml new file mode 100644 index 0000000..d39e39b --- /dev/null +++ b/ocw_config_runner/example/subregion.yaml @@ -0,0 +1,30 @@ +subregions: + - [-10.0, 0.0, 29.0, 36.5] + - [0.0, 10.0, 29.0, 37.5] + - [10.0, 20.0, 25.0, 32.5] + - [20.0, 33.0, 25.0, 32.5] + - [-19.3,-10.2,12.0, 20.0] + - [15.0, 30.0, 15.0, 25.0] + - [-10.0, 10.0, 7.3, 15.0] + - [-10.9, 10.0, 5.0, 7.3] + - [33.9, 40.0, 6.9, 15.0] + - [10.0, 25.0, 0.0, 10.0] + - [10.0, 25.0,-10.0, 0.0] + - [30.0, 40.0,-15.0, 0.0] + - [33.0, 40.0, 25.0, 35.0] + +datasets: + +metrics: + +plots: + - type: subregion + output_name: subregion_plot + lats: + range_min: -45.0 + range_max: 42.24 + range_step: 0.5 + lons: + range_min: -24.0 + range_max: 60.0 + range_step: 0.5 http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/taylor_diagram_example.yaml ---------------------------------------------------------------------- diff --git a/ocw_config_runner/example/taylor_diagram_example.yaml b/ocw_config_runner/example/taylor_diagram_example.yaml new file mode 100644 index 0000000..66eab3b --- /dev/null +++ b/ocw_config_runner/example/taylor_diagram_example.yaml @@ -0,0 +1,36 @@ +evaluation: + 
temporal_time_delta: 30 + spatial_regrid_lats: !!python/tuple [-45, 42, 1] + spatial_regrid_lons: !!python/tuple [-24, 60, 1] + subset: [-45, 42, -24, 60, "1989-01-01", "1989-12-01"] + +datasets: + reference: + data_source: local + file_count: 1 + path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc + variable: tasmax + optional_args: + name: dataset1 + + targets: + - data_source: local + file_count: 1 + path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc + variable: tasmax + optional_args: + name: dataset2 +metrics: + - StdDevRatio + - PatternCorrelation + +plots: + - type: taylor + stddev_results_indices: + - !!python/tuple [0, 0] + pattern_corr_results_indices: + - !!python/tuple [0, 1] + output_name: taylor_plot + optional_args: + fmt: png + frameon: False http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/time_series_plot_example.yaml ---------------------------------------------------------------------- diff --git a/ocw_config_runner/example/time_series_plot_example.yaml b/ocw_config_runner/example/time_series_plot_example.yaml new file mode 100644 index 0000000..b5599cc --- /dev/null +++ b/ocw_config_runner/example/time_series_plot_example.yaml @@ -0,0 +1,31 @@ +evaluation: + temporal_time_delta: 30 + spatial_regrid_lats: !!python/tuple [-20, 20, 1] + spatial_regrid_lons: !!python/tuple [-20, 20, 1] + subset: [-180, 180, -90, 90, "1989-01-01", "1990-12-01"] + +datasets: + reference: + data_source: local + file_count: 1 + path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc + variable: tasmax + optional_args: + name: KNMI + + targets: + - data_source: local + file_count: 1 + path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc + variable: tasmax + optional_args: + name: WRF +metrics: + +plots: + - type: time_series + time_range: monthly + +subregions: + - [-10.0, 0.0, -19.0, 19.0] + - [0.0, 10.0, -10.0, 10.0] 
# ----------------------------------------------------------------------
# http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/ocw_evaluation_from_config.py
# ocw_config_runner/ocw_evaluation_from_config.py (new file in this commit)
# ----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import logging
import sys

from configuration_parsing import is_config_valid
from evaluation_creation import generate_evaluation_from_config
from plot_generation import plot_from_config

import yaml

logging.basicConfig()
logger = logging.getLogger(__name__)


def _parse_bool_argument(value):
    """ Convert a command line string into a boolean.

    argparse's ``type=bool`` treats every non-empty string (including
    "False") as True, so the text is interpreted explicitly instead.
    """
    return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y')


def run_evaluation_from_config(config_file_path, ignore_config_errors=False):
    """ Run an OCW evaluation specified by a config file.

    :param config_file_path: The file path to a OCW compliant YAML file
        specifying how to run the evaluation. For additional information on
        the valid options that you can set in the config please check the
        project wiki https://cwiki.apache.org/confluence/display/climate/home#.
    :type config_file_path: :mod:`string`

    :param ignore_config_errors: When this is true configuration parsing errors
        will NOT interrupt the evaluation run. Note, it is very unlikely that
        you will want this value set. However it is possible that you will want
        to graph something that doesn't require a full evaluation run. This is
        provided for that situation.
    :type ignore_config_errors: :func:`bool`
    """
    # SECURITY NOTE(review): the full (unsafe) loader is kept on purpose
    # because the example configs in this commit use `!!python/tuple` tags,
    # which yaml.safe_load rejects. Do not run untrusted config files.
    # Naming the Loader keeps the original behaviour and also works on PyYAML
    # releases that require the argument.
    with open(config_file_path, 'r') as config_file:
        config = yaml.load(config_file, Loader=yaml.Loader)

    if not ignore_config_errors and not is_config_valid(config):
        logger.warning(
            'Unable to validate configuration file. Exiting evaluation. '
            'Please check documentation for config information.'
        )

        sys.exit(1)

    evaluation = generate_evaluation_from_config(config)

    # Plots that do not need evaluation results (e.g. subregion diagrams) are
    # still drawn when the evaluation itself cannot be run.
    if evaluation._evaluation_is_valid():
        evaluation.run()

    plot_from_config(evaluation, config)


if __name__ == '__main__':
    description = 'OCW Config Based Evaluation'
    epilog = 'Additional information at https://cwiki.apache.org/confluence/display/climate/home#'

    parser = argparse.ArgumentParser(description=description, epilog=epilog)
    parser.add_argument('config', help='Path to YAML config file for the evaluation')
    parser.add_argument('ignore_config_errors', nargs='?', default=False,
                        type=_parse_bool_argument)
    args = parser.parse_args()

    run_evaluation_from_config(args.config, args.ignore_config_errors)


# ----------------------------------------------------------------------
# http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/plot_generation.py
# ocw_config_runner/plot_generation.py (new file in this commit)
# ----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging

import ocw.dataset_processor as dsp
import ocw.plotter as plots
import ocw.utils as utils

import numpy as np

logging.basicConfig()
logger = logging.getLogger(__name__)


def plot_from_config(evaluation, config_data):
    """ Generate plots for an evaluation from configuration data.

    :param evaluation: The Evaluation for which to generate plots.
    :type evaluation: :class:`ocw.evaluation.Evaluation`
    :param config_data: Dictionary of the data parsed from the supplied YAML
        configuration file.
    :type: :func:`dict`
    """
    for plot in config_data['plots']:
        if plot['type'] == 'contour':
            _draw_contour_plot(evaluation, plot)
        elif plot['type'] == 'subregion':
            _draw_subregion_diagram(evaluation, plot)
        elif plot['type'] == 'taylor':
            _draw_taylor_diagram(evaluation, plot)
        elif plot['type'] == 'time_series':
            _draw_time_series_plot(evaluation, plot)
        elif plot['type'] == 'portrait':
            _draw_portrait_diagram(evaluation, plot)
        else:
            logger.error('Unrecognized plot type requested: {}'.format(plot['type']))


def _axis_values_from_config(axis_config):
    """ Return explicit axis values for a ``lats``/``lons`` config entry.

    A mapping with ``range_min``/``range_max``/``range_step`` keys is expanded
    with :func:`numpy.arange`; anything else (e.g. an explicit list of values)
    is returned unchanged.
    """
    # The original test, type(x) != type(list), compares against `type`
    # itself and is therefore always true, so explicit lists were indexed
    # like dicts and crashed.
    if isinstance(axis_config, dict):
        return np.arange(axis_config['range_min'],
                         axis_config['range_max'],
                         axis_config['range_step'])
    return axis_config


def _draw_contour_plot(evaluation, plot_config):
    """ Draw one contour map per entry of ``results_indices``.

    Each index is ``(target, metric)`` or ``(target, metric, subregion)`` and
    selects the values from ``evaluation.results``. Output files are named
    ``<output_name>_<position of the index in the list>``.
    """
    lats = _axis_values_from_config(plot_config['lats'])
    lons = _axis_values_from_config(plot_config['lons'])

    for i, index in enumerate(plot_config['results_indices']):
        if len(index) == 2:
            target, metric = index
            vals = evaluation.results[target][metric]
        elif len(index) == 3:
            target, metric, subregion = index
            vals = evaluation.results[target][metric][subregion]
        else:
            # Previously this fell through and plotted stale (or undefined)
            # values from an earlier iteration.
            logger.error(
                'Contour plot results index must have 2 or 3 elements. '
                'Skipping index: {}'.format(index)
            )
            continue

        plot_name = plot_config['output_name'] + '_{}'.format(i)
        plots.draw_contour_map(vals,
                               np.array(lats),
                               np.array(lons),
                               plot_name,
                               **plot_config.get('optional_args', {}))


def _draw_taylor_diagram(evaluation, plot_config):
    """ Draw a Taylor diagram from stddev and pattern-correlation results.

    ``stddev_results_indices`` and ``pattern_corr_results_indices`` are lists
    of ``(target, metric)`` or ``(target, metric, subregion)`` indices into
    ``evaluation.results``. The length of the first stddev index decides which
    form is expected for both lists.
    """
    plot_name = plot_config['output_name']
    ref_dataset_name = evaluation.ref_dataset.name
    target_dataset_names = [t.name for t in evaluation.target_datasets]

    stddev_indices = plot_config['stddev_results_indices']
    pattern_corr_indices = plot_config['pattern_corr_results_indices']
    index_length = len(stddev_indices[0])

    if index_length == 2:
        stddev_results = [
            evaluation.results[tar][met]
            for (tar, met) in stddev_indices
        ]

        pattern_corr_results = [
            evaluation.results[tar][met]
            for (tar, met) in pattern_corr_indices
        ]
    elif index_length == 3:
        stddev_results = [
            evaluation.results[tar][met][sub]
            for (tar, met, sub) in stddev_indices
        ]

        pattern_corr_results = [
            evaluation.results[tar][met][sub]
            for (tar, met, sub) in pattern_corr_indices
        ]
    else:
        # Previously this fell through to a NameError on stddev_results.
        logger.error(
            'Taylor diagram results indices must have 2 or 3 elements. '
            'Found: {}'.format(stddev_indices[0])
        )
        return

    # One row per dataset: (stddev, pattern correlation).
    plot_data = np.array([stddev_results, pattern_corr_results]).transpose()

    plots.draw_taylor_diagram(plot_data,
                              target_dataset_names,
                              ref_dataset_name,
                              fname=plot_name,
                              **plot_config.get('optional_args', {}))


def _draw_subregion_diagram(evaluation, plot_config):
    """ Draw the evaluation's subregions over the configured lat/lon axes. """
    lats = _axis_values_from_config(plot_config['lats'])
    lons = _axis_values_from_config(plot_config['lons'])

    plots.draw_subregions(evaluation.subregions,
                          lats,
                          lons,
                          plot_config['output_name'],
                          **plot_config.get('optional_args', {}))


def _draw_portrait_diagram(evaluation, plot_config):
    """ Draw a portrait diagram for one metric across targets and subregions.

    ``metric_index`` selects the metric; rows are targets and columns are
    subregions, labelled ``R0``, ``R1``, ...
    """
    metric_index = plot_config['metric_index']

    # The other plot helpers in this file index results as
    # results[target][metric][subregion], so the metric has to be picked
    # inside every target. The original results[:][metric_index][:] only
    # copied the list and then selected *target* metric_index.
    diagram_data = np.array([
        target_results[metric_index]
        for target_results in evaluation.results
    ])
    subregion_names = ["R{}".format(i) for i in range(len(evaluation.subregions))]
    target_names = [t.name for t in evaluation.target_datasets]

    plots.draw_portrait_diagram(diagram_data,
                                target_names,
                                subregion_names,
                                fname=plot_config['output_name'],
                                **plot_config.get('optional_args', {}))


def _draw_time_series_plot(evaluation, plot_config):
    """ Draw monthly-climatology time series for the reference and targets.

    Only ``time_range: monthly`` is supported. With subregions configured one
    plot named ``R<n>`` is drawn per subregion; otherwise a single plot named
    ``time_series`` is drawn.

    Note: the datasets held by ``evaluation`` are modified in place; their
    ``values`` and ``times`` are replaced by the monthly climatology.
    """
    time_range_info = plot_config['time_range']
    ref_ds = evaluation.ref_dataset
    target_ds = evaluation.target_datasets

    if time_range_info != 'monthly':
        logger.error(
            'Invalid time range provided. Only monthly is supported '
            'at the moment'
        )
        return

    # Reference first, then targets: this is also the order of the plotted
    # series and their labels.
    datasets = [ref_ds] + list(target_ds)
    for ds in datasets:
        ds.values, ds.times = utils.calc_climatology_monthly(ds)

    optional_args = plot_config.get('optional_args', {})

    if evaluation.subregions:
        for bound_count, bound in enumerate(evaluation.subregions):
            results = []
            labels = []

            for ds in datasets:
                subset = dsp.subset(
                    bound,
                    ds,
                    subregion_name="R{}_{}".format(bound_count, ds.name)
                )
                results.append(utils.calc_time_series(subset))
                labels.append(subset.name)

            plots.draw_time_series(np.array(results),
                                   ref_ds.times,
                                   labels,
                                   'R{}'.format(bound_count),
                                   **optional_args)
    else:
        results = [utils.calc_time_series(ds) for ds in datasets]
        labels = [ds.name for ds in datasets]

        plots.draw_time_series(np.array(results),
                               ref_ds.times,
                               labels,
                               'time_series',
                               **optional_args)
