[2/5] climate git commit: replaces ocw-config-runner to ocw_config_runner

huikyole Wed, 09 Sep 2015 15:30:19 -0700

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/tests/test_config_writer.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/tests/test_config_writer.py b/ocw-config-runner/tests/test_config_writer.py
deleted file mode 100644
index f163989..0000000
--- a/ocw-config-runner/tests/test_config_writer.py
+++ /dev/null
@@ -1,768 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from mock import patch
-import os
-import unittest
-
-from ocw.dataset import Dataset, Bounds
-from ocw.evaluation import Evaluation
-import ocw.metrics as metrics
-import configuration_writer as writer
-
-import datetime as dt
-import numpy as np
-import yaml
-
-
-class TestLocalDatasetExportGeneration(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.lats = np.array([10, 12, 14, 16, 18])
-        self.lons = np.array([100, 102, 104, 106, 108])
-        self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
-        flat_array = np.array(range(300))
-        self.values = flat_array.reshape(12, 5, 5)
-        self.variable = 'var'
-        self.units = 'units'
-        self.origin = {
-            'source': 'local',
-            'path': '/a/fake/path.nc',
-            'lat_name': 'a lat name',
-            'lon_name': 'a lon name',
-            'time_name': 'a time name',
-            'elevation_index': 2
-        }
-        self.name = 'name'
-
-        self.dataset = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            origin=self.origin,
-            name=self.name
-        )
-
-        self.exported_info = writer.generate_dataset_config(self.dataset)
-
-    def test_proper_data_source_export(self):
-        self.assertTrue('data_source' in self.exported_info)
-        self.assertEqual(self.exported_info['data_source'],
-                         self.origin['source'])
-
-    def test_proper_path_export(self):
-        self.assertEqual(self.exported_info['path'], self.origin['path'])
-
-    def test_proper_variable_name_export(self):
-        self.assertEqual(self.exported_info['variable'], self.variable)
-
-    def test_proper_units_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['units'],
-                         self.units)
-
-    def test_proper_lats_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['lat_name'],
-                         self.origin['lat_name'])
-
-    def test_proper_lons_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['lon_name'],
-                         self.origin['lon_name'])
-
-    def test_proper_times_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['time_name'],
-                         self.origin['time_name'])
-
-    def test_proper_dataset_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['name'],
-                         self.name)
-
-    def test_proper_elevation_index_export(self):
-        self.assertEqual(self.exported_info['optional_args']['elevation_index'],
-                         self.origin['elevation_index'])
-
-
-class TestRCMEDDatasetExportGeneration(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.lats = np.array([10, 12, 14, 16, 18])
-        self.lons = np.array([100, 102, 104, 106, 108])
-        self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
-        flat_array = np.array(range(300))
-        self.values = flat_array.reshape(12, 5, 5)
-        self.variable = 'var'
-        self.units = 'units'
-        self.origin = {
-            'source': 'rcmed',
-            'dataset_id': 4,
-            'parameter_id': 14
-        }
-        self.name = 'name'
-
-        self.dataset = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            origin=self.origin,
-            name=self.name
-        )
-
-        self.exported_info = writer.generate_dataset_config(self.dataset)
-
-    def test_proper_data_source_export(self):
-        self.assertTrue('data_source' in self.exported_info)
-        self.assertEqual(self.exported_info['data_source'],
-                         self.origin['source'])
-
-    def test_proper_dataset_id_export(self):
-        self.assertEqual(self.exported_info['dataset_id'],
-                         self.origin['dataset_id'])
-
-    def test_proper_parameter_id_export(self):
-        self.assertEqual(self.exported_info['parameter_id'],
-                         self.origin['parameter_id'])
-
-    def test_proper_min_lat_export(self):
-        self.assertEqual(self.exported_info['min_lat'], min(self.lats))
-
-    def test_proper_max_lat_export(self):
-        self.assertEqual(self.exported_info['max_lat'], max(self.lats))
-
-    def test_proper_min_lon_export(self):
-        self.assertEqual(self.exported_info['min_lon'], min(self.lons))
-
-    def test_proper_max_lon_export(self):
-        self.assertEqual(self.exported_info['max_lon'], max(self.lons))
-
-    def test_proper_min_time_export(self):
-        self.assertEqual(self.exported_info['start_time'], str(min(self.times)))
-
-    def test_proper_max_time_export(self):
-        self.assertEqual(self.exported_info['end_time'], str(max(self.times)))
-
-    def test_proper_dataset_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['name'],
-                         self.name)
-
-    def test_proper_units_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['units'],
-                         self.units)
-
-
-class TestESGFDatasetExportGeneration(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.lats = np.array([10, 12, 14, 16, 18])
-        self.lons = np.array([100, 102, 104, 106, 108])
-        self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
-        flat_array = np.array(range(300))
-        self.values = flat_array.reshape(12, 5, 5)
-        self.variable = 'var'
-        self.units = 'units'
-        self.origin = {
-            'source': 'esgf',
-            'dataset_id': 'esgf dataset id',
-            'variable': 'var'
-        }
-        self.name = 'name'
-
-        self.dataset = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            origin=self.origin,
-            name=self.name
-        )
-
-        self.exported_info = writer.generate_dataset_config(self.dataset)
-
-    def test_proper_data_source_export(self):
-        self.assertTrue('data_source' in self.exported_info)
-        self.assertEqual(self.exported_info['data_source'],
-                     self.origin['source'])
-
-    def test_proper_dataset_id_export(self):
-        self.assertEqual(self.exported_info['dataset_id'],
-                         self.origin['dataset_id'])
-
-    def test_proper_variable_export(self):
-        self.assertEqual(self.exported_info['variable'],
-                         self.origin['variable'])
-
-    def test_proper_dummy_username_export(self):
-        self.assertTrue('esgf_username' in self.exported_info)
-
-    def test_proper_dummy_password_export(self):
-        self.assertTrue('esgf_password' in self.exported_info)
-
-    def test_proper_dataset_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['name'],
-                         self.name)
-
-    def test_proper_units_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['units'],
-                         self.units)
-
-
-class TestDAPDatasetExportGeneration(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.lats = np.array([10, 12, 14, 16, 18])
-        self.lons = np.array([100, 102, 104, 106, 108])
-        self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
-        flat_array = np.array(range(300))
-        self.values = flat_array.reshape(12, 5, 5)
-        self.variable = 'var'
-        self.units = 'units'
-        self.origin = {
-            'source': 'dap',
-            'url': 'a fake url',
-        }
-        self.name = 'name'
-
-        self.dataset = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            origin=self.origin,
-            name=self.name
-        )
-
-        self.exported_info = writer.generate_dataset_config(self.dataset)
-
-    def test_proper_data_source_export(self):
-        self.assertTrue('data_source' in self.exported_info)
-        self.assertEqual(self.exported_info['data_source'],
-                     self.origin['source'])
-
-    def test_proper_url_export(self):
-        self.assertEqual(self.exported_info['url'],
-                         self.origin['url'])
-
-    def test_proper_dataset_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['name'],
-                         self.name)
-
-    def test_proper_units_name_export(self):
-        self.assertEqual(self.exported_info['optional_args']['units'],
-                         self.units)
-
-
-class TestDatasetExportFromEvaluation(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.lats = np.array([10, 12, 14, 16, 18])
-        self.lons = np.array([100, 102, 104, 106, 108])
-        self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
-        flat_array = np.array(range(300))
-        self.values = flat_array.reshape(12, 5, 5)
-        self.variable = 'var'
-        self.units = 'units'
-        self.name = 'name'
-
-        self.local_origin = {
-            'source': 'local',
-            'path': '/a/fake/path.nc',
-            'lat_name': 'a lat name',
-            'lon_name': 'a lon name',
-            'time_name': 'a time name',
-            'elevation_index': 2
-        }
-
-        self.rcmed_origin = {
-            'source': 'rcmed',
-            'dataset_id': 4,
-            'parameter_id': 14
-        }
-
-        self.esgf_origin = {
-            'source': 'esgf',
-            'dataset_id': 'esgf dataset id',
-            'variable': 'var'
-        }
-
-        self.dap_origin = {
-            'source': 'dap',
-            'url': 'a fake url',
-        }
-
-        self.local_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.local_origin
-        )
-
-        self.rcmed_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.rcmed_origin
-        )
-
-        self.esgf_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.esgf_origin
-        )
-
-        self.dap_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.dap_origin
-        )
-
-        self.evaluation = Evaluation(
-            self.local_ds,
-            [self.rcmed_ds, self.esgf_ds, self.dap_ds],
-            []
-        )
-
-    def test_contains_only_reference_dataset(self):
-        new_eval = Evaluation(self.local_ds, [], [])
-        out = writer.generate_dataset_information(new_eval)
-
-        self.assertTrue('reference' in out)
-        self.assertTrue('targets' not in out)
-
-    def test_contains_only_target_datasets(self):
-        new_eval = Evaluation(None, [self.local_ds], [])
-        out = writer.generate_dataset_information(new_eval)
-
-        self.assertTrue('reference' not in out)
-        self.assertTrue('targets' in out)
-
-    def test_proper_reference_dataset_export(self):
-        out = writer.generate_dataset_information(self.evaluation)
-
-        self.assertTrue('reference' in out)
-        self.assertTrue(out['reference']['data_source'] == 'local')
-
-    def test_proper_target_datasets_export(self):
-        out = writer.generate_dataset_information(self.evaluation)
-
-        self.assertTrue('targets' in out)
-        self.assertTrue(type(out['targets']) == type(list()))
-        self.assertTrue(len(out['targets']) == 3)
-
-
-class TestMetricExportGeneration(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.bias = metrics.Bias()
-        self.tmp_std_dev = metrics.TemporalStdDev()
-        loaded_metrics = [self.bias, self.tmp_std_dev]
-
-        self.evaluation = Evaluation(None, [], loaded_metrics)
-
-    def test_proper_export_format(self):
-        out = writer.generate_metric_information(self.evaluation)
-
-        self.assertTrue(type(out) == type(list()))
-
-        for name in out:
-            self.assertTrue(type(name) == type(str()))
-
-    def test_proper_metric_name_export(self):
-        out = writer.generate_metric_information(self.evaluation)
-
-        self.assertTrue(self.bias.__class__.__name__ in out)
-        self.assertTrue(self.tmp_std_dev.__class__.__name__ in out)
-
-    def test_empty_metrics_in_evaluation(self):
-        new_eval = Evaluation(None, [], [])
-        out = writer.generate_metric_information(new_eval)
-
-        self.assertTrue(type(out) == type(list()))
-        self.assertTrue(len(out) == 0)
-
-
-class TestEvaluationSettingsGeneration(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.lats = np.array(range(-10, 10, 1))
-        self.lons = np.array(range(-20, 20, 1))
-        self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
-        flat_array = np.array(range(9600))
-        self.values = flat_array.reshape(12, 20, 40)
-
-        self.dataset = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-        )
-
-        self.evaluation = Evaluation(self.dataset, [], [])
-
-    def test_default_data_return(self):
-        new_eval = Evaluation(None, [], [])
-        default_output = {
-            'temporal_time_delta': 999,
-            'spatial_regrid_lats': (-90, 90, 1),
-            'spatial_regrid_lons': (-180, 180, 1),
-            'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
-        }
-
-        out = writer.generate_evaluation_information(new_eval)
-
-        self.assertEquals(default_output, out)
-
-    def test_handles_only_reference_dataset(self):
-        new_eval = Evaluation(self.dataset, [], [])
-
-        default_output = {
-            'temporal_time_delta': 999,
-            'spatial_regrid_lats': (-90, 90, 1),
-            'spatial_regrid_lons': (-180, 180, 1),
-            'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
-        }
-
-        out = writer.generate_evaluation_information(new_eval)
-
-        self.assertNotEquals(default_output, out)
-
-    def test_handles_only_target_dataset(self):
-        new_eval = Evaluation(None, [self.dataset], [])
-
-        default_output = {
-            'temporal_time_delta': 999,
-            'spatial_regrid_lats': (-90, 90, 1),
-            'spatial_regrid_lons': (-180, 180, 1),
-            'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
-        }
-
-        out = writer.generate_evaluation_information(new_eval)
-
-        self.assertNotEquals(default_output, out)
-
-    def test_daily_temporal_bin(self):
-        new_times = np.array([dt.datetime(2000, 1, 1, x) for x in range(1, 13)])
-
-        dataset = Dataset(
-            self.lats,
-            self.lons,
-            new_times,
-            self.values,
-        )
-        new_eval = Evaluation(dataset, [], [])
-
-        out = writer.generate_evaluation_information(new_eval)
-
-        self.assertEquals(out['temporal_time_delta'], 1)
-
-    def test_monthly_temporal_bin(self):
-        out = writer.generate_evaluation_information(self.evaluation)
-
-        self.assertEquals(out['temporal_time_delta'], 31)
-
-    def test_yearly_temporal_bin(self):
-        new_times = np.array([dt.datetime(2000 + x, 1, 1) for x in range(1, 13)])
-
-        dataset = Dataset(
-            self.lats,
-            self.lons,
-            new_times,
-            self.values,
-        )
-        new_eval = Evaluation(dataset, [], [])
-
-        out = writer.generate_evaluation_information(new_eval)
-
-        self.assertEquals(out['temporal_time_delta'], 366)
-
-    def test_spatial_regrid_lats(self):
-        out = writer.generate_evaluation_information(self.evaluation)
-
-        lats = out['spatial_regrid_lats']
-        lat_range = np.arange(lats[0], lats[1], lats[2])
-
-        self.assertTrue(np.array_equal(lat_range, self.lats))
-
-    def test_spatial_regrid_lons(self):
-        out = writer.generate_evaluation_information(self.evaluation)
-
-        lons = out['spatial_regrid_lons']
-        lat_range = np.arange(lons[0], lons[1], lons[2])
-
-        self.assertTrue(np.array_equal(lat_range, self.lons))
-
-    def test_subset_with_single_dataset(self):
-        out = writer.generate_evaluation_information(self.evaluation)
-        subset = out['subset']
-
-        ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = self.dataset.spatial_boundaries()
-        start, end = self.dataset.time_range()
-
-        self.assertEqual(ds_lat_min, subset[0])
-        self.assertEqual(ds_lat_max, subset[1])
-        self.assertEqual(ds_lon_min, subset[2])
-        self.assertEqual(ds_lon_max, subset[3])
-        self.assertEquals(str(start), subset[4])
-        self.assertEquals(str(end), subset[5])
-
-    def test_subset_with_multiple_datasets(self):
-        new_ds = Dataset(
-            np.arange(0, 20, 1),
-            self.lons,
-            self.times,
-            self.values
-        )
-        new_eval = Evaluation(self.dataset, [new_ds], [])
-
-        out = writer.generate_evaluation_information(new_eval)
-        subset = out['subset']
-
-        ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = self.dataset.spatial_boundaries()
-        start, end = self.dataset.time_range()
-
-        self.assertEqual(ds_lat_min, subset[0])
-        # Check that we actually used the different max lat value that we
-        # created by adding 'new_ds'.
-        self.assertEqual(max(new_ds.lats), subset[1])
-        self.assertEqual(ds_lon_min, subset[2])
-        self.assertEqual(ds_lon_max, subset[3])
-        self.assertEquals(str(start), subset[4])
-        self.assertEquals(str(end), subset[5])
-
-
-class FullExportTest(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        self.lats = np.array([10, 12, 14, 16, 18])
-        self.lons = np.array([100, 102, 104, 106, 108])
-        self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
-        flat_array = np.array(range(300))
-        self.values = flat_array.reshape(12, 5, 5)
-        self.variable = 'var'
-        self.units = 'units'
-        self.name = 'name'
-
-        self.local_origin = {
-            'source': 'local',
-            'path': '/a/fake/path.nc',
-            'lat_name': 'a lat name',
-            'lon_name': 'a lon name',
-            'time_name': 'a time name',
-            'elevation_index': 2
-        }
-
-        self.rcmed_origin = {
-            'source': 'rcmed',
-            'dataset_id': 4,
-            'parameter_id': 14
-        }
-
-        self.esgf_origin = {
-            'source': 'esgf',
-            'dataset_id': 'esgf dataset id',
-            'variable': 'var'
-        }
-
-        self.dap_origin = {
-            'source': 'dap',
-            'url': 'a fake url',
-        }
-
-        self.local_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.local_origin
-        )
-
-        self.rcmed_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.rcmed_origin
-        )
-
-        self.esgf_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.esgf_origin
-        )
-
-        self.dap_ds = Dataset(
-            self.lats,
-            self.lons,
-            self.times,
-            self.values,
-            variable=self.variable,
-            units=self.units,
-            name=self.name,
-            origin=self.dap_origin
-        )
-
-        self.subregions = [
-            Bounds(-10, 10, -20, 20),
-            Bounds(-5, 5, -15, 15)
-        ]
-
-        self.evaluation = Evaluation(
-            self.local_ds,
-            [self.rcmed_ds, self.esgf_ds, self.dap_ds],
-            [metrics.Bias(), metrics.TemporalStdDev()],
-            subregions=self.subregions
-        )
-
-    @classmethod
-    def tearDownClass(self):
-        if os.path.isfile('/tmp/test_config.yaml'):
-            os.remove('/tmp/test_config.yaml')
-
-    def test_full_export(self):
-        file_path = '/tmp/test_config.yaml'
-        writer.export_evaluation_to_config(
-            self.evaluation,
-            file_path=file_path
-        )
-
-        self.assertTrue(os.path.isfile(file_path))
-
-    def test_proper_metric_export(self):
-        file_path = '/tmp/test_config.yaml'
-        writer.export_evaluation_to_config(
-            self.evaluation,
-            file_path=file_path
-        )
-
-        data = yaml.load(open(file_path, 'r'))
-
-        self.assertTrue('metrics' in data)
-        self.assertTrue(type(data['metrics']) == type(list()))
-
-        for metric in self.evaluation.metrics:
-            self.assertTrue(metric.__class__.__name__ in data['metrics'])
-
-        for metric in self.evaluation.unary_metrics:
-            self.assertTrue(metric.__class__.__name__ in data['metrics'])
-
-        total_eval_metrics = (
-            len(self.evaluation.metrics) +
-            len(self.evaluation.unary_metrics)
-        )
-
-        self.assertTrue(total_eval_metrics, len(data['metrics']))
-
-    def test_proper_dataset_export(self):
-        file_path = '/tmp/test_config.yaml'
-        writer.export_evaluation_to_config(
-            self.evaluation,
-            file_path=file_path
-        )
-
-        data = yaml.load(open(file_path, 'r'))
-
-        self.assertTrue('datasets' in data)
-        self.assertTrue('reference' in data['datasets'])
-        self.assertTrue('targets' in data['datasets'])
-
-        self.assertAlmostEqual(
-            writer.generate_dataset_information(self.evaluation),
-            data['datasets']
-        )
-
-    def test_proper_evaluation_setting_export(self):
-        file_path = '/tmp/test_config.yaml'
-        writer.export_evaluation_to_config(
-            self.evaluation,
-            file_path=file_path
-        )
-
-        data = yaml.load(open(file_path, 'r'))
-
-        self.assertTrue('evaluation' in data)
-        self.assertTrue('temporal_time_delta' in data['evaluation'])
-        self.assertTrue('spatial_regrid_lats' in data['evaluation'])
-        self.assertTrue('spatial_regrid_lons' in data['evaluation'])
-        self.assertTrue('subset' in data['evaluation'])
-
-        self.assertAlmostEqual(
-            writer.generate_evaluation_information(self.evaluation),
-            data['evaluation']
-        )
-
-    def test_proper_subregion_export(self):
-        file_path = '/tmp/test_config.yaml'
-        writer.export_evaluation_to_config(
-            self.evaluation,
-            file_path=file_path
-        )
-
-        data = yaml.load(open(file_path, 'r'))
-
-        self.assertTrue('subregions' in data)
-
-        first_bounds = [
-            self.subregions[0].lat_min,
-            self.subregions[0].lat_max,
-            self.subregions[0].lon_min,
-            self.subregions[0].lon_max,
-        ]
-        second_bounds = [
-            self.subregions[1].lat_min,
-            self.subregions[1].lat_max,
-            self.subregions[1].lon_min,
-            self.subregions[1].lon_max,
-        ]
-
-        self.assertEqual(first_bounds, data['subregions'][0])
-        self.assertEqual(second_bounds, data['subregions'][1])


http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw-config-runner/tests/test_evaluation_creation.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/tests/test_evaluation_creation.py b/ocw-config-runner/tests/test_evaluation_creation.py
deleted file mode 100644
index 41f998e..0000000
--- a/ocw-config-runner/tests/test_evaluation_creation.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from mock import patch
-import unittest
-
-import evaluation_creation as eval_create
-import ocw.metrics
-
-import yaml
-
-class TestMetricLoad(unittest.TestCase):
-    def test_valid_metric_load(self):
-        config = yaml.load("""
-            metrics:
-                - Bias
-        """)
-        loaded_metrics = [eval_create._load_metric(m)()
-                          for m in config['metrics']]
-        self.assertTrue(isinstance(loaded_metrics[0], ocw.metrics.Bias))
-
-    @patch('evaluation_creation.logger')
-    def test_invalid_metric_load(self, mock_logger):
-        config = yaml.load("""
-            metrics:
-                - ocw.metrics.Bias
-        """)
-        eval_create._load_metric(config['metrics'][0])
-        error = (
-            'User-defined metrics outside of the ocw.metrics module '
-            'cannot currently be loaded. If you just wanted a metric '
-            'found in ocw.metrics then do not specify the full '
-            'package and module names. See the documentation for examples.'
-        )
-        mock_logger.error.assert_called_with(error)

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/configuration_parsing.py
----------------------------------------------------------------------
diff --git a/ocw_config_runner/configuration_parsing.py b/ocw_config_runner/configuration_parsing.py
new file mode 100644
index 0000000..5c28249
--- /dev/null
+++ b/ocw_config_runner/configuration_parsing.py
@@ -0,0 +1,285 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+import re
+import sys
+
+import ocw.metrics as metrics
+
+import yaml
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def is_config_valid(config_data):
+    """ Check that parsed configuration data can drive an evaluation.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: True when the data passes both the minimal-content check and
+        the well-formedness check, False otherwise. The first failing check
+        is logged as an error and the remaining check is not run.
+    """
+    checks = (
+        (_valid_minimal_config,
+         'Insufficient configuration file data for an evaluation'),
+        (_config_is_well_formed,
+         'Configuration data is not well formed'),
+    )
+
+    for check, failure_message in checks:
+        if not check(config_data):
+            logger.error(failure_message)
+            return False
+
+    return True
+
+def _valid_minimal_config(config_data):
+    """ Check that the config holds the minimum data needed to evaluate.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: False, after logging an error, when the ``datasets`` or
+        ``metrics`` section is absent, when a unary metric is requested but
+        neither a ``reference`` nor a ``targets`` entry exists, or when a
+        binary metric is requested without both entries. True otherwise.
+    """
+    if 'datasets' not in config_data:
+        logger.error('No datasets specified in configuration data.')
+        return False
+
+    if 'metrics' not in config_data:
+        logger.error('No metrics specified in configuration data.')
+        return False
+
+    configured_metrics = config_data['metrics']
+
+    # The datasets section is only inspected when a metric actually needs it.
+    if _contains_unary_metrics(configured_metrics):
+        datasets = config_data['datasets']
+        if 'reference' not in datasets and 'targets' not in datasets:
+            logger.error(
+                'Unary metric in configuration data requires either a '
+                'reference or target dataset to be present for evaluation. '
+                'Please ensure that your config is well formed.'
+            )
+            return False
+
+    if _contains_binary_metrics(configured_metrics):
+        datasets = config_data['datasets']
+        if 'reference' not in datasets or 'targets' not in datasets:
+            logger.error(
+                'Binary metric in configuration requires both a reference '
+                'and target dataset to be present for evaluation. Please '
+                'ensure that your config is well formed.'
+            )
+            return False
+
+    return True
+
+def _config_is_well_formed(config_data):
+    """ Check the structure of every section present in the config.
+
+    Every check is run even after one fails, so each problem found is
+    logged rather than only the first.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file. The ``datasets`` and ``metrics`` keys are read
+        unconditionally; ``subregions`` and ``plots`` are optional.
+    :type config_data: :func:`dict`
+
+    :returns: True if no check failed, False otherwise.
+    """
+    is_well_formed = True
+    datasets = config_data['datasets']
+
+    if 'reference' in datasets:
+        if not _valid_dataset_config_data(datasets['reference']):
+            is_well_formed = False
+
+    if 'targets' in datasets:
+        targets = datasets['targets']
+        if not isinstance(targets, list):
+            err = (
+                'Expected to find list of target datasets but instead found '
+                'object of type {}'
+            ).format(type(targets))
+            logger.error(err)
+            is_well_formed = False
+        else:
+            for t in targets:
+                if not _valid_dataset_config_data(t):
+                    is_well_formed = False
+
+    available_metrics = _fetch_built_in_metrics()
+    for metric in config_data['metrics']:
+        if metric not in available_metrics:
+            warning = (
+                'Unable to locate metric name {} in built-in metrics. If this '
+                'is not a user defined metric then please check for potential '
+                'misspellings.'
+            ).format(metric)
+            logger.warn(warning)
+            is_well_formed = False
+
+    if 'subregions' in config_data:
+        for subregion in config_data['subregions']:
+            if not _valid_subregion_config_data(subregion):
+                is_well_formed = False
+
+    if 'plots' in config_data:
+        for plot in config_data['plots']:
+            if not _valid_plot_config_data(plot):
+                is_well_formed = False
+            # Ensure that if we're trying to make a plot that requires
+            # subregion info that the config has this present.
+            elif plot['type'] in ['subregion', 'portrait']:
+                if ('subregions' not in config_data or
+                        len(config_data['subregions']) < 1):
+                    logger.error(
+                        'Plot config that requires subregion '
+                        'information is present in a config file '
+                        'without adequate subregion information '
+                        'provided. Please ensure that you have '
+                        'properly supplied 1 or more subregion config '
+                        'values.'
+                    )
+                    is_well_formed = False
+
+    return is_well_formed
+
+def _contains_unary_metrics(config_metric_data):
+    """ Report whether any configured metric names a unary metric.
+
+    :param config_metric_data: Iterable of metric names from the config.
+
+    :returns: True if at least one name matches the class name of a direct
+        subclass of ``metrics.UnaryMetric``, False otherwise.
+    """
+    unary_names = [klass.__name__
+                   for klass in metrics.UnaryMetric.__subclasses__()]
+
+    for name in config_metric_data:
+        if name in unary_names:
+            return True
+
+    return False
+
+def _contains_binary_metrics(config_metric_data):
+    """ Report whether any configured metric names a binary metric.
+
+    :param config_metric_data: Iterable of metric names from the config.
+
+    :returns: True if at least one name matches the class name of a direct
+        subclass of ``metrics.BinaryMetric``, False otherwise.
+    """
+    binary_names = [klass.__name__
+                    for klass in metrics.BinaryMetric.__subclasses__()]
+
+    for name in config_metric_data:
+        if name in binary_names:
+            return True
+
+    return False
+
+def _fetch_built_in_metrics():
+    """ List the class names of the metrics shipped in ``ocw.metrics``.
+
+    :returns: A list holding the names of the direct subclasses of
+        ``metrics.UnaryMetric`` followed by those of
+        ``metrics.BinaryMetric``.
+    """
+    built_in_names = []
+
+    for klass in metrics.UnaryMetric.__subclasses__():
+        built_in_names.append(klass.__name__)
+
+    for klass in metrics.BinaryMetric.__subclasses__():
+        built_in_names.append(klass.__name__)
+
+    return built_in_names
+
+def _valid_dataset_config_data(dataset_config_data):
+    """ Check that one dataset entry carries the keys its source requires.
+
+    :param dataset_config_data: Mapping describing a single dataset. Its
+        ``data_source`` value must be one of ``local``, ``rcmed``, ``esgf``
+        or ``dap``.
+
+    :returns: True if every key required for the data source is present
+        (and, for a local dataset whose ``file_count`` is greater than 1,
+        ``file_glob_pattern`` as well). False, after logging an error,
+        otherwise.
+    """
+    try:
+        data_source = dataset_config_data['data_source']
+    except KeyError:
+        logger.error('Dataset does not contain a data_source attribute.')
+        return False
+
+    # Keys that each supported data source must supply.
+    required_by_source = (
+        ('local', ('data_source', 'file_count', 'path', 'variable')),
+        ('rcmed', ('dataset_id', 'parameter_id', 'min_lat', 'max_lat',
+                   'min_lon', 'max_lon', 'start_time', 'end_time')),
+        ('esgf', ('data_source', 'dataset_id', 'variable',
+                  'esgf_username', 'esgf_password')),
+        ('dap', ('url', 'variable')),
+    )
+
+    for source, keys in required_by_source:
+        if data_source == source:
+            required_keys = set(keys)
+            break
+    else:
+        logger.error('Dataset does not contain a valid data_source location.')
+        return False
+
+    missing_keys = required_keys - set(dataset_config_data.keys())
+    if missing_keys:
+        logger.error(
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(
+                ', '.join(sorted(missing_keys)))
+        )
+        return False
+
+    # If the dataset is a multi-file dataset then we need to make sure
+    # that the file glob pattern is included.
+    if data_source == 'local' and dataset_config_data['file_count'] > 1:
+        if 'file_glob_pattern' not in dataset_config_data:
+            logger.error(
+                'Multi-file local dataset is missing key: file_glob_pattern'
+            )
+            return False
+
+    return True
+
+def _valid_plot_config_data(plot_config_data):
+    """ Check that one plot entry carries the keys its plot type requires.
+
+    :param plot_config_data: Mapping describing a single plot. Its ``type``
+        value must be one of ``contour``, ``taylor``, ``subregion``,
+        ``time_series`` or ``portrait``.
+
+    :returns: True if every key required for the plot type is present.
+        False, after logging an error, otherwise.
+    """
+    try:
+        plot_type = plot_config_data['type']
+    except KeyError:
+        logger.error('Plot config does not include a type attribute.')
+        return False
+
+    # Keys that each supported plot type must supply.
+    required_by_type = (
+        ('contour', ('results_indices', 'lats', 'lons', 'output_name')),
+        ('taylor', ('stddev_results_indices',
+                    'pattern_corr_results_indices',
+                    'output_name')),
+        ('subregion', ('lats', 'lons', 'output_name')),
+        ('time_series', ('time_range',)),
+        ('portrait', ('metric_index', 'output_name')),
+    )
+
+    for known_type, keys in required_by_type:
+        if plot_type == known_type:
+            required_keys = set(keys)
+            break
+    else:
+        logger.error('Invalid plot type specified.')
+        return False
+
+    missing_keys = required_keys - set(plot_config_data.keys())
+    if len(missing_keys) == 0:
+        return True
+
+    missing = sorted(list(missing_keys))
+    logger.error(
+        'Plot config does not contain required keys. '
+        'The following keys are missing: {}'.format(', '.join(missing))
+    )
+    return False
+
+def _valid_subregion_config_data(subregion_config_data):
+    """ Check that a subregion entry is a four-element list.
+
+    :param subregion_config_data: A single entry from the ``subregions``
+        section of the config.
+
+    :returns: True if the entry is a list of length 4. False, after logging
+        an error describing the expected form, otherwise.
+    """
+    # The length is only inspected once the entry is known to be a list.
+    is_bounding_box = (type(subregion_config_data) == type([])
+                       and len(subregion_config_data) == 4)
+    if is_bounding_box:
+        return True
+
+    logger.error(
+        'Subregions should be passed as a list of lists where '
+        'each sub-list contains a bounding box of the form: '
+        '[lat_min, lat_max, lon_min, lon_max].'
+    )
+    return False

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/configuration_writer.py
----------------------------------------------------------------------
diff --git a/ocw_config_runner/configuration_writer.py b/ocw_config_runner/configuration_writer.py
new file mode 100644
index 0000000..8fc9242
--- /dev/null
+++ b/ocw_config_runner/configuration_writer.py
@@ -0,0 +1,302 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+import logging
+
+import yaml
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def export_evaluation_to_config(evaluation, file_path='./exported_eval.yaml'):
+    ''' Export an evaluation to a config file
+
+    The written YAML document has four top-level sections: ``evaluation``,
+    ``datasets``, ``metrics`` and ``subregions``.
+
+    :param evaluation: The evaluation object to export.
+    :type evaluation: :class:`evaluation.Evaluation`
+
+    :param file_path: Optional file path where the config file should be saved.
+    :type file_path: :mod:`string`
+    '''
+    config = {
+        'evaluation': generate_evaluation_information(evaluation),
+        'datasets': generate_dataset_information(evaluation),
+        'metrics': generate_metric_information(evaluation),
+        'subregions': generate_subregion_information(evaluation),
+    }
+
+    # ``open`` in a ``with`` block closes the handle even if the dump fails;
+    # the ``file`` builtin used previously only exists on Python 2.
+    with open(file_path, 'w') as config_file:
+        yaml.dump(config, config_file)
+
+def generate_dataset_information(evaluation):
+    ''' Build the ``datasets`` config section for an Evaluation object.
+
+    :param evaluation: The evaluation object whose datasets are exported.
+    :type evaluation: :class:`evaluation.Evaluation`
+
+    :returns: A :func:`dict` with a ``reference`` entry when the evaluation
+        has a reference dataset and a ``targets`` list when it has at least
+        one target dataset. Either key is omitted when not applicable.
+    :rtype: :func:`dict`
+    '''
+    exported = {}
+
+    if evaluation.ref_dataset:
+        exported['reference'] = generate_dataset_config(evaluation.ref_dataset)
+
+    if len(evaluation.target_datasets) > 0:
+        target_configs = []
+        for target in evaluation.target_datasets:
+            target_configs.append(generate_dataset_config(target))
+        exported['targets'] = target_configs
+
+    return exported
+
+def generate_dataset_config(dataset):
+    ''' Build the config entry for a single Dataset object.
+
+    The extractor is chosen from ``dataset.origin['source']``; the dataset's
+    ``name`` and ``units`` are then stored under ``optional_args``.
+
+    :param dataset: The dataset from which to extract configuration
+        information.
+    :type dataset: :class:`dataset.Dataset`
+
+    :returns: :func:`dict` containing necessary information for
+        dataset to be saved into a configuration object.
+
+    :raises AttributeError: If the origin source is not one of ``local``,
+        ``rcmed``, ``esgf`` or ``dap``.
+    '''
+    source = dataset.origin['source']
+
+    extractors = (
+        ('local', _extract_local_dataset_info),
+        ('rcmed', _extract_rcmed_dataset_info),
+        ('esgf', _extract_esgf_dataset_info),
+        ('dap', _extract_dap_dataset_info),
+    )
+
+    for known_source, extract in extractors:
+        if source == known_source:
+            info = extract(dataset)
+            break
+    else:
+        message = (
+            "Unexpected source in dataset origin information."
+            "Found {}."
+        ).format(source)
+        logger.error(message)
+        raise AttributeError(message)
+
+    optional_args = info['optional_args']
+    optional_args['name'] = dataset.name
+    optional_args['units'] = dataset.units
+
+    return info
+
+def generate_metric_information(evaluation):
+    ''' Build the ``metrics`` config section for an Evaluation object.
+
+    :param evaluation: The evaluation object from which to extract metrics.
+    :type evaluation: :class:`evaluation.Evaluation`
+
+    :returns: A :func:`list` of class names: those of the objects in
+        ``evaluation.unary_metrics`` followed by those in
+        ``evaluation.metrics``.
+    :rtype: :func:`list` of :mod:`string`
+    '''
+    metric_names = []
+
+    for metric in evaluation.unary_metrics:
+        metric_names.append(metric.__class__.__name__)
+
+    for metric in evaluation.metrics:
+        metric_names.append(metric.__class__.__name__)
+
+    return metric_names
+
+def generate_evaluation_information(evaluation):
+    ''' Build the ``evaluation`` config section for an Evaluation object.
+
+    The temporal bin size, the regridding lat/lon ranges and the subset
+    bounds are derived from the evaluation's reference and target datasets.
+    When the evaluation holds no datasets a fixed set of defaults is
+    returned instead.
+
+    The derived values are a best guess and may not match the settings the
+    evaluation was originally built with. Please double check the output
+    before blindly trusting what this generates.
+
+    :param evaluation: The evaluation object from which to extract settings.
+    :type evaluation: :class:`evaluation.Evaluation`
+
+    :returns: A dictionary of valid `evaluation` section settings for export
+        to a configuration file.
+    :rtype: :func:`dict`
+    '''
+    datasets = [evaluation.ref_dataset] if evaluation.ref_dataset else []
+    if evaluation.target_datasets:
+        datasets += evaluation.target_datasets
+
+    if len(datasets) == 0:
+        return {
+            'temporal_time_delta': 999,
+            'spatial_regrid_lats': (-90, 90, 1),
+            'spatial_regrid_lons': (-180, 180, 1),
+            'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
+        }
+
+    time_delta = _calc_temporal_bin_size(datasets)
+    regrid_lats, regrid_lons = _calc_spatial_lat_lon_grid(datasets)
+    subset = _calc_subset_config(datasets)
+
+    return {
+        'temporal_time_delta': time_delta,
+        'spatial_regrid_lats': regrid_lats,
+        'spatial_regrid_lons': regrid_lons,
+        'subset': subset,
+    }
+
+def generate_subregion_information(evaluation):
+    ''' Build the ``subregions`` config section for an Evaluation object.
+
+    :param evaluation: The evaluation object whose subregions are exported.
+    :type evaluation: :class:`evaluation.Evaluation`
+
+    :returns: A :func:`list` with one ``[lat_min, lat_max, lon_min,
+        lon_max]`` :func:`list` per entry in ``evaluation.subregions``.
+    :rtype: :func:`list` of :func:`list`
+    '''
+    return [
+        [region.lat_min, region.lat_max, region.lon_min, region.lon_max]
+        for region in evaluation.subregions
+    ]
+
+def _extract_local_dataset_info(dataset):
+    ''' Build the config entry for a dataset loaded from a local file.
+
+    :param dataset: Dataset whose ``origin`` holds ``path``, ``lat_name``,
+        ``lon_name`` and ``time_name`` and, optionally, ``elevation_index``.
+
+    :returns: :func:`dict` with ``data_source`` set to ``local``,
+        ``file_count`` fixed at 1, the path and variable, and the coordinate
+        names (plus the elevation index when present) in ``optional_args``.
+    '''
+    origin = dataset.origin
+
+    info = {
+        'data_source': 'local',
+        'file_count': 1,
+        'path': origin['path'],
+        'variable': dataset.variable,
+    }
+
+    optional_args = {
+        'lat_name': origin['lat_name'],
+        'lon_name': origin['lon_name'],
+        'time_name': origin['time_name'],
+    }
+    if 'elevation_index' in origin:
+        optional_args['elevation_index'] = origin['elevation_index']
+
+    info['optional_args'] = optional_args
+    return info
+
+def _extract_rcmed_dataset_info(dataset):
+    ''' Build the config entry for a dataset loaded from RCMED.
+
+    :param dataset: Dataset whose ``origin`` holds ``dataset_id`` and
+        ``parameter_id`` and which provides ``spatial_boundaries()`` and
+        ``time_range()``.
+
+    :returns: :func:`dict` with ``data_source`` set to ``rcmed``, the two
+        ids, the dataset's spatial bounds, its start and end times rendered
+        with ``str`` and an empty ``optional_args`` dict.
+    '''
+    bounds = dataset.spatial_boundaries()
+    time_span = dataset.time_range()
+
+    min_lat, max_lat, min_lon, max_lon = bounds
+    start_time, end_time = time_span
+
+    return {
+        'optional_args': {},
+        'data_source': 'rcmed',
+        'dataset_id': dataset.origin['dataset_id'],
+        'parameter_id': dataset.origin['parameter_id'],
+        'min_lat': min_lat,
+        'max_lat': max_lat,
+        'min_lon': min_lon,
+        'max_lon': max_lon,
+        'start_time': str(start_time),
+        'end_time': str(end_time),
+    }
+
+def _extract_esgf_dataset_info(dataset):
+    ''' Build the config entry for a dataset loaded from ESGF.
+
+    Credentials are never exported; placeholder strings are written in
+    their place for the user to fill in.
+
+    :param dataset: Dataset whose ``origin`` holds ``dataset_id`` and
+        ``variable``.
+
+    :returns: :func:`dict` with ``data_source`` set to ``esgf``, the id and
+        variable from the origin, the credential placeholders and an empty
+        ``optional_args`` dict.
+    '''
+    origin = dataset.origin
+
+    return {
+        'optional_args': {},
+        'data_source': 'esgf',
+        'dataset_id': origin['dataset_id'],
+        'variable': origin['variable'],
+        'esgf_username': 'Put your ESGF Username here',
+        'esgf_password': 'Put your ESGF Password here',
+    }
+
+def _extract_dap_dataset_info(dataset):
+    ''' Build the config entry for a dataset loaded over OpenDAP.
+
+    :param dataset: Dataset whose ``origin`` holds ``url`` and which has a
+        ``variable`` attribute.
+
+    :returns: :func:`dict` with ``data_source`` set to ``dap``, the url,
+        the variable and an empty ``optional_args`` dict.
+    '''
+    return {
+        'optional_args': {},
+        'data_source': 'dap',
+        'url': dataset.origin['url'],
+        'variable': dataset.variable,
+    }
+
+def _calc_temporal_bin_size(datasets):
+    ''' Guess a temporal bin size from the first dataset's time step.
+
+    Only the gap between the first two time values of ``datasets[0]`` is
+    examined.
+
+    :param datasets: Non-empty list of datasets; the first must hold at
+        least two time values.
+
+    :returns: 1 if the gap is zero whole days, 31 if it is at most 31 days,
+        366 if it is at most 366 days, 999 otherwise.
+    '''
+    times = datasets[0].times
+    days = (times[1] - times[0]).days
+
+    if days == 0:
+        return 1
+
+    for upper_bound in (31, 366):
+        if days <= upper_bound:
+            return upper_bound
+
+    return 999
+
+def _calc_spatial_lat_lon_grid(datasets):
+    ''' Guess regridding lat/lon ranges from the first dataset.
+
+    :param datasets: Non-empty list of datasets; the first must hold at
+        least two latitude and two longitude values.
+
+    :returns: A pair of ``(min, max, step)`` tuples, latitudes first. The
+        step is the absolute gap between the first two coordinate values.
+        The maximum is pushed out by one step unless it already sits at 90
+        (latitude) or 180 (longitude).
+    '''
+    first = datasets[0]
+    lat_min, lat_max, lon_min, lon_max = first.spatial_boundaries()
+
+    # These explicit float casts are needed to ensure that the type of the
+    # lat/lon steps are not numpy values. PyYAML will choke on export if it
+    # encounters a Numpy value.
+    lat_step = float(abs(first.lats[1] - first.lats[0]))
+    lon_step = float(abs(first.lons[1] - first.lons[0]))
+
+    # We need to add an extra step value onto the end so when we generate a
+    # range with these values we don't lose one that we're expecting.
+    lat_stop = lat_max if lat_max == 90 else lat_max + lat_step
+    lon_stop = lon_max if lon_max == 180 else lon_max + lon_step
+
+    return ((lat_min, lat_stop, lat_step), (lon_min, lon_stop, lon_step))
+
+def _calc_subset_config(datasets):
+    ''' Compute subset bounds that enclose every supplied dataset.
+
+    :param datasets: Iterable of datasets providing ``spatial_boundaries()``
+        and ``time_range()``.
+
+    :returns: ``[lat_min, lat_max, lon_min, lon_max, start, end]`` where the
+        spatial values span all datasets and ``start``/``end`` are the
+        earliest start and latest end rendered with ``str``. With no
+        datasets the inverted seed values below are returned unchanged.
+    '''
+    # Seed each bound at the opposite extreme so that any dataset's value
+    # replaces it.
+    lat_min = 90
+    lat_max = -90
+    lon_min = 180
+    lon_max = -180
+    start = dt.datetime(2500, 1, 1)
+    end = dt.datetime(1500, 1, 1)
+
+    for ds in datasets:
+        bounds = ds.spatial_boundaries()
+        ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = bounds
+        ds_start, ds_end = ds.time_range()
+
+        lat_min = min(lat_min, ds_lat_min)
+        lat_max = max(lat_max, ds_lat_max)
+        lon_min = min(lon_min, ds_lon_min)
+        lon_max = max(lon_max, ds_lon_max)
+        start = min(start, ds_start)
+        end = max(end, ds_end)
+
+    return [lat_min, lat_max, lon_min, lon_max, str(start), str(end)]

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/evaluation_creation.py
----------------------------------------------------------------------
diff --git a/ocw_config_runner/evaluation_creation.py b/ocw_config_runner/evaluation_creation.py
new file mode 100644
index 0000000..88394de
--- /dev/null
+++ b/ocw_config_runner/evaluation_creation.py
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import dateutil.parser
+from datetime import timedelta
+import logging
+
+from ocw.dataset import Bounds
+from ocw.evaluation import Evaluation
+import ocw.dataset_processor as dsp
+import ocw.data_source.local as local
+import ocw.data_source.rcmed as rcmed
+import ocw.data_source.esgf as esgf
+import ocw.data_source.dap as dap
+import ocw.metrics as metrics
+
+import numpy as np
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def generate_evaluation_from_config(config_data):
+    """ Generate an Evaluation object from configuration data.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file. The ``datasets`` and ``metrics`` keys are read
+        unconditionally; ``subregions`` is optional.
+    :type config_data: :func:`dict`
+
+    :returns: An Evaluation object containing the data specified in the
+        supplied configuration data.
+    """
+    # Load datasets
+    reference = None
+    targets = []
+    datasets_config = config_data['datasets']
+    if datasets_config:
+        if 'reference' in datasets_config:
+            reference = _load_dataset(datasets_config['reference'])
+
+        if 'targets' in datasets_config:
+            targets = [_load_dataset(t) for t in datasets_config['targets']]
+
+        reference, targets = _prepare_datasets_for_evaluation(reference,
+                                                              targets,
+                                                              config_data)
+    # Load metrics. Each configured name is resolved to a class and
+    # instantiated with no arguments.
+    eval_metrics = []
+    if config_data['metrics']:
+        eval_metrics = [_load_metric(m)() for m in config_data['metrics']]
+
+    # Load Subregions (if present)
+    subregions = None
+    if 'subregions' in config_data:
+        subregions = [_load_subregion(s) for s in config_data['subregions']]
+
+    return Evaluation(reference, targets, eval_metrics, subregions=subregions)
+
+def _load_dataset(dataset_config_data):
+    """ Load the dataset described by one config entry.
+
+    The loader is chosen from the entry's ``data_source`` value and any
+    ``optional_args`` mapping is forwarded to it as keyword arguments.
+
+    :param dataset_config_data: Mapping describing a single dataset.
+    :type dataset_config_data: :func:`dict`
+
+    :returns: Whatever the selected loader returns, or None when the entry
+        is a local dataset with ``file_count`` greater than 1 (an error is
+        logged) or names a data source other than ``local``, ``rcmed``,
+        ``esgf`` or ``dap``.
+    """
+    data_source = dataset_config_data['data_source']
+    optional_args = dataset_config_data.get('optional_args', {})
+
+    if data_source == 'local':
+        if dataset_config_data['file_count'] > 1:
+            logger.error(
+                'Multi-file datasets are currently not supported. '
+                'Cancelling load '
+                'of the following dataset: {}'.format(dataset_config_data)
+            )
+            return None
+
+        return local.load_file(dataset_config_data['path'],
+                               dataset_config_data['variable'],
+                               **optional_args)
+    elif data_source == 'rcmed':
+        # The sixth positional argument is the eastern bound; it previously
+        # repeated min_lon, collapsing the requested longitude range.
+        return rcmed.parameter_dataset(dataset_config_data['dataset_id'],
+                                       dataset_config_data['parameter_id'],
+                                       dataset_config_data['min_lat'],
+                                       dataset_config_data['max_lat'],
+                                       dataset_config_data['min_lon'],
+                                       dataset_config_data['max_lon'],
+                                       dataset_config_data['start_time'],
+                                       dataset_config_data['end_time'],
+                                       **optional_args)
+    elif data_source == 'esgf':
+        return esgf.load_dataset(dataset_config_data['dataset_id'],
+                                 dataset_config_data['variable'],
+                                 dataset_config_data['esgf_username'],
+                                 dataset_config_data['esgf_password'],
+                                 **optional_args)
+    elif data_source == 'dap':
+        # optional_args comes from .get(); the entry used to be called like
+        # a function here, which raised TypeError for every dap dataset.
+        return dap.load(dataset_config_data['url'],
+                        dataset_config_data['variable'],
+                        **optional_args)
+
+    # Unrecognized data source: nothing to load.
+    return None
+
+def _prepare_datasets_for_evaluation(reference, targets, config_data):
+    """ Normalize, subset, rebin and regrid datasets as the config requests.
+
+    :param reference: The loaded reference dataset, or None when the config
+        has no reference.
+    :param targets: List of loaded target datasets (may be empty).
+    :param config_data: Dictionary of the parsed configuration. Its optional
+        ``evaluation`` section may hold ``subset``, ``temporal_time_delta``,
+        ``spatial_regrid_lats`` and ``spatial_regrid_lons``.
+    :type config_data: :func:`dict`
+
+    :returns: A ``(reference, targets)`` tuple of the processed datasets.
+    """
+    # A config may omit the ``evaluation`` section entirely; treat that the
+    # same as a section with no settings instead of failing with KeyError.
+    eval_config = config_data.get('evaluation') or {}
+    subset = eval_config.get('subset', None)
+    temporal_time_delta = eval_config.get('temporal_time_delta', None)
+    spatial_regrid_lats = eval_config.get('spatial_regrid_lats', None)
+    spatial_regrid_lons = eval_config.get('spatial_regrid_lons', None)
+
+    # If we have a temporal time delta and it's daily (i.e., 1) we will
+    # normalize the data as daily data (which means we adjust the start times
+    # for each bucket of data to be consistent). By default we will normalize
+    # the data as monthly. Note that this will not break yearly data so it's
+    # safer to do this no matter what. This keeps us from ending up with 1-off
+    # errors in the resulting dataset shape post-temporal/spatial adjustments
+    # that break evaluations.
+    string_time_delta = 'monthly'
+    if temporal_time_delta and temporal_time_delta == 1:
+        string_time_delta = 'daily'
+
+    # The reference is optional (targets-only configs are valid for unary
+    # metrics), so guard it here just as the later steps do.
+    if reference:
+        reference = dsp.normalize_dataset_datetimes(reference,
+                                                    string_time_delta)
+    targets = [dsp.normalize_dataset_datetimes(t, string_time_delta)
+               for t in targets]
+
+    if subset:
+        start = dateutil.parser.parse(subset[4])
+        end = dateutil.parser.parse(subset[5])
+        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)
+
+        if reference:
+            reference = dsp.safe_subset(bounds, reference)
+
+        if targets:
+            targets = [dsp.safe_subset(bounds, t) for t in targets]
+
+    if temporal_time_delta:
+        # timedelta's first positional argument is a number of days.
+        resolution = timedelta(temporal_time_delta)
+
+        if reference:
+            reference = dsp.temporal_rebin(reference, resolution)
+
+        if targets:
+            targets = [dsp.temporal_rebin(t, resolution) for t in targets]
+
+    if spatial_regrid_lats and spatial_regrid_lons:
+        lats = np.arange(spatial_regrid_lats[0],
+                         spatial_regrid_lats[1],
+                         spatial_regrid_lats[2])
+        lons = np.arange(spatial_regrid_lons[0],
+                         spatial_regrid_lons[1],
+                         spatial_regrid_lons[2])
+
+        if reference:
+            reference = dsp.spatial_regrid(reference, lats, lons)
+
+        if targets:
+            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]
+
+    return reference, targets
+
+def _load_metric(metric_config_data):
+    """ Resolve a configured metric name to an attribute of ``ocw.metrics``.
+
+    :param metric_config_data: Metric name as written in the config.
+    :type metric_config_data: :mod:`string`
+
+    :returns: The attribute of the ``metrics`` module with that name, or
+        None (after logging an error) when the name contains a ``.``.
+    """
+    # A dotted name points at a metric defined outside of ocw.metrics, which
+    # we won't currently handle loading.
+    is_builtin_name = '.' not in metric_config_data
+    if is_builtin_name:
+        return getattr(metrics, metric_config_data)
+
+    logger.error(
+        'User-defined metrics outside of the ocw.metrics module '
+        'cannot currently be loaded. If you just wanted a metric '
+        'found in ocw.metrics then do not specify the full '
+        'package and module names. See the documentation for examples.'
+    )
+    return None
+
+def _load_subregion(subregion_config_data):
+    """ Build a Bounds object from one subregion config entry.
+
+    :param subregion_config_data: Sequence whose first four items are passed
+        to ``Bounds`` in order after conversion with ``float``.
+
+    :returns: The constructed :class:`dataset.Bounds` object.
+    """
+    corners = [float(subregion_config_data[index]) for index in range(4)]
+    return Bounds(*corners)

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/portrait_diagram.yaml
----------------------------------------------------------------------
diff --git a/ocw_config_runner/example/portrait_diagram.yaml b/ocw_config_runner/example/portrait_diagram.yaml
new file mode 100644
index 0000000..f3e45de
--- /dev/null
+++ b/ocw_config_runner/example/portrait_diagram.yaml
@@ -0,0 +1,30 @@
+evaluation:
+    temporal_time_delta: 365
+    spatial_regrid_lats: !!python/tuple [-40, 40, 1]
+    spatial_regrid_lons: !!python/tuple [-40, 40, 1]
+
+datasets:
+    reference:
+        data_source: local
+        file_count: 1
+        path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc
+        variable: tasmax
+
+    targets:
+        - data_source: local
+          file_count: 1
+          path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc
+          variable: tasmax
+          optional_args:
+              name: WRF
+metrics:
+    - PatternCorrelation
+
+plots:
+    - type: portrait
+      metric_index: 0
+      output_name: portrait_test
+
+subregions:
+    - [-10.0, 0.0, 29.0, 36.5] 
+    - [0.0, 10.0,  29.0, 37.5] 

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/simple_model_to_model_bias.yaml
----------------------------------------------------------------------
diff --git a/ocw_config_runner/example/simple_model_to_model_bias.yaml b/ocw_config_runner/example/simple_model_to_model_bias.yaml
new file mode 100644
index 0000000..bd59fbd
--- /dev/null
+++ b/ocw_config_runner/example/simple_model_to_model_bias.yaml
@@ -0,0 +1,35 @@
+evaluation:
+    temporal_time_delta: 365
+    spatial_regrid_lats: !!python/tuple [-20, 20, 1]
+    spatial_regrid_lons: !!python/tuple [-20, 20, 1]
+
+datasets:
+    reference:
+        data_source: local
+        file_count: 1
+        path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc
+        variable: tasmax
+
+    targets:
+        - data_source: local
+          file_count: 1
+          path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc
+          variable: tasmax
+metrics:
+    - Bias
+
+plots:
+    - type: contour
+      results_indices:
+          - !!python/tuple [0, 0]
+      lats:
+          range_min: -20
+          range_max: 20
+          range_step: 1
+      lons:
+          range_min: -20
+          range_max: 20
+          range_step: 1
+      output_name: wrf_bias_compared_to_knmi
+      optional_args:
+          gridshape: !!python/tuple [6, 6]

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/subregion.yaml
----------------------------------------------------------------------
diff --git a/ocw_config_runner/example/subregion.yaml b/ocw_config_runner/example/subregion.yaml
new file mode 100644
index 0000000..d39e39b
--- /dev/null
+++ b/ocw_config_runner/example/subregion.yaml
@@ -0,0 +1,30 @@
+subregions:
+    - [-10.0, 0.0, 29.0, 36.5]
+    - [0.0, 10.0,  29.0, 37.5]
+    - [10.0, 20.0, 25.0, 32.5]
+    - [20.0, 33.0, 25.0, 32.5]
+    - [-19.3,-10.2,12.0, 20.0]
+    - [15.0, 30.0, 15.0, 25.0]
+    - [-10.0, 10.0, 7.3, 15.0]
+    - [-10.9, 10.0, 5.0, 7.3]
+    - [33.9, 40.0,  6.9, 15.0]
+    - [10.0, 25.0,  0.0, 10.0]
+    - [10.0, 25.0,-10.0,  0.0]
+    - [30.0, 40.0,-15.0,  0.0]
+    - [33.0, 40.0, 25.0, 35.0]
+
+datasets:
+
+metrics:
+
+plots:
+    - type: subregion
+      output_name: subregion_plot
+      lats:
+          range_min: -45.0
+          range_max: 42.24
+          range_step: 0.5
+      lons:
+          range_min: -24.0
+          range_max: 60.0
+          range_step: 0.5

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/taylor_diagram_example.yaml
----------------------------------------------------------------------
diff --git a/ocw_config_runner/example/taylor_diagram_example.yaml b/ocw_config_runner/example/taylor_diagram_example.yaml
new file mode 100644
index 0000000..66eab3b
--- /dev/null
+++ b/ocw_config_runner/example/taylor_diagram_example.yaml
@@ -0,0 +1,36 @@
+evaluation:
+    temporal_time_delta: 30
+    spatial_regrid_lats: !!python/tuple [-45, 42, 1]
+    spatial_regrid_lons: !!python/tuple [-24, 60, 1]
+    subset: [-45, 42, -24, 60, "1989-01-01", "1989-12-01"]
+
+datasets:
+    reference:
+        data_source: local
+        file_count: 1
+        path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc
+        variable: tasmax
+        optional_args:
+            name: dataset1
+
+    targets:
+        - data_source: local
+          file_count: 1
+          path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc
+          variable: tasmax
+          optional_args:
+              name: dataset2
+metrics:
+    - StdDevRatio
+    - PatternCorrelation
+
+plots:
+    - type: taylor
+      stddev_results_indices:
+          - !!python/tuple [0, 0]
+      pattern_corr_results_indices:
+          - !!python/tuple [0, 1]
+      output_name: taylor_plot
+      optional_args:
+          fmt: png
+          frameon: False

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/example/time_series_plot_example.yaml
----------------------------------------------------------------------
diff --git a/ocw_config_runner/example/time_series_plot_example.yaml b/ocw_config_runner/example/time_series_plot_example.yaml
new file mode 100644
index 0000000..b5599cc
--- /dev/null
+++ b/ocw_config_runner/example/time_series_plot_example.yaml
@@ -0,0 +1,31 @@
+evaluation:
+    temporal_time_delta: 30
+    spatial_regrid_lats: !!python/tuple [-20, 20, 1]
+    spatial_regrid_lons: !!python/tuple [-20, 20, 1]
+    subset: [-180, 180, -90, 90, "1989-01-01", "1990-12-01"]
+
+datasets:
+    reference:
+        data_source: local
+        file_count: 1
+        path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc
+        variable: tasmax
+        optional_args:
+            name: KNMI
+
+    targets:
+        - data_source: local
+          file_count: 1
+          path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc
+          variable: tasmax
+          optional_args:
+            name: WRF
+metrics:
+
+plots:
+    - type: time_series
+      time_range: monthly
+
+subregions:
+    - [-10.0, 0.0, -19.0, 19.0]
+    - [0.0, 10.0,  -10.0, 10.0]

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/ocw_evaluation_from_config.py
----------------------------------------------------------------------
diff --git a/ocw_config_runner/ocw_evaluation_from_config.py b/ocw_config_runner/ocw_evaluation_from_config.py
new file mode 100644
index 0000000..93b411a
--- /dev/null
+++ b/ocw_config_runner/ocw_evaluation_from_config.py
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import logging
+
+from configuration_parsing import is_config_valid
+from evaluation_creation import generate_evaluation_from_config
+from plot_generation import plot_from_config
+
+import yaml
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def run_evaluation_from_config(config_file_path, ignore_config_errors=False):
+    """ Run an OCW evaluation specified by a config file.
+
+    :param config_file_path: The file path to a OCW compliant YAML file
+        specifying how to run the evaluation. For additional information on 
+        the valid options that you can set in the config please check the
+        project wiki https://cwiki.apache.org/confluence/display/climate/home#'.
+    :type config_file_path: :mod:`string`
+
+    :param ignore_config_errors: When this is true configuration parsing errors
+        will NOT interrupt the evaluation run. Note, it is very unlikely that
+        you will want this value set. However it is possible that you will want
+        to graph something that doesn't require a full evaluation run. This is
+        provided for that situation.
+    :type ignore_config_errors: :func:`bool`
+    """
+    config = yaml.load(open(config_file_path, 'r'))
+
+    if not ignore_config_errors and not is_config_valid(config):
+        logger.warning(
+            'Unable to validate configuration file. Exiting evaluation. '
+            'Please check documentation for config information.'
+        )
+
+        sys.exit(1)
+
+    evaluation = generate_evaluation_from_config(config)
+
+    if evaluation._evaluation_is_valid():
+        evaluation.run()
+
+    plot_from_config(evaluation, config)
+
+if __name__ == '__main__':
+    description = 'OCW Config Based Evaluation'
+    epilog = 'Additional information at https://cwiki.apache.org/confluence/display/climate/home#'
+
+    parser = argparse.ArgumentParser(description=description, epilog=epilog)
+    parser.add_argument('config', help='Path to YAML config file for the evaluation')
+    parser.add_argument('ignore_config_errors', nargs='?', default=False, type=bool)
+    args = parser.parse_args()
+
+    run_evaluation_from_config(args.config, args.ignore_config_errors)

http://git-wip-us.apache.org/repos/asf/climate/blob/e926a581/ocw_config_runner/plot_generation.py
----------------------------------------------------------------------
diff --git a/ocw_config_runner/plot_generation.py 
b/ocw_config_runner/plot_generation.py
new file mode 100644
index 0000000..392331d
--- /dev/null
+++ b/ocw_config_runner/plot_generation.py
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+
+import ocw.dataset_processor as dsp
+import ocw.plotter as plots
+import ocw.utils as utils
+
+import numpy as np
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def plot_from_config(evaluation, config_data):
+    """ Generate plots for an evaluation from configuration data.
+
+    :param evaluation: The Evaluation for which to generate plots.
+    :type evaluation: :class:`ocw.evaluation.Evaluation`
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type: :func:`dict`
+    """
+    for plot in config_data['plots']:
+        if plot['type'] == 'contour':
+            _draw_contour_plot(evaluation, plot)
+        elif plot['type'] == 'subregion':
+            _draw_subregion_diagram(evaluation, plot)
+        elif plot['type'] == 'taylor':
+            _draw_taylor_diagram(evaluation, plot)
+        elif plot['type'] == 'time_series':
+            _draw_time_series_plot(evaluation, plot)
+        elif plot['type'] == 'portrait':
+            _draw_portrait_diagram(evaluation, plot)
+        else:
+            logger.error('Unrecognized plot type requested: {}'.format(plot['type']))
+
+def _draw_contour_plot(evaluation, plot_config):
+    """"""
+    lats = plot_config['lats']
+    if type(lats) != type(list):
+        lats = np.arange(lats['range_min'], lats['range_max'], lats['range_step'])
+
+    lons = plot_config['lons']
+    if type(lons) != type(list):
+        lons = np.arange(lons['range_min'], lons['range_max'], lons['range_step'])
+
+    for i, index in enumerate(plot_config['results_indices']):
+        if len(index) == 2:
+            target, metric = index
+            vals = evaluation.results[target][metric]
+        elif len(index) == 3:
+            target, metric, subregion = index
+            vals = evaluation.results[target][metric][subregion]
+
+        plot_name = plot_config['output_name'] + '_{}'.format(i)
+        plots.draw_contour_map(vals,
+                               np.array(lats),
+                               np.array(lons),
+                               plot_name,
+                               **plot_config.get('optional_args', {}))
+
+def _draw_taylor_diagram(evaluation, plot_config):
+    """"""
+    plot_name = plot_config['output_name']
+    ref_dataset_name = evaluation.ref_dataset.name
+    target_dataset_names = [t.name for t in evaluation.target_datasets]
+
+    if len(plot_config['stddev_results_indices'][0]) == 2:
+        stddev_results = [
+            evaluation.results[tar][met]
+            for (tar, met) in plot_config['stddev_results_indices']
+        ]
+
+        pattern_corr_results = [
+            evaluation.results[tar][met]
+            for (tar, met) in plot_config['pattern_corr_results_indices']
+        ]
+    elif len(plot_config['stddev_results_indices'][0]) == 3:
+        stddev_results = [
+            evaluation.results[tar][met][sub]
+            for (tar, met, sub) in plot_config['stddev_results_indices']
+        ]
+
+        pattern_corr_results = [
+            evaluation.results[tar][met][sub]
+            for (tar, met, sub) in plot_config['pattern_corr_results_indices']
+        ]
+
+    plot_data = np.array([stddev_results, pattern_corr_results]).transpose()
+
+    plots.draw_taylor_diagram(plot_data,
+                              target_dataset_names,
+                              ref_dataset_name,
+                              fname=plot_name,
+                              **plot_config.get('optional_args', {}))
+
+def _draw_subregion_diagram(evaluation, plot_config):
+    """"""
+    lats = plot_config['lats']
+    if type(lats) != type(list):
+        lats = np.arange(lats['range_min'], lats['range_max'], lats['range_step'])
+
+    lons = plot_config['lons']
+    if type(lons) != type(list):
+        lons = np.arange(lons['range_min'], lons['range_max'], lons['range_step'])
+
+    plots.draw_subregions(evaluation.subregions,
+                          lats,
+                          lons,
+                          plot_config['output_name'],
+                          **plot_config.get('optional_args', {}))
+
+def _draw_portrait_diagram(evaluation, plot_config):
+    """"""
+    metric_index = plot_config['metric_index']
+
+    diagram_data = np.array(evaluation.results[:][metric_index][:])
+    subregion_names = ["R{}".format(i) for i in range(len(evaluation.subregions))]
+    target_names = [t.name for t in evaluation.target_datasets]
+
+    plots.draw_portrait_diagram(diagram_data,
+                                target_names,
+                                subregion_names,
+                                fname=plot_config['output_name'],
+                                **plot_config.get('optional_args', {}))
+
+def _draw_time_series_plot(evaluation, plot_config):
+    """"""
+    time_range_info = plot_config['time_range']
+    ref_ds = evaluation.ref_dataset
+    target_ds = evaluation.target_datasets
+
+    if time_range_info == 'monthly':
+        ref_ds.values, ref_ds.times = utils.calc_climatology_monthly(ref_ds)
+
+        for t in target_ds:
+            t.values, t.times = utils.calc_climatology_monthly(t)
+    else:
+        logger.error(
+            'Invalid time range provided. Only monthly is supported '
+            'at the moment'
+        )
+        return
+
+    if evaluation.subregions:
+        for bound_count, bound in enumerate(evaluation.subregions):
+            results = []
+            labels = []
+
+            subset = dsp.subset(
+                bound,
+                ref_ds,
+                subregion_name="R{}_{}".format(bound_count, ref_ds.name)
+            )
+
+            results.append(utils.calc_time_series(subset))
+            labels.append(subset.name)
+
+            for t in target_ds:
+                subset = dsp.subset(
+                    bound,
+                    t,
+                    subregion_name="R{}_{}".format(bound_count, t.name)
+                )
+                results.append(utils.calc_time_series(subset))
+                labels.append(subset.name)
+
+            plots.draw_time_series(np.array(results),
+                                   ref_ds.times,
+                                   labels,
+                                   'R{}'.format(bound_count),
+                                   **plot_config.get('optional_args', {}))
+
+    else:
+        results = []
+        labels = []
+
+        results.append(utils.calc_time_series(ref_ds))
+        labels.append(ref_ds.name)
+
+        for t in target_ds:
+            results.append(utils.calc_time_series(t))
+            labels.append(t.name)
+
+        plots.draw_time_series(np.array(results),
+                               ref_ds.times,
+                               labels,
+                               'time_series',
+                               **plot_config.get('optional_args', {}))

[2/5] climate git commit: replaces ocw-config-runner to ocw_config_runner

Reply via email to