Repository: climate Updated Branches: refs/heads/master 577dfb787 -> d30d8146f
Revert "Merge branch 'CLIMATE-825'" This reverts commit 577dfb78704a340b187ee746bef15b91a1fb5414, reversing changes made to 70cd2b5006d1b0dfe1ff97f1cb2913c3d4ad913e. Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/d30d8146 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/d30d8146 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/d30d8146 Branch: refs/heads/master Commit: d30d8146fa9c91844411d41f15f5e64423f2bfa2 Parents: 577dfb7 Author: Alex Goodman <ago...@users.noreply.github.com> Authored: Fri Jul 29 10:28:58 2016 -0700 Committer: Alex Goodman <ago...@users.noreply.github.com> Committed: Fri Jul 29 10:28:58 2016 -0700 ---------------------------------------------------------------------- docs/source/index.rst | 2 +- docs/source/ocw/dataset_loader.rst | 5 - ocw/dataset_loader.py | 206 -------------------------------- ocw/tests/test_dataset_loader.py | 185 ---------------------------- 4 files changed, 1 insertion(+), 397 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/docs/source/index.rst ---------------------------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 2834ee6..1485385 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -13,7 +13,6 @@ Contents: ocw/overview ocw/dataset - ocw/dataset_loader ocw/dataset_processor ocw/evaluation ocw/metrics @@ -34,3 +33,4 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` + http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/docs/source/ocw/dataset_loader.rst ---------------------------------------------------------------------- diff --git a/docs/source/ocw/dataset_loader.rst b/docs/source/ocw/dataset_loader.rst deleted file mode 100644 index 833b7f9..0000000 --- a/docs/source/ocw/dataset_loader.rst +++ /dev/null @@ -1,5 +0,0 @@ -Dataset Loader Module -************** - -.. automodule:: dataset_loader - :members: http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/ocw/dataset_loader.py ---------------------------------------------------------------------- diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py deleted file mode 100644 index be43c05..0000000 --- a/ocw/dataset_loader.py +++ /dev/null @@ -1,206 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -''' -Classes: - DatasetLoader - Generate OCW Dataset objects from a variety of sources. -''' - -import ocw.data_source.local as local -import ocw.data_source.esgf as esgf -import ocw.data_source.rcmed as rcmed -import ocw.data_source.dap as dap - -class DatasetLoader: - '''Generate OCW Dataset objects from a variety of sources.''' - - def __init__(self, reference, targets): - '''Generate OCW Dataset objects from a variety of sources. - - Each keyword argument can be information for a dataset in dictionary - form. For example: - `` - >>> reference = {'data_source':'rcmed', 'name':'cru', 'dataset_id':10, - 'parameter_id':34} - >>> targets = {'data_source':'local_multiple', - 'path':'./data/CORDEX-Africa_data/AFRICA*pr.nc', - 'variable':'pr'} - >>> loader = DatasetLoader(reference, targets) - `` - - Or more conveniently if the loader configuration is defined in a - yaml file named config_file (see RCMES examples): - `` - >>> import yaml - >>> config = yaml.load(open(config_file)) - >>> loader = DatasetLoader(**config['datasets']) - `` - - As shown in the first example, the dictionary for each keyword argument - should contain a data source and parameters specific to the loader for - that data source. Once the configuration is entered, the datasets may be - loaded using: - `` - >>> loader.load_datasets() - >>> target_datasets = loader.target_datasets - `` - - If ``reference`` is entered as a keyword argument, then it may be - accesed from: - `` - >>> reference_dataset = loader.reference_dataset - `` - - Additionally, each dataset must have a ``data_source`` keyword. This may - be one of the following: - * ``'local'`` - A single dataset file in a local directory - * ``'local_split'`` - A single dataset split accross multiple files in a - local directory - * ``'local_multiple'`` - Multiple datasets in a local directory - * ``'esgf'`` - Download the dataset from the Earth System Grid - Federation - * ``'rcmed'`` - Download the dataset from the Regional Climate Model - Evaluation System Database - * ``'dap'`` - Download the dataset from an OPeNDAP URL - - Users who wish to download datasets from sources not described above - may define their own custom dataset loader function and incorporate it - as follows: - >>> loader.add_source_loader('my_source_name', my_loader_func) - - :param reference: The reference dataset loader configuration. - :type reference: :mod:`dict` - - :param targets: The target dataset loader configurations. - :type targets: :mod:`dict` or list of mod:`dict` - - :raises KeyError: If an invalid argument is passed to a data source - loader function. - ''' - # Reference dataset config - self.set_reference(**reference) - - # Target dataset(s) config - self.set_targets(targets) - - # Default loaders - self._source_loaders = { - 'local':local.load_file, - 'local_split':local.load_dataset_from_multiple_netcdf_files, - 'local_multiple':local.load_multiple_files, - 'esgf':esgf.load_dataset, - 'rcmed':rcmed.parameter_dataset, - 'dap':dap.load - } - - def add_source_loader(self, source_name, loader_func): - ''' - Add a custom source loader. - - :param source_name: The name of the data source. - :type source_name: :mod:`string` - - :param loader_func: Reference to a custom defined function. This should - return an OCW Dataset object. - :type loader_func: :class:`callable` - ''' - self._source_loaders[source_name] = loader_func - - def add_target(self, **kwargs): - ''' - A convenient means of adding a target dataset to the loader. - :raises KeyError: If data_source is not specified. - ''' - if 'data_source' not in kwargs: - raise KeyError('Dataset configuration must contain a data_source.') - self._target_config.append(kwargs) - - def add_targets(self, targets): - ''' - A convenient means of adding multiple target datasets to the loader. - - :param targets: List of loader configurations for each target - :type targets: List of :mod:`dict` - - :raises KeyError: If data_source is not specified. - ''' - for target_config in targets: - self.add_target(**target_config) - - def set_targets(self, targets): - ''' - Reset the target dataset config. - - :param targets: List of loader configurations for each target - :type targets: List of :mod:`dict` - - :raises KeyError: If data_source is not specified. - ''' - # This check allows for the user to enter targets as one block or - # as a list of separate blocks in their config files - if not isinstance(targets, list): - targets = [targets] - self._target_config = [] - self.add_targets(targets) - - def set_reference(self, **kwargs): - ''' - Reset the reference dataset config. - :raises KeyError: If data_source is not specified. - ''' - if 'data_source' not in kwargs: - raise KeyError('Dataset configuration must contain a data_source.') - self._reference_config = kwargs - - def load_datasets(self): - ''' - Loads the datasets from the given loader configurations. - ''' - # Load the reference dataset - self.reference_dataset = self._load(**self._reference_config) - - # Ensure output is clear if loading is performed more than once to - # prevent duplicates. - self.target_datasets = [] - - # Load the target datasets - for loader_params in self._target_config: - output = self._load(**loader_params) - - # Need to account for the fact that some loaders return lists - # of OCW Dataset objects instead of just one - if isinstance(output, list): - self.target_datasets.extend(output) - else: - self.target_datasets.append(output) - - def _load(self, **kwargs): - ''' - Generic dataset loading method. - ''' - # Extract the data source - data_source = kwargs.pop('data_source') - - # Find the correct loader function for the given data source - loader_func = self._source_loaders[data_source] - - # The remaining kwargs should be specific to the loader - output = loader_func(**kwargs) - - # Preserve data_source info for later use - kwargs['data_source'] = data_source - return output http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/ocw/tests/test_dataset_loader.py ---------------------------------------------------------------------- diff --git a/ocw/tests/test_dataset_loader.py b/ocw/tests/test_dataset_loader.py deleted file mode 100644 index 2d192c1..0000000 --- a/ocw/tests/test_dataset_loader.py +++ /dev/null @@ -1,185 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import unittest -import os -import copy -import netCDF4 -import numpy as np -from ocw.dataset import Dataset -from ocw.dataset_loader import DatasetLoader - -class TestDatasetLoader(unittest.TestCase): - def setUp(self): - # Read netCDF file - self.file_path = create_netcdf_object() - self.netCDF_file = netCDF4.Dataset(self.file_path, 'r') - self.latitudes = self.netCDF_file.variables['latitude'][:] - self.longitudes = self.netCDF_file.variables['longitude'][:] - self.times = self.netCDF_file.variables['time'][:] - self.alt_lats = self.netCDF_file.variables['alt_lat'][:] - self.alt_lons = self.netCDF_file.variables['alt_lon'][:] - self.values = self.netCDF_file.variables['value'][:] - self.values2 = self.values + 1 - - # Set up config - self.reference_config = {'data_source': 'local', - 'file_path': self.file_path, - 'variable_name': 'value'} - self.target_config = copy.deepcopy(self.reference_config) - self.no_data_source_config = {'file_path': self.file_path, - 'variable_name': 'value'} - self.new_data_source_config = {'data_source': 'foo', - 'lats': self.latitudes, - 'lons': self.longitudes, - 'times': self.times, - 'values': self.values2, - 'variable': 'value'} - - def tearDown(self): - os.remove(self.file_path) - - def testInputHasDataSource(self): - ''' - Make sure input data source is specified for each dataset to be loaded - ''' - with self.assertRaises(KeyError): - self.loader = DatasetLoader(self.reference_config, - self.no_data_source_config) - - def testReferenceHasDataSource(self): - ''' - Make sure ref data source is specified for each dataset to be loaded - ''' - with self.assertRaises(KeyError): - self.loader = DatasetLoader(self.reference_config, - self.target_config) - self.loader.set_reference(**self.no_data_source_config) - - def testTargetHasDataSource(self): - ''' - Make sure target data source is specified for each dataset to be loaded - ''' - with self.assertRaises(KeyError): - self.loader = DatasetLoader(self.reference_config, - self.target_config) - self.loader.add_target(**self.no_data_source_config) - - def testNewDataSource(self): - ''' - Ensures that custom data source loaders can be added - ''' - self.loader = DatasetLoader(self.new_data_source_config, - self.target_config) - - # Here the the data_source "foo" represents the Dataset constructor - self.loader.add_source_loader('foo', build_dataset) - self.loader.load_datasets() - self.assertEqual(self.loader.reference_dataset.origin['source'], - 'foo') - np.testing.assert_array_equal(self.loader.reference_dataset.values, - self.values2) - - def testExistingDataSource(self): - ''' - Ensures that existing data source loaders can be added - ''' - self.loader = DatasetLoader(self.reference_config, - self.target_config) - self.loader.load_datasets() - self.assertEqual(self.loader.reference_dataset.origin['source'], - 'local') - np.testing.assert_array_equal(self.loader.reference_dataset.values, - self.values) - - def testMultipleTargets(self): - ''' - Test for when multiple target dataset configs are specified - ''' - self.loader = DatasetLoader(self.reference_config, - [self.target_config, - self.new_data_source_config]) - - # Here the the data_source "foo" represents the Dataset constructor - self.loader.add_source_loader('foo', build_dataset) - self.loader.load_datasets() - self.assertEqual(self.loader.target_datasets[0].origin['source'], - 'local') - self.assertEqual(self.loader.target_datasets[1].origin['source'], - 'foo') - np.testing.assert_array_equal(self.loader.target_datasets[0].values, - self.values) - np.testing.assert_array_equal(self.loader.target_datasets[1].values, - self.values2) - -def build_dataset(*args, **kwargs): - ''' - Wrapper to Dataset constructor from fictitious 'foo' data_source. - ''' - origin = {'source': 'foo'} - return Dataset(*args, origin=origin, **kwargs) - -def create_netcdf_object(): - # To create the temporary netCDF file - file_path = '/tmp/temporaryNetcdf.nc' - netCDF_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4') - # To create dimensions - netCDF_file.createDimension('lat_dim', 5) - netCDF_file.createDimension('lon_dim', 5) - netCDF_file.createDimension('time_dim', 3) - # To create variables - latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',)) - longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',)) - times = netCDF_file.createVariable('time', 'd', ('time_dim',)) - # unusual variable names to test optional arguments for Dataset constructor - alt_lats = netCDF_file.createVariable('alt_lat', 'd', ('lat_dim',)) - alt_lons = netCDF_file.createVariable('alt_lon', 'd', ('lon_dim',)) - alt_times = netCDF_file.createVariable('alt_time', 'd', ('time_dim',)) - values = netCDF_file.createVariable('value', 'd', - ('time_dim', - 'lat_dim', - 'lon_dim') - ) - - # To latitudes and longitudes for five values - latitudes_data = np.arange(5.) - longitudes_data = np.arange(150., 155.) - # Three months of data. - times_data = np.arange(3) - # Create 150 values - values_data = np.array([i for i in range(75)]) - # Reshape values to 4D array (level, time, lats, lons) - values_data = values_data.reshape(len(times_data), len(latitudes_data), - len(longitudes_data)) - - # Ingest values to netCDF file - latitudes[:] = latitudes_data - longitudes[:] = longitudes_data - times[:] = times_data - alt_lats[:] = latitudes_data + 10 - alt_lons[:] = longitudes_data - 10 - alt_times[:] = times_data - values[:] = values_data - # Assign time info to time variable - netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' - netCDF_file.variables['alt_time'].units = 'months since 2001-04-01 00:00:00' - netCDF_file.variables['value'].units = 'foo_units' - netCDF_file.close() - return file_path - -if __name__ == '__main__': - unittest.main()