Repository: climate
Updated Branches:
  refs/heads/master 70cd2b500 -> 577dfb787


Added DatasetLoader class


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/ecea6210
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/ecea6210
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/ecea6210

Branch: refs/heads/master
Commit: ecea6210b717e9464ce527c883286a38e41a7a08
Parents: ffd2159
Author: Alex Goodman <ago...@users.noreply.github.com>
Authored: Mon Jul 18 10:11:13 2016 -0700
Committer: Alex Goodman <ago...@users.noreply.github.com>
Committed: Mon Jul 18 10:11:13 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 131 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/ecea6210/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
new file mode 100644
index 0000000..bbcb43c
--- /dev/null
+++ b/ocw/dataset_loader.py
@@ -0,0 +1,131 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+'''
+Classes:
+    DatasetLoader - Generate OCW Dataset objects from a variety of sources.
+'''
+
+import ocw.data_source.local as local
+import ocw.data_source.esgf as esgf
+import ocw.data_source.rcmed as rcmed
+import ocw.data_source.dap as dap
+
+class DatasetLoader:
+    '''Generate OCW Dataset objects from a variety of sources.'''
+
+    def __init__(self, **kwargs):
+        '''Generate OCW Dataset objects from a variety of sources.
+
+        Each keyword argument can be information for a dataset in dictionary
+        form. For example:
+        ``
+        >>> reference = {'data_source':'rcmed', 'name':'cru', 'dataset_id':10,
+                         'parameter_id':34}
+        >>> targets = {'data_source':'local_multiple',
+                       'path':'./data/CORDEX-Africa_data/AFRICA*pr.nc',
+                       'variable':'pr'}
+        >>> loader = DatasetLoader(reference=reference, targets=targets)
+        ``
+
+        Or more conveniently if the loader configuration is defined in a
+        yaml file named config_file (see RCMES examples):
+        ``
+        >>> import yaml
+        >>> config = yaml.safe_load(open(config_file))
+        >>> loader = DatasetLoader(**config['datasets'])
+        ``
+
+        As shown in the first example, the dictionary for each keyword argument
+        should contain a data source and parameters specific to the loader for
+        that data source. Once the configuration is entered, the datasets may
+        be loaded using:
+        ``
+        >>> loader.load_datasets()
+        >>> target_datasets = loader.target_datasets
+        ``
+
+        If ``reference`` is entered as a keyword argument, then it may be
+        accessed from:
+        ``
+        >>> reference_dataset = loader.reference_dataset
+        ``
+
+        Additionally, each dataset must have a ``data_source`` keyword. This
+        may be one of the following:
+        * ``'local'`` - A single dataset file in a local directory
+        * ``'local_split'`` - A single dataset split across multiple files in
+                              a local directory
+        * ``'local_multiple'`` - Multiple datasets in a local directory
+        * ``'esgf'`` - Download the dataset from the Earth System Grid
+                       Federation
+        * ``'rcmed'`` - Download the dataset from the Regional Climate Model
+                        Evaluation System Database
+        * ``'dap'`` - Download the dataset from an OPeNDAP URL
+
+        Users who wish to download datasets from sources not described above
+        may define their own custom dataset loader function and incorporate it
+        as follows:
+        >>> loader.add_source_loader('my_source_name', my_loader_func)
+        '''
+        self.reference_dataset = None
+        self.target_datasets = []
+        self._config = kwargs
+        self._source_loaders = {
+            'local': local.load_file,
+            'local_split': local.load_dataset_from_multiple_netcdf_files,
+            'local_multiple': local.load_multiple_files,
+            'esgf': esgf.load_dataset,
+            'rcmed': rcmed.parameter_dataset,
+            'dap': dap.load,
+        }
+
+    def add_source_loader(self, source_name, loader_func):
+        '''
+        Add a custom source loader.
+
+        :param source_name: The name of the data source.
+        :type source_name: :mod:`string`
+
+        :param loader_func: Reference to a custom defined function. This should
+        return an OCW Dataset object.
+        :type loader_func: :mod:`callable`
+        '''
+        self._source_loaders[source_name] = loader_func
+
+    def set_config(self, **kwargs):
+        '''Replace the loader config. See the class docstring for details.'''
+        self._config = kwargs
+
+    def load_datasets(self):
+        '''Load the datasets from the given loader configuration.
+
+        :raises KeyError: If a dataset entry has no ``data_source`` key or
+        names a data source with no registered loader.
+        '''
+        for dataset_evaltype, dataset_params in self._config.items():
+            # Work on a copy so the stored config survives repeated loads.
+            params = dict(dataset_params)
+            load_func = self._source_loaders[params.pop('data_source')]
+            output = load_func(**params)
+            if dataset_evaltype == 'reference':
+                self.reference_dataset = output
+            elif isinstance(output, (list, tuple)):
+                # Loaders may return several datasets instead of just one.
+                self.target_datasets.extend(output)
+            else:
+                self.target_datasets.append(output)

Reply via email to