fgreg closed pull request #4: SDAP-33 New Processor for adding Global Attributes
URL: https://github.com/apache/incubator-sdap-ningesterpy/pull/4
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request originates from a fork, GitHub does not display
its diff after the merge, so the diff is reproduced below:

diff --git a/sdap/processors/__init__.py b/sdap/processors/__init__.py
index 8a48500..6d4a679 100644
--- a/sdap/processors/__init__.py
+++ b/sdap/processors/__init__.py
@@ -56,6 +56,7 @@ def process_nexus_tile(self, nexus_tile):
 from sdap.processors.emptytilefilter import EmptyTileFilter
 from sdap.processors.kelvintocelsius import KelvinToCelsius
 from sdap.processors.normalizetimebeginningofmonth import NormalizeTimeBeginningOfMonth
+from sdap.processors.promotevariabletoglobalattribute import PromoteVariableToGlobalAttribute
 from sdap.processors.regrid1x1 import Regrid1x1
 from sdap.processors.subtract180longitude import Subtract180Longitude
 from sdap.processors.tilereadingprocessor import GridReadingProcessor, SwathReadingProcessor, TimeSeriesReadingProcessor
@@ -70,6 +71,7 @@ def process_nexus_tile(self, nexus_tile):
     "EmptyTileFilter": EmptyTileFilter,
     "KelvinToCelsius": KelvinToCelsius,
     "NormalizeTimeBeginningOfMonth": NormalizeTimeBeginningOfMonth,
+    "PromoteVariableToGlobalAttribute": PromoteVariableToGlobalAttribute,
     "Regrid1x1": Regrid1x1,
     "Subtract180Longitude": Subtract180Longitude,
     "GridReadingProcessor": GridReadingProcessor,
diff --git a/sdap/processors/processorchain.py b/sdap/processors/processorchain.py
index 6a68b04..82bad8e 100644
--- a/sdap/processors/processorchain.py
+++ b/sdap/processors/processorchain.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import inspect
+import re
 
 import sdap.processors
 
@@ -51,18 +52,37 @@ def __init__(self, processor_list, *args, **kwargs):
             except KeyError as e:
                 raise ProcessorNotFound(processor['name']) from e
 
+            processor_config = dict(**processor['config'])
+
             missing_args = []
             for arg in inspect.signature(processor_constructor).parameters.keys():
                 if arg in ['args', 'kwargs']:
                     continue
-                if arg not in processor['config']:
+                if arg not in processor_config:
                     missing_args.append(arg)
 
+            # Need to check for list type args
+            list_pattern = re.compile('\.\d+$')
+            list_args = [k for k in processor_config if list_pattern.search(k)]
+            if list_args:
+                import itertools
+                grouped = itertools.groupby(list_args, key=lambda k: k.split('.')[0])
+                for group, grouped_args in grouped:
+                    for list_arg in grouped_args:
+                        key, idx = list_arg.split('.')
+                        if group not in processor_config:
+                            processor_config[group] = []
+
+                        processor_config[group].insert(int(idx), processor_config[list_arg])
+                        del(processor_config[list_arg])
+
+            # Check if the list args satisfied the
+            missing_args = list(filter(lambda a: a not in processor_config.keys(), missing_args))
             if missing_args:
                 raise MissingProcessorArguments(processor['name'], missing_args)
 
             if 'config' in processor.keys():
-                processor_instance = processor_constructor(**processor['config'])
+                processor_instance = processor_constructor(**processor_config)
             else:
                 processor_instance = processor_constructor()
 
diff --git a/sdap/processors/promotevariabletoglobalattribute.py b/sdap/processors/promotevariabletoglobalattribute.py
new file mode 100644
index 0000000..11fa9d8
--- /dev/null
+++ b/sdap/processors/promotevariabletoglobalattribute.py
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import nexusproto.DataTile_pb2
+from netCDF4 import Dataset
+
+from sdap.processors import NexusTileProcessor
+
+
+class PromoteVariableToGlobalAttribute(NexusTileProcessor):
+
+    def __init__(self, attribute_name, variable_name, dimensioned_by, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.attribute_name = attribute_name
+        self.variable_name = variable_name
+        self.dimensioned_by = dimensioned_by
+
+    def process_nexus_tile(self, nexus_tile):
+        output_tile = nexusproto.DataTile_pb2.NexusTile()
+        output_tile.CopyFrom(nexus_tile)
+
+        file_path = output_tile.summary.granule
+        file_path = file_path[len('file:'):] if file_path.startswith('file:') else file_path
+
+        dimtoslice = {}
+        for dimension in output_tile.summary.section_spec.split(','):
+            name, start, stop = dimension.split(':')
+            dimtoslice[name] = slice(int(start), int(stop))
+
+        with Dataset(file_path) as ds:
+            new_attr = output_tile.summary.global_attributes.add()
+            new_attr.name = self.attribute_name
+            new_attr.values.extend(
+                [str(v) for v in ds[self.variable_name][[dimtoslice[dim] for dim in self.dimensioned_by]]])
+
+        yield output_tile
diff --git a/tests/datafiles/not_empty_wswm.nc b/tests/datafiles/not_empty_wswm.nc
index 772bbcb..ce0ebcc 100644
Binary files a/tests/datafiles/not_empty_wswm.nc and b/tests/datafiles/not_empty_wswm.nc differ
diff --git a/tests/processorchain_test.py b/tests/processorchain_test.py
index 7657ba6..26f2e52 100644
--- a/tests/processorchain_test.py
+++ b/tests/processorchain_test.py
@@ -21,6 +21,63 @@
 from sdap.processors.processorchain import ProcessorChain
 
 
+class TestConstructChain(unittest.TestCase):
+
+    def test_construct_chain_with_list_config(self):
+        processor_list = [
+            {'name': 'TimeSeriesReadingProcessor',
+             'config': {'latitude': 'lat',
+                        'longitude': 'lon',
+                        'time': 'time',
+                        'variable_to_read': 'Qout'}},
+            {'name': 'EmptyTileFilter', 'config': {}},
+            {'name': 'PromoteVariableToGlobalAttribute',
+             'config': {
+                 'attribute_name': 'rivid_i',
+                 'variable_name': 'rivid',
+                 'dimensioned_by.0': 'rivid',
+                 'dimensioned_by.1': 'other'
+             }},
+            {'name': 'TileSummarizingProcessor', 'config': {}}
+        ]
+
+        processorchain = ProcessorChain(processor_list)
+
+        self.assertIsNotNone(processorchain)
+
+    def test_construct_chain_with_multiple_list_config(self):
+        processor_list = [
+            {'name': 'PromoteVariableToGlobalAttribute',
+             'config': {
+                 'attribute_name': 'rivid_i',
+                 'variable_name': 'rivid',
+                 'dimensioned_by.0': 'rivid',
+                 'dimensioned_by.1': 'other',
+                 'unused.0': 'list',
+                 'unused.1': 'second'
+             }}
+        ]
+
+        processorchain = ProcessorChain(processor_list)
+
+        self.assertIsNotNone(processorchain)
+
+    def test_construct_chain_with_list_config_bad_index(self):
+        processor_list = [
+            {'name': 'PromoteVariableToGlobalAttribute',
+             'config': {
+                 'attribute_name': 'rivid_i',
+                 'variable_name': 'rivid',
+                 'dimensioned_by.0': 'rivid',
+                 'dimensioned_by.10': 'other'
+             }}
+        ]
+
+        processorchain = ProcessorChain(processor_list)
+
+        self.assertIsNotNone(processorchain)
+
+
 class TestRunChainMethod(unittest.TestCase):
     def test_run_chain_read_filter_all(self):
         processor_list = [
@@ -29,7 +86,7 @@ def test_run_chain_read_filter_all(self):
                         'longitude': 'lon',
                         'time': 'time',
                         'variable_to_read': 'analysed_sst'}},
-            {'name': 'EmptyTileFilter'}
+            {'name': 'EmptyTileFilter', 'config': {}}
         ]
         processorchain = ProcessorChain(processor_list)
 
@@ -52,7 +109,7 @@ def test_run_chain_read_filter_none(self):
                         'longitude': 'lon',
                         'time': 'time',
                         'variable_to_read': 'analysed_sst'}},
-            {'name': 'EmptyTileFilter'}
+            {'name': 'EmptyTileFilter', 'config': {}}
         ]
         processorchain = ProcessorChain(processor_list)
 
@@ -75,9 +132,9 @@ def test_run_chain_read_filter_kelvin_summarize(self):
                         'longitude': 'lon',
                         'time': 'time',
                         'variable_to_read': 'analysed_sst'}},
-            {'name': 'EmptyTileFilter'},
-            {'name': 'KelvinToCelsius'},
-            {'name': 'TileSummarizingProcessor'}
+            {'name': 'EmptyTileFilter', 'config': {}},
+            {'name': 'KelvinToCelsius', 'config': {}},
+            {'name': 'TileSummarizingProcessor', 'config': {}}
         ]
         processorchain = ProcessorChain(processor_list)
 
@@ -100,9 +157,9 @@ def test_run_chain_partial_empty(self):
                         'longitude': 'lon',
                         'time': 'time',
                         'variable_to_read': 'analysed_sst'}},
-            {'name': 'EmptyTileFilter'},
-            {'name': 'KelvinToCelsius'},
-            {'name': 'TileSummarizingProcessor'}
+            {'name': 'EmptyTileFilter', 'config': {}},
+            {'name': 'KelvinToCelsius', 'config': {}},
+            {'name': 'TileSummarizingProcessor', 'config': {}}
         ]
         processorchain = ProcessorChain(processor_list)
 
@@ -130,6 +187,39 @@ def test_run_chain_partial_empty(self):
 
         self.assertEqual(0, len(results))
 
+    def test_run_chain_promote_var(self):
+        processor_list = [
+            {'name': 'GridReadingProcessor',
+             'config': {'latitude': 'lat',
+                        'longitude': 'lon',
+                        'time': 'time',
+                        'variable_to_read': 'analysed_sst'}},
+            {'name': 'EmptyTileFilter', 'config': {}},
+            {'name': 'KelvinToCelsius', 'config': {}},
+            {'name': 'PromoteVariableToGlobalAttribute',
+             'config': {
+                 'attribute_name': 'time_i',
+                 'variable_name': 'time',
+                 'dimensioned_by.0': 'time'
+             }},
+            {'name': 'TileSummarizingProcessor', 'config': {}}
+        ]
+        processorchain = ProcessorChain(processor_list)
+
+        test_file = path.join(path.dirname(__file__), 'datafiles', 'partial_empty_mur.nc4')
+
+        input_tile = nexusproto.NexusTile()
+        tile_summary = nexusproto.TileSummary()
+        tile_summary.granule = "file:%s" % test_file
+        tile_summary.section_spec = "time:0:1,lat:489:499,lon:0:10"
+        input_tile.summary.CopyFrom(tile_summary)
+
+        results = list(processorchain.process(input_tile))
+
+        self.assertEqual(1, len(results))
+        tile = results[0]
+        self.assertEqual("1104483600", tile.summary.global_attributes[0].values[0])
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/promotevariabletoglobalattribute_test.py b/tests/promotevariabletoglobalattribute_test.py
new file mode 100644
index 0000000..1fbe21d
--- /dev/null
+++ b/tests/promotevariabletoglobalattribute_test.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+
+from os import path
+
+import sdap.processors
+import nexusproto.DataTile_pb2
+
+
+class TestReadWSWMData(unittest.TestCase):
+
+    def test_read_not_empty_wswm(self):
+        test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_wswm.nc')
+
+        promoter = sdap.processors.PromoteVariableToGlobalAttribute('rivid_i', 'rivid', ('rivid',))
+
+        input_tile = nexusproto.DataTile_pb2.NexusTile()
+        tile_summary = nexusproto.DataTile_pb2.TileSummary()
+        tile_summary.granule = "file:%s" % test_file
+        tile_summary.section_spec = "time:0:5832,rivid:0:1"
+        input_tile.summary.CopyFrom(tile_summary)
+
+        results = list(promoter.process(input_tile))
+
+        print(results)
\ No newline at end of file


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to