fgreg closed pull request #4: SDAP-33 New Processor for adding Global Attributes. URL: https://github.com/apache/incubator-sdap-ningesterpy/pull/4
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdap/processors/__init__.py b/sdap/processors/__init__.py index 8a48500..6d4a679 100644 --- a/sdap/processors/__init__.py +++ b/sdap/processors/__init__.py @@ -56,6 +56,7 @@ def process_nexus_tile(self, nexus_tile): from sdap.processors.emptytilefilter import EmptyTileFilter from sdap.processors.kelvintocelsius import KelvinToCelsius from sdap.processors.normalizetimebeginningofmonth import NormalizeTimeBeginningOfMonth +from sdap.processors.promotevariabletoglobalattribute import PromoteVariableToGlobalAttribute from sdap.processors.regrid1x1 import Regrid1x1 from sdap.processors.subtract180longitude import Subtract180Longitude from sdap.processors.tilereadingprocessor import GridReadingProcessor, SwathReadingProcessor, TimeSeriesReadingProcessor @@ -70,6 +71,7 @@ def process_nexus_tile(self, nexus_tile): "EmptyTileFilter": EmptyTileFilter, "KelvinToCelsius": KelvinToCelsius, "NormalizeTimeBeginningOfMonth": NormalizeTimeBeginningOfMonth, + "PromoteVariableToGlobalAttribute": PromoteVariableToGlobalAttribute, "Regrid1x1": Regrid1x1, "Subtract180Longitude": Subtract180Longitude, "GridReadingProcessor": GridReadingProcessor, diff --git a/sdap/processors/processorchain.py b/sdap/processors/processorchain.py index 6a68b04..82bad8e 100644 --- a/sdap/processors/processorchain.py +++ b/sdap/processors/processorchain.py @@ -14,6 +14,7 @@ # limitations under the License. 
import inspect +import re import sdap.processors @@ -51,18 +52,37 @@ def __init__(self, processor_list, *args, **kwargs): except KeyError as e: raise ProcessorNotFound(processor['name']) from e + processor_config = dict(**processor['config']) + missing_args = [] for arg in inspect.signature(processor_constructor).parameters.keys(): if arg in ['args', 'kwargs']: continue - if arg not in processor['config']: + if arg not in processor_config: missing_args.append(arg) + # Need to check for list type args + list_pattern = re.compile('\.\d+$') + list_args = [k for k in processor_config if list_pattern.search(k)] + if list_args: + import itertools + grouped = itertools.groupby(list_args, key=lambda k: k.split('.')[0]) + for group, grouped_args in grouped: + for list_arg in grouped_args: + key, idx = list_arg.split('.') + if group not in processor_config: + processor_config[group] = [] + + processor_config[group].insert(int(idx), processor_config[list_arg]) + del(processor_config[list_arg]) + + # Check if the list args satisfied the + missing_args = list(filter(lambda a: a not in processor_config.keys(), missing_args)) if missing_args: raise MissingProcessorArguments(processor['name'], missing_args) if 'config' in processor.keys(): - processor_instance = processor_constructor(**processor['config']) + processor_instance = processor_constructor(**processor_config) else: processor_instance = processor_constructor() diff --git a/sdap/processors/promotevariabletoglobalattribute.py b/sdap/processors/promotevariabletoglobalattribute.py new file mode 100644 index 0000000..11fa9d8 --- /dev/null +++ b/sdap/processors/promotevariabletoglobalattribute.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import nexusproto.DataTile_pb2 +from netCDF4 import Dataset + +from sdap.processors import NexusTileProcessor + + +class PromoteVariableToGlobalAttribute(NexusTileProcessor): + + def __init__(self, attribute_name, variable_name, dimensioned_by, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.attribute_name = attribute_name + self.variable_name = variable_name + self.dimensioned_by = dimensioned_by + + def process_nexus_tile(self, nexus_tile): + output_tile = nexusproto.DataTile_pb2.NexusTile() + output_tile.CopyFrom(nexus_tile) + + file_path = output_tile.summary.granule + file_path = file_path[len('file:'):] if file_path.startswith('file:') else file_path + + dimtoslice = {} + for dimension in output_tile.summary.section_spec.split(','): + name, start, stop = dimension.split(':') + dimtoslice[name] = slice(int(start), int(stop)) + + with Dataset(file_path) as ds: + new_attr = output_tile.summary.global_attributes.add() + new_attr.name = self.attribute_name + new_attr.values.extend( + [str(v) for v in ds[self.variable_name][[dimtoslice[dim] for dim in self.dimensioned_by]]]) + + yield output_tile diff --git a/tests/datafiles/not_empty_wswm.nc b/tests/datafiles/not_empty_wswm.nc index 772bbcb..ce0ebcc 100644 Binary files a/tests/datafiles/not_empty_wswm.nc and b/tests/datafiles/not_empty_wswm.nc differ diff --git a/tests/processorchain_test.py b/tests/processorchain_test.py index 
7657ba6..26f2e52 100644 --- a/tests/processorchain_test.py +++ b/tests/processorchain_test.py @@ -21,6 +21,63 @@ from sdap.processors.processorchain import ProcessorChain +class TestConstructChain(unittest.TestCase): + + def test_construct_chain_with_list_config(self): + processor_list = [ + {'name': 'TimeSeriesReadingProcessor', + 'config': {'latitude': 'lat', + 'longitude': 'lon', + 'time': 'time', + 'variable_to_read': 'Qout'}}, + {'name': 'EmptyTileFilter', 'config': {}}, + {'name': 'PromoteVariableToGlobalAttribute', + 'config': { + 'attribute_name': 'rivid_i', + 'variable_name': 'rivid', + 'dimensioned_by.0': 'rivid', + 'dimensioned_by.1': 'other' + }}, + {'name': 'TileSummarizingProcessor', 'config': {}} + ] + + processorchain = ProcessorChain(processor_list) + + self.assertIsNotNone(processorchain) + + def test_construct_chain_with_multiple_list_config(self): + processor_list = [ + {'name': 'PromoteVariableToGlobalAttribute', + 'config': { + 'attribute_name': 'rivid_i', + 'variable_name': 'rivid', + 'dimensioned_by.0': 'rivid', + 'dimensioned_by.1': 'other', + 'unused.0': 'list', + 'unused.1': 'second' + }} + ] + + processorchain = ProcessorChain(processor_list) + + self.assertIsNotNone(processorchain) + + def test_construct_chain_with_list_config_bad_index(self): + processor_list = [ + {'name': 'PromoteVariableToGlobalAttribute', + 'config': { + 'attribute_name': 'rivid_i', + 'variable_name': 'rivid', + 'dimensioned_by.0': 'rivid', + 'dimensioned_by.10': 'other' + }} + ] + + processorchain = ProcessorChain(processor_list) + + self.assertIsNotNone(processorchain) + + class TestRunChainMethod(unittest.TestCase): def test_run_chain_read_filter_all(self): processor_list = [ @@ -29,7 +86,7 @@ def test_run_chain_read_filter_all(self): 'longitude': 'lon', 'time': 'time', 'variable_to_read': 'analysed_sst'}}, - {'name': 'EmptyTileFilter'} + {'name': 'EmptyTileFilter', 'config': {}} ] processorchain = ProcessorChain(processor_list) @@ -52,7 +109,7 @@ def 
test_run_chain_read_filter_none(self): 'longitude': 'lon', 'time': 'time', 'variable_to_read': 'analysed_sst'}}, - {'name': 'EmptyTileFilter'} + {'name': 'EmptyTileFilter', 'config': {}} ] processorchain = ProcessorChain(processor_list) @@ -75,9 +132,9 @@ def test_run_chain_read_filter_kelvin_summarize(self): 'longitude': 'lon', 'time': 'time', 'variable_to_read': 'analysed_sst'}}, - {'name': 'EmptyTileFilter'}, - {'name': 'KelvinToCelsius'}, - {'name': 'TileSummarizingProcessor'} + {'name': 'EmptyTileFilter', 'config': {}}, + {'name': 'KelvinToCelsius', 'config': {}}, + {'name': 'TileSummarizingProcessor', 'config': {}} ] processorchain = ProcessorChain(processor_list) @@ -100,9 +157,9 @@ def test_run_chain_partial_empty(self): 'longitude': 'lon', 'time': 'time', 'variable_to_read': 'analysed_sst'}}, - {'name': 'EmptyTileFilter'}, - {'name': 'KelvinToCelsius'}, - {'name': 'TileSummarizingProcessor'} + {'name': 'EmptyTileFilter', 'config': {}}, + {'name': 'KelvinToCelsius', 'config': {}}, + {'name': 'TileSummarizingProcessor', 'config': {}} ] processorchain = ProcessorChain(processor_list) @@ -130,6 +187,39 @@ def test_run_chain_partial_empty(self): self.assertEqual(0, len(results)) + def test_run_chain_promote_var(self): + processor_list = [ + {'name': 'GridReadingProcessor', + 'config': {'latitude': 'lat', + 'longitude': 'lon', + 'time': 'time', + 'variable_to_read': 'analysed_sst'}}, + {'name': 'EmptyTileFilter', 'config': {}}, + {'name': 'KelvinToCelsius', 'config': {}}, + {'name': 'PromoteVariableToGlobalAttribute', + 'config': { + 'attribute_name': 'time_i', + 'variable_name': 'time', + 'dimensioned_by.0': 'time' + }}, + {'name': 'TileSummarizingProcessor', 'config': {}} + ] + processorchain = ProcessorChain(processor_list) + + test_file = path.join(path.dirname(__file__), 'datafiles', 'partial_empty_mur.nc4') + + input_tile = nexusproto.NexusTile() + tile_summary = nexusproto.TileSummary() + tile_summary.granule = "file:%s" % test_file + 
tile_summary.section_spec = "time:0:1,lat:489:499,lon:0:10" + input_tile.summary.CopyFrom(tile_summary) + + results = list(processorchain.process(input_tile)) + + self.assertEqual(1, len(results)) + tile = results[0] + self.assertEqual("1104483600", tile.summary.global_attributes[0].values[0]) + if __name__ == '__main__': unittest.main() diff --git a/tests/promotevariabletoglobalattribute_test.py b/tests/promotevariabletoglobalattribute_test.py new file mode 100644 index 0000000..1fbe21d --- /dev/null +++ b/tests/promotevariabletoglobalattribute_test.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import unittest + +from os import path + +import sdap.processors +import nexusproto.DataTile_pb2 + + +class TestReadWSWMData(unittest.TestCase): + + def test_read_not_empty_wswm(self): + test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_wswm.nc') + + promoter = sdap.processors.PromoteVariableToGlobalAttribute('rivid_i', 'rivid', ('rivid',)) + + input_tile = nexusproto.DataTile_pb2.NexusTile() + tile_summary = nexusproto.DataTile_pb2.TileSummary() + tile_summary.granule = "file:%s" % test_file + tile_summary.section_spec = "time:0:5832,rivid:0:1" + input_tile.summary.CopyFrom(tile_summary) + + results = list(promoter.process(input_tile)) + + print(results) \ No newline at end of file ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services