This is an automated email from the ASF dual-hosted git repository. fgreg pushed a commit to branch v1.0.0-rc1 in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ningesterpy.git
commit b26a22c75b288a524745787f1c8db02a377abc2e Author: echyam <[email protected]> AuthorDate: Tue Oct 23 10:31:28 2018 -0700 SDAP-155 add processor to extract timestamp from granule metadata (#10) * SDAP-155 add processor to extract timestamp from granule metadata * add ExtractTimestampProcessor to INSTALLED_PROCESSORS for ProcessorChain * remove unused vars and check for tile_type * take timestamp pattern as parameter, use param in error log * give tile information when call timestamp processor on wrong tiletype Co-Authored-By: echyam <[email protected]> --- .idea/inspectionProfiles/Project_Default.xml | 21 ++++++++ .idea/misc.xml | 2 +- .idea/ningesterpy.iml | 5 +- sdap/processors/__init__.py | 5 +- sdap/processors/extracttimestampprocessor.py | 66 ++++++++++++++++++++++++ tests/datafiles/not_empty_gpm.HDF5 | Bin 0 -> 5302102 bytes tests/datafiles/not_empty_modis.nc | Bin 0 -> 10224432 bytes tests/extracttimestamp_test.py | 72 +++++++++++++++++++++++++++ 8 files changed, 165 insertions(+), 6 deletions(-) diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..bdf9b07 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,21 @@ +<component name="InspectionProjectProfileManager"> + <profile version="1.0"> + <option name="myName" value="Project Default" /> + <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true"> + <option name="ignoredPackages"> + <value> + <list size="8"> + <item index="0" class="java.lang.String" itemvalue="flask-accept" /> + <item index="1" class="java.lang.String" itemvalue="protobuf" /> + <item index="2" class="java.lang.String" itemvalue="werkzeug" /> + <item index="3" class="java.lang.String" itemvalue="six" /> + <item index="4" class="java.lang.String" itemvalue="PyYAML" /> + <item index="5" class="java.lang.String" itemvalue="nexusproto" /> + <item index="6" class="java.lang.String" itemvalue="pytz" /> + <item index="7" class="java.lang.String" itemvalue="flask" /> + </list> + </value> + </option> + </inspection_tool> + </profile> +</component> \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 0218159..3c94e77 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ <?xml version="1.0" encoding="UTF-8"?> <project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" project-jdk-type="Python SDK" /> + <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" project-jdk-type="Python SDK" /> </project> \ No newline at end of file diff --git a/.idea/ningesterpy.iml b/.idea/ningesterpy.iml index 1f079f2..30a41bc 100644 --- a/.idea/ningesterpy.iml +++ b/.idea/ningesterpy.iml @@ -4,12 +4,9 @@ <content url="file://$MODULE_DIR$"> <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" /> </content> - <orderEntry type="jdk" jdkName="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" jdkType="Python SDK" /> + <orderEntry type="jdk" jdkName="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" jdkType="Python SDK" /> <orderEntry type="sourceFolder" forTests="false" /> </component> - <component name="PyDocumentationSettings"> - <option name="renderExternalDocumentation" value="true" /> - </component> <component name="TemplatesService"> <option name="TEMPLATE_FOLDERS"> <list> diff --git a/sdap/processors/__init__.py b/sdap/processors/__init__.py index 6d4a679..2cb2f73 100644 --- a/sdap/processors/__init__.py +++ b/sdap/processors/__init__.py @@ -62,6 +62,7 @@ from sdap.processors.subtract180longitude import Subtract180Longitude from sdap.processors.tilereadingprocessor import GridReadingProcessor, SwathReadingProcessor, TimeSeriesReadingProcessor from sdap.processors.tilesummarizingprocessor import TileSummarizingProcessor from sdap.processors.winddirspeedtouv import WindDirSpeedToUV +from sdap.processors.extracttimestampprocessor import ExtractTimestampProcessor INSTALLED_PROCESSORS = { "CallNcpdq": CallNcpdq, @@ -78,5 +79,7 @@ INSTALLED_PROCESSORS = { "SwathReadingProcessor": SwathReadingProcessor, "TimeSeriesReadingProcessor": TimeSeriesReadingProcessor, "TileSummarizingProcessor": TileSummarizingProcessor, - "WindDirSpeedToUV": WindDirSpeedToUV + "WindDirSpeedToUV": WindDirSpeedToUV, + "ExtractTimestampProcessor": ExtractTimestampProcessor } + diff --git a/sdap/processors/extracttimestampprocessor.py b/sdap/processors/extracttimestampprocessor.py new file mode 100644 index 0000000..98b4147 --- /dev/null +++ b/sdap/processors/extracttimestampprocessor.py @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import nexusproto +from nexusproto.serialization import from_shaped_array + +import datetime +import time +import logging +from netCDF4 import Dataset, num2date +from pytz import timezone + +from sdap.processors import NexusTileProcessor + +EPOCH = timezone('UTC').localize(datetime.datetime(1970, 1, 1)) + + +class BadTimestampExtractionException(Exception): + pass + +def to_seconds_from_epoch(timestamp, pattern): + try: + seconds = int(time.mktime(time.strptime(timestamp, pattern))) + return seconds + except ValueError: + logging.error('{} timestamp is not of the format {}'.format(timestamp, pattern)) + +class ExtractTimestampProcessor(NexusTileProcessor): + + def __init__(self, timestamp_name, timestamp_pattern, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.timestamp_name = timestamp_name + self.timestamp_pattern = timestamp_pattern + + def process_nexus_tile(self, nexus_tile): + output_tile = nexusproto.DataTile_pb2.NexusTile() + output_tile.CopyFrom(nexus_tile) + + file_path = output_tile.summary.granule + file_path = file_path[len('file:'):] if file_path.startswith('file:') else file_path + + tile_type = nexus_tile.tile.WhichOneof("tile_type") + + with Dataset(file_path) as ds: + timestamp = getattr(ds,self.timestamp_name) + seconds = to_seconds_from_epoch(timestamp, self.timestamp_pattern) + + if tile_type == "grid_tile": + nexus_tile.tile.grid_tile.time = seconds + else: + raise BadTimestampExtractionException("Unsupported tile type: {}".format(tile_type)) + + yield nexus_tile diff --git a/tests/datafiles/not_empty_gpm.HDF5 b/tests/datafiles/not_empty_gpm.HDF5 new file mode 100644 index 0000000..01397a7 Binary files /dev/null and b/tests/datafiles/not_empty_gpm.HDF5 differ diff --git a/tests/datafiles/not_empty_modis.nc b/tests/datafiles/not_empty_modis.nc new file mode 100644 index 0000000..11bc775 Binary files /dev/null and b/tests/datafiles/not_empty_modis.nc differ diff --git a/tests/extracttimestamp_test.py b/tests/extracttimestamp_test.py new file mode 100644 index 0000000..e2800ab --- /dev/null +++ b/tests/extracttimestamp_test.py @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from os import path +import logging + +import sdap.processors +from nexusproto import DataTile_pb2 as nexusproto +from sdap.processors.extracttimestampprocessor import BadTimestampExtractionException + + +class TestExtractTimestamp(unittest.TestCase): + def setUp(self): + self.module = sdap.processors.ExtractTimestampProcessor('time_coverage_start', '%Y-%m-%dT%H:%M:%S.000Z') + + def test_extract_timestamp_from_metadata(self): + test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc') + + input_tile = nexusproto.NexusTile() + tile_summary = nexusproto.TileSummary() + tile_summary.granule = "file:%s" % test_file + tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10" + input_tile.summary.CopyFrom(tile_summary) + + input_tile.tile.grid_tile.CopyFrom(nexusproto.GridTile()) + results = list(self.module.process_nexus_tile(input_tile)) + nexus_tile_after = results[0] + + self.assertEqual(1537428301, nexus_tile_after.tile.grid_tile.time) + + def test_extract_timestamp_swath_exception(self): + test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc') + + input_tile = nexusproto.NexusTile() + tile_summary = nexusproto.TileSummary() + tile_summary.granule = "file:%s" % test_file + tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10" + input_tile.summary.CopyFrom(tile_summary) + + input_tile.tile.swath_tile.CopyFrom(nexusproto.SwathTile()) + + with self.assertRaises(BadTimestampExtractionException): + list(self.module.process_nexus_tile(input_tile))[0].tile.swath_tile.time + + def test_extract_timestamp_timeseries_exception(self): + test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc') + + input_tile = nexusproto.NexusTile() + tile_summary = nexusproto.TileSummary() + tile_summary.granule = "file:%s" % test_file + tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10" + input_tile.summary.CopyFrom(tile_summary) + + input_tile.tile.time_series_tile.CopyFrom(nexusproto.TimeSeriesTile()) + + with self.assertRaises(BadTimestampExtractionException): + list(self.module.process_nexus_tile(input_tile))[0].tile.time_series_tile.time + +if __name__ == '__main__': + unittest.main() \ No newline at end of file
