This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
The following commit(s) were added to refs/heads/dev by this push:
new f66c7e1 SDAP-293: Automatic Kelvin-to-Celsius conversion (#22)
f66c7e1 is described below
commit f66c7e1614d1bdf63c09ba14cd850c893e2db3d3
Author: Elizabeth Yam <[email protected]>
AuthorDate: Wed Nov 18 14:11:58 2020 -0800
SDAP-293: Automatic Kelvin-to-Celsius conversion (#22)
Co-authored-by: Eamon Ford <[email protected]>
---
.../services/CollectionProcessor.py | 1 +
granule_ingester/granule_ingester/README.md | 11 +++++++++++
.../granule_ingester/processors/TileProcessor.py | 20 +++++++++++++++++++-
.../granule_ingester/processors/kelvintocelsius.py | 18 +++++++++++-------
4 files changed, 42 insertions(+), 8 deletions(-)
diff --git
a/collection_manager/collection_manager/services/CollectionProcessor.py
b/collection_manager/collection_manager/services/CollectionProcessor.py
index 96c461e..bab56fc 100644
--- a/collection_manager/collection_manager/services/CollectionProcessor.py
+++ b/collection_manager/collection_manager/services/CollectionProcessor.py
@@ -87,6 +87,7 @@ class CollectionProcessor:
},
{'name': 'emptyTileFilter'},
{'name': 'subtract180FromLongitude'},
+ {'name': 'kelvinToCelsius'},
{
'name': 'tileSummary',
'dataset_name': collection.dataset_id
diff --git a/granule_ingester/granule_ingester/README.md
b/granule_ingester/granule_ingester/README.md
new file mode 100644
index 0000000..881461a
--- /dev/null
+++ b/granule_ingester/granule_ingester/README.md
@@ -0,0 +1,11 @@
+## Plugin Processor Architecture
+The operator can write a class that inherits from the `TileProcessor` class
and implements the abstract function `process`, which, among other things, takes
in the NexusTile (`nexusproto.DataTile_pb2.NexusTile` object) generated with
default configurations and the NC4 Dataset (`xarray.Dataset` object), and allows
the user to add further modifications to how granule data is saved.
+
+Any additional transformation the operator needs to accomplish must be done in
this `process` method, which is what is ultimately called in the ingestion
pipeline. Helper functions are suggested for breaking up complex procedures.
+
+The custom code file would be copied into the ingestion pods via the helm
chart (see chart for local and mount paths).
+
+Example: `KelvinToCelsiusProcessor`
+This processor checks the units of the saved variable. If the units are some
form of Kelvin, the processor automatically converts all of the temperature
measurements to Celsius by subtracting 273.15 from each data point. The
transformed data then replaces the default (untransformed) values and the
processor returns the modified tile.
+
+#### TODO: Add a configuration option for unusual representations of temperature
units.
\ No newline at end of file
diff --git a/granule_ingester/granule_ingester/processors/TileProcessor.py
b/granule_ingester/granule_ingester/processors/TileProcessor.py
index d62c504..47b1a2f 100644
--- a/granule_ingester/granule_ingester/processors/TileProcessor.py
+++ b/granule_ingester/granule_ingester/processors/TileProcessor.py
@@ -14,10 +14,28 @@
# limitations under the License.
from abc import ABC, abstractmethod
+from nexusproto.serialization import from_shaped_array, to_shaped_array
+from nexusproto.DataTile_pb2 import NexusTile
# TODO: make this an informal interface, not an abstract class
class TileProcessor(ABC):
@abstractmethod
- def process(self, tile, *args, **kwargs):
+ def process(self, tile: NexusTile, *args, **kwargs):
+ # accessing the data
+ # the_tile_type = tile.tile.WhichOneof("tile_type")
+ # the_tile_data = getattr(tile.tile, the_tile_type)
+
+ # get netCDF as xarray.Dataset object
+ # ds = kwargs['dataset']
+
+ # example transformation:
+ # var_data = from_shaped_array(the_tile_data.variable_data) - 273.15
+
+ # save transformed data back into tile
+ # the_tile_data.variable_data.CopyFrom(to_shaped_array(var_data))
+
+ # return transformed tile
+ # return tile
+
pass
diff --git a/granule_ingester/granule_ingester/processors/kelvintocelsius.py
b/granule_ingester/granule_ingester/processors/kelvintocelsius.py
index e728418..9ad4f49 100644
--- a/granule_ingester/granule_ingester/processors/kelvintocelsius.py
+++ b/granule_ingester/granule_ingester/processors/kelvintocelsius.py
@@ -13,19 +13,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
from nexusproto.serialization import from_shaped_array, to_shaped_array
-
+from nexusproto.DataTile_pb2 import NexusTile
from granule_ingester.processors.TileProcessor import TileProcessor
class KelvinToCelsius(TileProcessor):
- def process(self, tile, *args, **kwargs):
+ def process(self, tile: NexusTile, *args, **kwargs):
the_tile_type = tile.tile.WhichOneof("tile_type")
the_tile_data = getattr(tile.tile, the_tile_type)
-
- var_data = from_shaped_array(the_tile_data.variable_data) - 273.15
-
- the_tile_data.variable_data.CopyFrom(to_shaped_array(var_data))
+ kelvins = ['kelvin', 'degk', 'deg_k', 'degreesk', 'degrees_k',
'degree_k', 'degreek']
+
+ if 'dataset' in kwargs:
+ ds = kwargs['dataset']
+ variable_name = tile.summary.data_var_name
+ variable_unit = ds.variables[variable_name].attrs['units']
+ if any([unit in variable_unit.lower() for unit in kelvins]):
+ var_data = from_shaped_array(the_tile_data.variable_data) -
273.15
+ the_tile_data.variable_data.CopyFrom(to_shaped_array(var_data))
return tile