This is an automated email from the ASF dual-hosted git repository.
rkk pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
The following commit(s) were added to refs/heads/dev by this push:
new fbfce1b SDAP-477 + SDAP-478 - 1D preprocessor + subgroup selection
(#82)
fbfce1b is described below
commit fbfce1bbdce9cf965339637369ab128b443aff9e
Author: Riley Kuttruff <[email protected]>
AuthorDate: Thu Sep 7 13:46:29 2023 -0700
SDAP-477 + SDAP-478 - 1D preprocessor + subgroup selection (#82)
* Traj preprocessor
* Changelog
* Keep attrs
---------
Co-authored-by: rileykk <[email protected]>
---
CHANGELOG.md | 10 ++++++++++
.../collection_manager/entities/Collection.py | 7 ++++++-
.../collection_manager/services/CollectionProcessor.py | 6 ++++++
granule_ingester/conda-requirements.txt | 6 +++---
.../granule_ingester/granule_loaders/GranuleLoader.py | 12 +++++++++++-
.../granule_ingester/granule_loaders/Preprocessors.py | 9 +++++++--
.../preprocessors/{__init__.py => Trajectory.py} | 18 +++++++++++++++++-
.../granule_ingester/preprocessors/__init__.py | 1 +
granule_ingester/setup.py | 2 +-
9 files changed, 62 insertions(+), 9 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2bb077..9cfeab6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this
file.
The format is based on [Keep a
Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic
Versioning](https://semver.org/spec/v2.0.0.html).
+## Unreleased
+### Added
+- SDAP-477: Added preprocessor to properly shape incoming data
+- SDAP-478: Added support for users to select a subgroup of interest in input
granules
+### Changed
+### Deprecated
+### Removed
+### Fixed
+### Security
+
## [1.1.0] - 2023-04-26
### Added
- SDAP-437: Added support for preprocessing of input granules. Initial
implementation contains one preprocessor implementation for squeezing one or
more dimensions to ensure the dataset is shaped as needed.
diff --git a/collection_manager/collection_manager/entities/Collection.py
b/collection_manager/collection_manager/entities/Collection.py
index d657bae..333f454 100644
--- a/collection_manager/collection_manager/entities/Collection.py
+++ b/collection_manager/collection_manager/entities/Collection.py
@@ -47,6 +47,8 @@ class Collection:
date_from: Optional[datetime] = None
date_to: Optional[datetime] = None
preprocess: str = None
+ processors: str = None
+ group: str = None
@staticmethod
def __decode_dimension_names(dimension_names_dict):
@@ -80,6 +82,7 @@ class Collection:
date_from = datetime.fromisoformat(properties['from']) if 'from'
in properties else None
preprocess = json.dumps(properties['preprocess']) if 'preprocess'
in properties else None
+ extra_processors = json.dumps(properties['processors']) if
'processors' in properties else None
collection = Collection(dataset_id=properties['id'],
projection=properties['projection'],
@@ -90,7 +93,9 @@ class Collection:
forward_processing_priority=properties.get('forward-processing-priority', None),
date_to=date_to,
date_from=date_from,
- preprocess=preprocess)
+ preprocess=preprocess,
+ processors=extra_processors,
+ group=properties.get('group'))
return collection
except KeyError as e:
raise MissingValueCollectionError(missing_value=e.args[0])
diff --git
a/collection_manager/collection_manager/services/CollectionProcessor.py
b/collection_manager/collection_manager/services/CollectionProcessor.py
index bbf6bf9..6c129c7 100644
--- a/collection_manager/collection_manager/services/CollectionProcessor.py
+++ b/collection_manager/collection_manager/services/CollectionProcessor.py
@@ -125,6 +125,12 @@ class CollectionProcessor:
if collection.preprocess is not None:
config_dict['preprocess'] = json.loads(collection.preprocess)
+ if collection.processors is not None:
+ config_dict['processors'].extend(json.loads(collection.processors))
+
+ if collection.group is not None:
+ config_dict['granule']['group'] = collection.group
+
config_str = yaml.dump(config_dict)
logger.debug(f"Templated dataset config:\n{config_str}")
return config_str
diff --git a/granule_ingester/conda-requirements.txt
b/granule_ingester/conda-requirements.txt
index b647f98..e49a9eb 100644
--- a/granule_ingester/conda-requirements.txt
+++ b/granule_ingester/conda-requirements.txt
@@ -13,12 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-numpy==1.15.4
+numpy>=1.15.4
scipy
netcdf4==1.5.3
-pandas==1.0.4
+pandas>=1.0.4
pytz==2019.3
-xarray
+xarray>=0.19.0
pyyaml==5.3.1
aiohttp==3.6.2
tenacity
diff --git a/granule_ingester/granule_ingester/granule_loaders/GranuleLoader.py
b/granule_ingester/granule_ingester/granule_loaders/GranuleLoader.py
index 6a7978f..9dcab14 100644
--- a/granule_ingester/granule_ingester/granule_loaders/GranuleLoader.py
+++ b/granule_ingester/granule_ingester/granule_loaders/GranuleLoader.py
@@ -34,6 +34,11 @@ class GranuleLoader:
self._resource = resource
self._preprocess = None
+ if 'group' in kwargs:
+ self._group = kwargs['group']
+ else:
+ self._group = None
+
if 'preprocess' in kwargs:
self._preprocess = [GranuleLoader._parse_module(module) for module
in kwargs['preprocess']]
@@ -58,7 +63,12 @@ class GranuleLoader:
granule_name = os.path.basename(self._resource)
try:
- ds = xr.open_dataset(file_path, lock=False)
+ additional_params = {}
+
+ if self._group is not None:
+ additional_params['group'] = self._group
+
+ ds = xr.open_dataset(file_path, lock=False, **additional_params)
if self._preprocess is not None:
logger.info(f'There are {len(self._preprocess)} preprocessors
to apply for granule {self._resource}')
diff --git a/granule_ingester/granule_ingester/granule_loaders/Preprocessors.py
b/granule_ingester/granule_ingester/granule_loaders/Preprocessors.py
index f091fe2..b423c13 100644
--- a/granule_ingester/granule_ingester/granule_loaders/Preprocessors.py
+++ b/granule_ingester/granule_ingester/granule_loaders/Preprocessors.py
@@ -15,8 +15,13 @@
from typing import Dict, Type
-from granule_ingester.preprocessors import (GranulePreprocessor, Squeeze)
+from granule_ingester.preprocessors import (
+ GranulePreprocessor,
+ Squeeze,
+ Trajectory
+)
modules: Dict[str, Type[GranulePreprocessor]] = {
- 'squeeze': Squeeze
+ 'squeeze': Squeeze,
+ 'trajectory': Trajectory
}
diff --git a/granule_ingester/granule_ingester/preprocessors/__init__.py
b/granule_ingester/granule_ingester/preprocessors/Trajectory.py
similarity index 62%
copy from granule_ingester/granule_ingester/preprocessors/__init__.py
copy to granule_ingester/granule_ingester/preprocessors/Trajectory.py
index 6f55d91..2539982 100644
--- a/granule_ingester/granule_ingester/preprocessors/__init__.py
+++ b/granule_ingester/granule_ingester/preprocessors/Trajectory.py
@@ -13,6 +13,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import logging
+
+import xarray as xr
from granule_ingester.preprocessors.GranulePreprocessor import
GranulePreprocessor
-from granule_ingester.preprocessors.Squeeze import Squeeze
+from math import sqrt, ceil
+
+logger = logging.getLogger(__name__)
+
+
+class Trajectory(GranulePreprocessor):
+ def __init__(self, dimension: str):
+ self._dim = dimension
+
+ def process(self, input_dataset: xr.Dataset, *args, **kwargs):
+ length = len(input_dataset[self._dim])
+ window = ceil(sqrt(length))
+ return input_dataset.coarsen(**{self._dim: window}, boundary='pad')\
+ .construct(**{self._dim: ('ROWS', 'COLS')}, keep_attrs=True)
diff --git a/granule_ingester/granule_ingester/preprocessors/__init__.py
b/granule_ingester/granule_ingester/preprocessors/__init__.py
index 6f55d91..190cbe4 100644
--- a/granule_ingester/granule_ingester/preprocessors/__init__.py
+++ b/granule_ingester/granule_ingester/preprocessors/__init__.py
@@ -15,4 +15,5 @@
from granule_ingester.preprocessors.GranulePreprocessor import
GranulePreprocessor
from granule_ingester.preprocessors.Squeeze import Squeeze
+from granule_ingester.preprocessors.Trajectory import Trajectory
diff --git a/granule_ingester/setup.py b/granule_ingester/setup.py
index 7c09146..e99900d 100644
--- a/granule_ingester/setup.py
+++ b/granule_ingester/setup.py
@@ -40,7 +40,7 @@ setup(
exclude=["*.tests", "*.tests.*", "tests.*", "tests", "scripts"]),
test_suite="tests",
platforms='any',
- python_requires='>=3.7',
+ python_requires='>=3.8',
classifiers=[
'Development Status :: 1 - Pre-Alpha',
'Intended Audience :: Developers',