This is an automated email from the ASF dual-hosted git repository.
rkk pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/sdap-ingester.git
The following commit(s) were added to refs/heads/develop by this push:
new 2e9b0c6 SDAP-502: Gridded tile generation bug patch (#92)
2e9b0c6 is described below
commit 2e9b0c616f2075633aa2b7dab91e0ac77b0167c1
Author: Riley Kuttruff <[email protected]>
AuthorDate: Thu May 9 11:32:55 2024 -0700
SDAP-502: Gridded tile generation bug patch (#92)
* SDAP-502: Gridded tile gen squeeze bug patch
* Added unit test
* Changelog typo
---------
Co-authored-by: rileykk <[email protected]>
---
CHANGELOG.md | 1 +
.../GridMultiVariableReadingProcessor.py | 13 ++++-
.../reading_processors/GridReadingProcessor.py | 21 ++++++++-
.../test_GridReadingProcessor.py | 55 ++++++++++++++++++++++
4 files changed, 86 insertions(+), 4 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 754e47e..c537e0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Removed
### Fixed
- SDAP-512: Fixed Granule Ingester not closing connections to
Zookeeper/Solr/Cassandra, eventually exhausting network resources and requiring
a restart
+- SDAP-502: Fix for rare bug where gridded tiles generated from inputs where
there is a dimension length where `dimensionLength mod tileSliceLength == 1`
would cause tile generation to fail. This is because `np.squeeze` is used on
the coordinate arrays, which, if the generated tile has only a single lat or
lon, would squeeze the corresponding coordinate into a dimensionless array,
which would raise an error down the line when `len` was called with it. Added a
check for this case that bot [...]
### Security
## [1.2.0] - 2023-11-22
diff --git a/granule_ingester/granule_ingester/processors/reading_processors/GridMultiVariableReadingProcessor.py b/granule_ingester/granule_ingester/processors/reading_processors/GridMultiVariableReadingProcessor.py
index 035bb6b..bab04f6 100644
--- a/granule_ingester/granule_ingester/processors/reading_processors/GridMultiVariableReadingProcessor.py
+++ b/granule_ingester/granule_ingester/processors/reading_processors/GridMultiVariableReadingProcessor.py
@@ -55,8 +55,17 @@ class GridMultiVariableReadingProcessor(TileReadingProcessor):
lat_subset =
ds[self.latitude][type(self)._slices_for_variable(ds[self.latitude],
dimensions_to_slices)]
lon_subset =
ds[self.longitude][type(self)._slices_for_variable(ds[self.longitude],
dimensions_to_slices)]
- lat_subset = np.ma.filled(np.squeeze(lat_subset), np.NaN)
- lon_subset = np.ma.filled(np.squeeze(lon_subset), np.NaN)
+
+ lat_subset = np.squeeze(lat_subset)
+ if lat_subset.shape == ():
+ lat_subset = np.expand_dims(lat_subset, 0)
+
+ lon_subset = np.squeeze(lon_subset)
+ if lon_subset.shape == ():
+ lon_subset = np.expand_dims(lon_subset, 0)
+
+ lat_subset = np.ma.filled(lat_subset, np.NaN)
+ lon_subset = np.ma.filled(lon_subset, np.NaN)
if not isinstance(self.variable, list):
raise ValueError(f'self.variable `{self.variable}` needs to be a
list. use GridReadingProcessor for single band Grid files.')
diff --git a/granule_ingester/granule_ingester/processors/reading_processors/GridReadingProcessor.py b/granule_ingester/granule_ingester/processors/reading_processors/GridReadingProcessor.py
index 73969e6..95bb7c6 100644
--- a/granule_ingester/granule_ingester/processors/reading_processors/GridReadingProcessor.py
+++ b/granule_ingester/granule_ingester/processors/reading_processors/GridReadingProcessor.py
@@ -36,15 +36,32 @@ class GridReadingProcessor(TileReadingProcessor):
data_variable = self.variable[0] if isinstance(self.variable, list)
else self.variable
new_tile = nexusproto.GridTile()
+ expand_axes = []
+
lat_subset =
ds[self.latitude][type(self)._slices_for_variable(ds[self.latitude],
dimensions_to_slices)]
lon_subset =
ds[self.longitude][type(self)._slices_for_variable(ds[self.longitude],
dimensions_to_slices)]
- lat_subset = np.ma.filled(np.squeeze(lat_subset), np.NaN)
- lon_subset = np.ma.filled(np.squeeze(lon_subset), np.NaN)
+
+ lat_subset = np.squeeze(lat_subset)
+ if lat_subset.shape == ():
+ lat_subset = np.expand_dims(lat_subset, 0)
+ expand_axes.append(0)
+
+ lon_subset = np.squeeze(lon_subset)
+ if lon_subset.shape == ():
+ lon_subset = np.expand_dims(lon_subset, 0)
+ expand_axes.append(1)
+
+ lat_subset = np.ma.filled(lat_subset, np.NaN)
+ lon_subset = np.ma.filled(lon_subset, np.NaN)
data_subset =
ds[data_variable][type(self)._slices_for_variable(ds[data_variable],
dimensions_to_slices)].data
data_subset = np.array(np.squeeze(data_subset))
+ if len(expand_axes) > 0:
+ data_subset = np.expand_dims(data_subset, tuple(expand_axes))
+
+
if self.depth:
depth_dim, depth_slice =
list(type(self)._slices_for_variable(ds[self.depth],
dimensions_to_slices).items())[0]
diff --git
a/granule_ingester/tests/reading_processors/test_GridReadingProcessor.py
b/granule_ingester/tests/reading_processors/test_GridReadingProcessor.py
index ef78394..3123cc4 100644
--- a/granule_ingester/tests/reading_processors/test_GridReadingProcessor.py
+++ b/granule_ingester/tests/reading_processors/test_GridReadingProcessor.py
@@ -123,6 +123,61 @@ class TestReadMurData(unittest.TestCase):
masked_data =
np.ma.masked_invalid(from_shaped_array(output_tile.tile.grid_tile.variable_data))
self.assertEqual(50, np.ma.count(masked_data))
+ def test_single_width_coords(self):
+ reading_processor = GridReadingProcessor(['analysed_sst'], 'lat',
'lon', time='time')
+ granule_path = path.join(path.dirname(__file__),
'../granules/not_empty_mur.nc4')
+
+ input_tile = nexusproto.NexusTile()
+ input_tile.summary.granule = granule_path
+
+ dimensions_to_slices = {
+ 'time': slice(0, 1),
+ 'lat': slice(50, 51),
+ 'lon': slice(0, 5)
+ }
+ with xr.open_dataset(granule_path) as ds:
+ output_tile = reading_processor._generate_tile(ds,
dimensions_to_slices, input_tile)
+
+ self.assertEqual(granule_path, output_tile.summary.granule,
granule_path)
+ self.assertEqual(1451638800, output_tile.tile.grid_tile.time)
+ self.assertEqual([1, 5],
output_tile.tile.grid_tile.variable_data.shape)
+ self.assertEqual([1], output_tile.tile.grid_tile.latitude.shape)
+ self.assertEqual([5], output_tile.tile.grid_tile.longitude.shape)
+
+ input_tile = nexusproto.NexusTile()
+ input_tile.summary.granule = granule_path
+
+ dimensions_to_slices = {
+ 'time': slice(0, 1),
+ 'lat': slice(0, 10),
+ 'lon': slice(50, 51)
+ }
+ with xr.open_dataset(granule_path) as ds:
+ output_tile = reading_processor._generate_tile(ds,
dimensions_to_slices, input_tile)
+
+ self.assertEqual(granule_path, output_tile.summary.granule,
granule_path)
+ self.assertEqual(1451638800, output_tile.tile.grid_tile.time)
+ self.assertEqual([10, 1],
output_tile.tile.grid_tile.variable_data.shape)
+ self.assertEqual([10], output_tile.tile.grid_tile.latitude.shape)
+ self.assertEqual([1], output_tile.tile.grid_tile.longitude.shape)
+
+ input_tile = nexusproto.NexusTile()
+ input_tile.summary.granule = granule_path
+
+ dimensions_to_slices = {
+ 'time': slice(0, 1),
+ 'lat': slice(50, 51),
+ 'lon': slice(50, 51),
+ }
+ with xr.open_dataset(granule_path) as ds:
+ output_tile = reading_processor._generate_tile(ds,
dimensions_to_slices, input_tile)
+
+ self.assertEqual(granule_path, output_tile.summary.granule,
granule_path)
+ self.assertEqual(1451638800, output_tile.tile.grid_tile.time)
+ self.assertEqual([1, 1],
output_tile.tile.grid_tile.variable_data.shape)
+ self.assertEqual([1], output_tile.tile.grid_tile.latitude.shape)
+ self.assertEqual([1], output_tile.tile.grid_tile.longitude.shape)
+
class TestReadCcmpData(unittest.TestCase):