This is an automated email from the ASF dual-hosted git repository.
nchung pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
The following commit(s) were added to refs/heads/dev by this push:
new f674a5a SDAP-323: Update summarizing processor and Solr schema to
support multiple variables (#41)
f674a5a is described below
commit f674a5a2be8eec4f83b380d7dc56ceb53b16dd16
Author: Stepheny Perez <[email protected]>
AuthorDate: Wed Sep 22 17:03:11 2021 -0700
SDAP-323: Update summarizing processor and Solr schema to support multiple
variables (#41)
* Updated solr doc for multi-var case
* Fix single-variable multi-var standard name case
* Don't include null standard name field
* Added support for the case where only some variables have a standard_name
---
.../processors/TileSummarizingProcessor.py | 2 +-
.../granule_ingester/writers/SolrStore.py | 15 ++++++++--
granule_ingester/tests/writers/test_SolrStore.py | 34 ++++++++++++++++++----
3 files changed, 42 insertions(+), 9 deletions(-)
diff --git
a/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py
b/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py
index 602fcd6..041cac5 100644
--- a/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py
+++ b/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py
@@ -101,7 +101,7 @@ class TileSummarizingProcessor(TileProcessor):
logger.debug(f'calc standard_name')
standard_names = [dataset.variables[k].attrs.get('standard_name')for k
in data_var_name]
logger.debug(f'using standard_names: {standard_names}')
- tile_summary.standard_name = json.dumps(standard_names if
len(standard_names) > 1 else standard_names[0])
+ tile_summary.standard_name = json.dumps(standard_names)
logger.debug(f'copy tile_summary to tile')
tile.summary.CopyFrom(tile_summary)
return tile
diff --git a/granule_ingester/granule_ingester/writers/SolrStore.py
b/granule_ingester/granule_ingester/writers/SolrStore.py
index 3224379..2e873d7 100644
--- a/granule_ingester/granule_ingester/writers/SolrStore.py
+++ b/granule_ingester/granule_ingester/writers/SolrStore.py
@@ -105,7 +105,14 @@ class SolrStore(MetadataStore):
tile_type = tile.tile.WhichOneof("tile_type")
tile_data = getattr(tile.tile, tile_type)
- var_name = summary.standard_name if summary.standard_name else
summary.data_var_name
+ var_names = json.loads(summary.data_var_name)
+ standard_names = []
+ if summary.standard_name:
+ standard_names = json.loads(summary.standard_name)
+ if not isinstance(var_names, list):
+ var_names = [var_names]
+ if not isinstance(standard_names, list):
+ standard_names = [standard_names]
input_document = {
'table_s': self.TABLE_NAME,
@@ -115,7 +122,7 @@ class SolrStore(MetadataStore):
'sectionSpec_s': summary.section_spec,
'dataset_s': summary.dataset_name,
'granule_s': granule_file_name,
- 'tile_var_name_s': var_name,
+ 'tile_var_name_ss': var_names,
'tile_min_lon': bbox.lon_min,
'tile_max_lon': bbox.lon_max,
'tile_min_lat': bbox.lat_min,
@@ -129,6 +136,10 @@ class SolrStore(MetadataStore):
'tile_count_i': int(stats.count)
}
+ for var_name, standard_name in zip(var_names, standard_names):
+ if standard_name:
+ input_document[f'{var_name}.tile_standard_name_s'] =
standard_name
+
ecco_tile_id = getattr(tile_data, 'tile', None)
if ecco_tile_id:
input_document['ecco_tile'] = ecco_tile_id
diff --git a/granule_ingester/tests/writers/test_SolrStore.py
b/granule_ingester/tests/writers/test_SolrStore.py
index 89e54de..84c0e96 100644
--- a/granule_ingester/tests/writers/test_SolrStore.py
+++ b/granule_ingester/tests/writers/test_SolrStore.py
@@ -13,7 +13,7 @@ class TestSolrStore(unittest.TestCase):
tile.summary.tile_id = 'test_id'
tile.summary.dataset_name = 'test_dataset'
tile.summary.dataset_uuid = 'test_dataset_id'
- tile.summary.data_var_name = json.dumps(['test_variable'])
+ tile.summary.data_var_name = json.dumps('test_variable')
tile.summary.granule = 'test_granule_path'
tile.summary.section_spec = 'time:0:1,j:0:20,i:200:240'
tile.summary.bbox.lat_min = -180.1
@@ -26,7 +26,7 @@ class TestSolrStore(unittest.TestCase):
tile.summary.stats.count = 100
tile.summary.stats.min_time = 694224000
tile.summary.stats.max_time = 694310400
- tile.summary.standard_name = 'sea_surface_temperature'
+ tile.summary.standard_name = json.dumps('sea_surface_temperature')
tile.tile.ecco_tile.depth = 10.5
@@ -41,7 +41,8 @@ class TestSolrStore(unittest.TestCase):
self.assertEqual('test_dataset!test_id', solr_doc['solr_id_s'])
self.assertEqual('time:0:1,j:0:20,i:200:240',
solr_doc['sectionSpec_s'])
self.assertEqual('test_granule_path', solr_doc['granule_s'])
- self.assertEqual('sea_surface_temperature',
solr_doc['tile_var_name_s'])
+ self.assertEqual(['test_variable'], solr_doc['tile_var_name_ss'])
+ self.assertEqual('sea_surface_temperature',
solr_doc['test_variable.tile_standard_name_s'])
self.assertAlmostEqual(-90.5, solr_doc['tile_min_lon'])
self.assertAlmostEqual(90.0, solr_doc['tile_max_lon'])
self.assertAlmostEqual(-180.1, solr_doc['tile_min_lat'], delta=1E-5)
@@ -86,7 +87,7 @@ class TestSolrStore(unittest.TestCase):
self.assertEqual('test_dataset!test_id', solr_doc['solr_id_s'])
self.assertEqual('time:0:1,j:0:20,i:200:240',
solr_doc['sectionSpec_s'])
self.assertEqual('test_granule_path', solr_doc['granule_s'])
- self.assertEqual(['test_variable', 'test_variable_02'],
solr_doc['tile_var_name_s'])
+ self.assertEqual(['test_variable', 'test_variable_02'],
solr_doc['tile_var_name_ss'])
self.assertAlmostEqual(-90.5, solr_doc['tile_min_lon'])
self.assertAlmostEqual(90.0, solr_doc['tile_max_lon'])
self.assertAlmostEqual(-180.1, solr_doc['tile_min_lat'], delta=1E-5)
@@ -102,7 +103,7 @@ class TestSolrStore(unittest.TestCase):
def test_build_solr_doc_no_standard_name(self):
"""
When TileSummary.standard_name isn't available, the solr field
- tile_var_name_s should use TileSummary.data_var_name
+ VAR_NAME.tile_standard_name_s should not be present.
"""
tile = nexusproto.NexusTile()
tile.summary.tile_id = 'test_id'
@@ -112,4 +113,25 @@ class TestSolrStore(unittest.TestCase):
metadata_store = SolrStore()
solr_doc = metadata_store._build_solr_doc(tile)
- self.assertEqual(['test_variable', 'test_variable_02'],
solr_doc['tile_var_name_s'])
+ assert ['test_variable', 'test_variable_02'] ==
solr_doc['tile_var_name_ss']
+ assert 'test_variable.tile_standard_name_s' not in solr_doc
+ assert 'test_variable_02.tile_standard_name_s' not in solr_doc
+
+ def test_build_solr_doc_some_standard_names(self):
+ """
+ When TileSummary.standard_name is available for only some variables,
+ the solr field VAR_NAME.tile_standard_name_s should only be present for
+ those variables.
+ """
+ tile = nexusproto.NexusTile()
+ tile.summary.tile_id = 'test_id'
+ tile.summary.data_var_name = json.dumps(['test_variable',
'test_variable_02'])
+ tile.summary.standard_name = json.dumps(['sea_surface_temperature',
None])
+ tile.tile.ecco_tile.depth = 10.5
+
+ metadata_store = SolrStore()
+ solr_doc = metadata_store._build_solr_doc(tile)
+
+ assert ['test_variable', 'test_variable_02'] ==
solr_doc['tile_var_name_ss']
+ assert solr_doc['test_variable.tile_standard_name_s'] ==
'sea_surface_temperature'
+ assert 'test_variable_02.tile_standard_name_s' not in solr_doc