This is an automated email from the ASF dual-hosted git repository.

nchung pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git


The following commit(s) were added to refs/heads/dev by this push:
     new f674a5a  SDAP-323: Update summarizing processor and Solr schema to support multiple variables (#41)
f674a5a is described below

commit f674a5a2be8eec4f83b380d7dc56ceb53b16dd16
Author: Stepheny Perez <[email protected]>
AuthorDate: Wed Sep 22 17:03:11 2021 -0700

    SDAP-323: Update summarizing processor and Solr schema to support multiple variables (#41)
    
    * Updated solr doc for multi-var case
    
    * Fix single-variable multi-var standard name case
    
    * Don't include null standard name field
    
    * Added support for case where some variables have standard_name
---
 .../processors/TileSummarizingProcessor.py         |  2 +-
 .../granule_ingester/writers/SolrStore.py          | 15 ++++++++--
 granule_ingester/tests/writers/test_SolrStore.py   | 34 ++++++++++++++++++----
 3 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py b/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py
index 602fcd6..041cac5 100644
--- a/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py
+++ b/granule_ingester/granule_ingester/processors/TileSummarizingProcessor.py
@@ -101,7 +101,7 @@ class TileSummarizingProcessor(TileProcessor):
         logger.debug(f'calc standard_name')
         standard_names = [dataset.variables[k].attrs.get('standard_name')for k 
in data_var_name]
         logger.debug(f'using standard_names: {standard_names}')
-        tile_summary.standard_name = json.dumps(standard_names if 
len(standard_names) > 1 else standard_names[0])
+        tile_summary.standard_name = json.dumps(standard_names)
         logger.debug(f'copy tile_summary to tile')
         tile.summary.CopyFrom(tile_summary)
         return tile
diff --git a/granule_ingester/granule_ingester/writers/SolrStore.py b/granule_ingester/granule_ingester/writers/SolrStore.py
index 3224379..2e873d7 100644
--- a/granule_ingester/granule_ingester/writers/SolrStore.py
+++ b/granule_ingester/granule_ingester/writers/SolrStore.py
@@ -105,7 +105,14 @@ class SolrStore(MetadataStore):
         tile_type = tile.tile.WhichOneof("tile_type")
         tile_data = getattr(tile.tile, tile_type)
 
-        var_name = summary.standard_name if summary.standard_name else 
summary.data_var_name
+        var_names = json.loads(summary.data_var_name)
+        standard_names = []
+        if summary.standard_name:
+            standard_names = json.loads(summary.standard_name)
+        if not isinstance(var_names, list):
+            var_names = [var_names]
+        if not isinstance(standard_names, list):
+            standard_names = [standard_names]
 
         input_document = {
             'table_s': self.TABLE_NAME,
@@ -115,7 +122,7 @@ class SolrStore(MetadataStore):
             'sectionSpec_s': summary.section_spec,
             'dataset_s': summary.dataset_name,
             'granule_s': granule_file_name,
-            'tile_var_name_s': var_name,
+            'tile_var_name_ss': var_names,
             'tile_min_lon': bbox.lon_min,
             'tile_max_lon': bbox.lon_max,
             'tile_min_lat': bbox.lat_min,
@@ -129,6 +136,10 @@ class SolrStore(MetadataStore):
             'tile_count_i': int(stats.count)
         }
 
+        for var_name, standard_name in zip(var_names, standard_names):
+            if standard_name:
+                input_document[f'{var_name}.tile_standard_name_s'] = 
standard_name
+
         ecco_tile_id = getattr(tile_data, 'tile', None)
         if ecco_tile_id:
             input_document['ecco_tile'] = ecco_tile_id
diff --git a/granule_ingester/tests/writers/test_SolrStore.py b/granule_ingester/tests/writers/test_SolrStore.py
index 89e54de..84c0e96 100644
--- a/granule_ingester/tests/writers/test_SolrStore.py
+++ b/granule_ingester/tests/writers/test_SolrStore.py
@@ -13,7 +13,7 @@ class TestSolrStore(unittest.TestCase):
         tile.summary.tile_id = 'test_id'
         tile.summary.dataset_name = 'test_dataset'
         tile.summary.dataset_uuid = 'test_dataset_id'
-        tile.summary.data_var_name = json.dumps(['test_variable'])
+        tile.summary.data_var_name = json.dumps('test_variable')
         tile.summary.granule = 'test_granule_path'
         tile.summary.section_spec = 'time:0:1,j:0:20,i:200:240'
         tile.summary.bbox.lat_min = -180.1
@@ -26,7 +26,7 @@ class TestSolrStore(unittest.TestCase):
         tile.summary.stats.count = 100
         tile.summary.stats.min_time = 694224000
         tile.summary.stats.max_time = 694310400
-        tile.summary.standard_name = 'sea_surface_temperature'
+        tile.summary.standard_name = json.dumps('sea_surface_temperature')
 
         tile.tile.ecco_tile.depth = 10.5
 
@@ -41,7 +41,8 @@ class TestSolrStore(unittest.TestCase):
         self.assertEqual('test_dataset!test_id', solr_doc['solr_id_s'])
         self.assertEqual('time:0:1,j:0:20,i:200:240', 
solr_doc['sectionSpec_s'])
         self.assertEqual('test_granule_path', solr_doc['granule_s'])
-        self.assertEqual('sea_surface_temperature', 
solr_doc['tile_var_name_s'])
+        self.assertEqual(['test_variable'], solr_doc['tile_var_name_ss'])
+        self.assertEqual('sea_surface_temperature', 
solr_doc['test_variable.tile_standard_name_s'])
         self.assertAlmostEqual(-90.5, solr_doc['tile_min_lon'])
         self.assertAlmostEqual(90.0, solr_doc['tile_max_lon'])
         self.assertAlmostEqual(-180.1, solr_doc['tile_min_lat'], delta=1E-5)
@@ -86,7 +87,7 @@ class TestSolrStore(unittest.TestCase):
         self.assertEqual('test_dataset!test_id', solr_doc['solr_id_s'])
         self.assertEqual('time:0:1,j:0:20,i:200:240', 
solr_doc['sectionSpec_s'])
         self.assertEqual('test_granule_path', solr_doc['granule_s'])
-        self.assertEqual(['test_variable', 'test_variable_02'], 
solr_doc['tile_var_name_s'])
+        self.assertEqual(['test_variable', 'test_variable_02'], 
solr_doc['tile_var_name_ss'])
         self.assertAlmostEqual(-90.5, solr_doc['tile_min_lon'])
         self.assertAlmostEqual(90.0, solr_doc['tile_max_lon'])
         self.assertAlmostEqual(-180.1, solr_doc['tile_min_lat'], delta=1E-5)
@@ -102,7 +103,7 @@ class TestSolrStore(unittest.TestCase):
     def test_build_solr_doc_no_standard_name(self):
         """
         When TileSummary.standard_name isn't available, the solr field
-        tile_var_name_s should use TileSummary.data_var_name
+        VAR_NAME.tile_standard_name_s should not be present.
         """
         tile = nexusproto.NexusTile()
         tile.summary.tile_id = 'test_id'
@@ -112,4 +113,25 @@ class TestSolrStore(unittest.TestCase):
         metadata_store = SolrStore()
         solr_doc = metadata_store._build_solr_doc(tile)
 
-        self.assertEqual(['test_variable', 'test_variable_02'], 
solr_doc['tile_var_name_s'])
+        assert ['test_variable', 'test_variable_02'] == 
solr_doc['tile_var_name_ss']
+        assert 'test_variable.tile_standard_name_s' not in solr_doc
+        assert 'test_variable_02.tile_standard_name_s' not in solr_doc
+
+    def test_build_solr_doc_some_standard_names(self):
+        """
+        When TileSummary.standard_name isn't available, the solr field
+        VAR_NAME.tile_standard_name_s should only be present for the
+        appropriate variables.
+        """
+        tile = nexusproto.NexusTile()
+        tile.summary.tile_id = 'test_id'
+        tile.summary.data_var_name = json.dumps(['test_variable', 
'test_variable_02'])
+        tile.summary.standard_name = json.dumps(['sea_surface_temperature', 
None])
+        tile.tile.ecco_tile.depth = 10.5
+
+        metadata_store = SolrStore()
+        solr_doc = metadata_store._build_solr_doc(tile)
+
+        assert ['test_variable', 'test_variable_02'] == 
solr_doc['tile_var_name_ss']
+        assert solr_doc['test_variable.tile_standard_name_s'] == 
'sea_surface_temperature'
+        assert 'test_variable_02.tile_standard_name_s' not in solr_doc

Reply via email to