ntquach closed pull request #7: SDAP-37 Update SolrProxy to allow tile search 
by arbitrary tile metadata
URL: https://github.com/apache/incubator-sdap-nexus/pull/7
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request comes from a fork, GitHub does not show its
diff after the merge, so the full diff is reproduced below:

diff --git a/data-access/nexustiles/dao/SolrProxy.pyx 
b/data-access/nexustiles/dao/SolrProxy.pyx
index cff728b..d99ebad 100644
--- a/data-access/nexustiles/dao/SolrProxy.pyx
+++ b/data-access/nexustiles/dao/SolrProxy.pyx
@@ -526,6 +526,45 @@ class SolrProxy(object):
 
         return self.do_query_all(*(search, None, None, False, None), 
**additionalparams)
 
+    def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, 
end_time=-1, **kwargs):
+        """
+        Get a list of tile metadata that matches the specified metadata, 
start_time, end_time.
+        :param metadata: List of metadata values to search for tiles e.g 
["river_id_i:1", "granule_s:granule_name"]
+        :param ds: The dataset name to search
+        :param start_time: The start time to search for tiles
+        :param end_time: The end time to search for tiles
+        :return: A list of tile metadata
+        """
+        search = 'dataset_s:%s' % ds
+
+        additionalparams = {
+            'fq': metadata
+        }
+
+        if 0 < start_time <= end_time:
+            
additionalparams['fq'].append(self.get_formatted_time_clause(start_time, 
end_time))
+
+        self._merge_kwargs(additionalparams, **kwargs)
+
+        return self.do_query_all(
+            *(search, None, None, False, None),
+            **additionalparams)
+
+    def get_formatted_time_clause(self, start_time, end_time):
+        search_start_s = 
datetime.utcfromtimestamp(start_time).strftime('%Y-%m-%dT%H:%M:%SZ')
+        search_end_s = 
datetime.utcfromtimestamp(end_time).strftime('%Y-%m-%dT%H:%M:%SZ')
+
+        time_clause = "(" \
+                      "tile_min_time_dt:[%s TO %s] " \
+                      "OR tile_max_time_dt:[%s TO %s] " \
+                      "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s 
TO *])" \
+                      ")" % (
+                          search_start_s, search_end_s,
+                          search_start_s, search_end_s,
+                          search_start_s, search_end_s
+                          )
+        return time_clause
+
     def do_query(self, *args, **params):
 
         response = self.do_query_raw(*args, **params)
diff --git a/data-access/nexustiles/nexustiles.py 
b/data-access/nexustiles/nexustiles.py
index 26bd7a2..71a91ec 100644
--- a/data-access/nexustiles/nexustiles.py
+++ b/data-access/nexustiles/nexustiles.py
@@ -163,6 +163,21 @@ def find_tiles_in_polygon(self, bounding_polygon, ds=None, 
start_time=0, end_tim
                                                                      **kwargs)
         return tiles
 
+    @tile_data()
+    def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, 
end_time=-1, **kwargs):
+        """
+        Return list of tiles that matches the specified metadata, start_time, 
end_time.
+        :param metadata: List of metadata values to search for tiles e.g 
["river_id_i:1", "granule_s:granule_name"]
+        :param ds: The dataset name to search
+        :param start_time: The start time to search for tiles
+        :param end_time: The end time to search for tiles
+        :return: A list of tiles
+        """
+        tiles = self._metadatastore.find_all_tiles_by_metadata(metadata, ds, 
start_time, end_time, **kwargs)
+        tiles = self.mask_tiles_to_time_range(start_time, end_time, tiles)
+
+        return tiles
+
     @tile_data()
     def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, 
**kwargs):
         """
@@ -312,6 +327,29 @@ def mask_tiles_to_polygon(self, bounding_polygon, tiles):
 
         return tiles
 
+    def mask_tiles_to_time_range(self, start_time, end_time, tiles):
+        """
+        Masks data in tiles to specified time range.
+        :param start_time: The start time to search for tiles
+        :param end_time: The end time to search for tiles
+        :param tiles: List of tiles
+        :return: A list tiles with data masked to specified time range
+        """
+        if 0 < start_time <= end_time:
+            for tile in tiles:
+                tile.times = ma.masked_outside(tile.times, start_time, 
end_time)
+
+                # Or together the masks of the individual arrays to create the 
new mask
+                data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, 
np.newaxis] \
+                            | ma.getmaskarray(tile.latitudes)[np.newaxis, :, 
np.newaxis] \
+                            | ma.getmaskarray(tile.longitudes)[np.newaxis, 
np.newaxis, :]
+
+                tile.data = ma.masked_where(data_mask, tile.data)
+
+            tiles[:] = [tile for tile in tiles if not tile.data.mask.all()]
+
+        return tiles
+
     def fetch_data_for_tiles(self, *tiles):
 
         nexus_tile_ids = set([tile.tile_id for tile in tiles])
diff --git a/data-access/tests/solrproxy_test.py 
b/data-access/tests/solrproxy_test.py
index c098e47..6490baa 100644
--- a/data-access/tests/solrproxy_test.py
+++ b/data-access/tests/solrproxy_test.py
@@ -71,3 +71,8 @@ def get_data_series_list_test(self):
         result = self.proxy.get_data_series_list()
 
         print len(result)
+
+    def test_find_all_tiles_by_metadata(self):
+        result = 
self.proxy.find_all_tiles_by_metadata(['granule_s:19811114120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc'],
 ds="AVHRR_OI_L4_GHRSST_NCEI")
+
+        print len(result)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to