This is an automated email from the ASF dual-hosted git repository.
nchung pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
The following commit(s) were added to refs/heads/master by this push:
new 669157e SDAP-19 Upgrade Solr client in NEXUS data-access (#14)
669157e is described below
commit 669157ec8c6aec8babdf1ca7f22fb2ceeb4ef715
Author: Nga Quach <[email protected]>
AuthorDate: Wed Apr 4 08:10:24 2018 -0700
SDAP-19 Upgrade Solr client in NEXUS data-access (#14)
---
data-access/nexustiles/dao/SolrProxy.pyx | 78 ++++++++++++++++----------------
data-access/nexustiles/nexustiles.py | 6 +--
data-access/requirements.txt | 2 +-
data-access/setup.py | 2 +-
data-access/tests/solrproxy_test.py | 6 +++
5 files changed, 50 insertions(+), 44 deletions(-)
diff --git a/data-access/nexustiles/dao/SolrProxy.pyx
b/data-access/nexustiles/dao/SolrProxy.pyx
index 13c9c81..20dfeeb 100644
--- a/data-access/nexustiles/dao/SolrProxy.pyx
+++ b/data-access/nexustiles/dao/SolrProxy.pyx
@@ -18,14 +18,17 @@ import logging
import threading
import time
from datetime import datetime
+from pytz import timezone, UTC
import requests
-import solr
+import pysolr
from shapely import wkt
SOLR_CON_LOCK = threading.Lock()
thread_local = threading.local()
+EPOCH = timezone('UTC').localize(datetime(1970, 1, 1))
+
class SolrProxy(object):
def __init__(self, config):
@@ -36,7 +39,7 @@ class SolrProxy(object):
with SOLR_CON_LOCK:
solrcon = getattr(thread_local, 'solrcon', None)
if solrcon is None:
- solrcon = solr.Solr('http://%s/solr/%s' % (self.solrUrl,
self.solrCore), debug=False)
+ solrcon = pysolr.Solr('http://%s/solr/%s' % (self.solrUrl,
self.solrCore))
thread_local.solrcon = solrcon
self.solrcon = solrcon
@@ -94,7 +97,7 @@ class SolrProxy(object):
results, start, found = self.do_query(*(search, None, None, True,
None), **additionalparams)
- return results[0]['tile_min_time_dt']
+ return self.convert_iso_to_datetime(results[0]['tile_min_time_dt'])
def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs):
@@ -116,7 +119,7 @@ class SolrProxy(object):
results, start, found = self.do_query(*(search, None, None, True,
None), **additionalparams)
- return results[0]['tile_max_time_dt']
+ return self.convert_iso_to_datetime(results[0]['tile_max_time_dt'])
def find_min_max_date_from_granule(self, ds, granule_name, **kwargs):
search = 'dataset_s:%s' % ds
@@ -132,7 +135,7 @@ class SolrProxy(object):
self._merge_kwargs(additionalparams, **kwargs)
results, start, found = self.do_query(*(search, None, None, False,
None), **additionalparams)
- start_time = results[0]['tile_min_time_dt']
+ start_time =
self.convert_iso_to_datetime(results[0]['tile_min_time_dt'])
kwargs['fl'] = 'tile_max_time_dt'
kwargs['sort'] = ['tile_max_time_dt desc']
@@ -144,7 +147,7 @@ class SolrProxy(object):
self._merge_kwargs(additionalparams, **kwargs)
results, start, found = self.do_query(*(search, None, None, False,
None), **additionalparams)
- end_time = results[0]['tile_max_time_dt']
+ end_time = self.convert_iso_to_datetime(results[0]['tile_max_time_dt'])
return start_time, end_time
@@ -153,8 +156,8 @@ class SolrProxy(object):
datasets = self.get_data_series_list_simple()
for dataset in datasets:
- dataset['start'] = time.mktime(self.find_min_date_from_tiles([],
ds=dataset['title']).timetuple()) * 1000
- dataset['end'] = time.mktime(self.find_max_date_from_tiles([],
ds=dataset['title']).timetuple()) * 1000
+ dataset['start'] = (self.find_min_date_from_tiles([],
ds=dataset['title']) - EPOCH).total_seconds() * 1000
+ dataset['end'] = (self.find_max_date_from_tiles([],
ds=dataset['title']) - EPOCH).total_seconds() * 1000
return datasets
@@ -164,12 +167,13 @@ class SolrProxy(object):
'rows': 0,
"facet": "true",
"facet.field": "dataset_s",
- "facet.mincount": "1"
+ "facet.mincount": "1",
+ "facet.limit": "-1"
}
response = self.do_query_raw(*(search, None, None, False, None),
**params)
l = []
- for g, v in response.facet_counts["facet_fields"]["dataset_s"].items():
+ for g, v in
zip(*[iter(response.facets["facet_fields"]["dataset_s"])]*2):
l.append({
"shortName": g,
"title": g,
@@ -194,17 +198,17 @@ class SolrProxy(object):
stats = {}
- for g in response.facet_counts["facet_pivot"]["dataset_s"]:
+ for g in response.facets["facet_pivot"]["dataset_s"]:
if g["value"] == ds:
- stats["start"] =
time.mktime(g["stats"]["stats_fields"]["tile_max_time_dt"]["min"].timetuple())
* 1000
- stats["end"] =
time.mktime(g["stats"]["stats_fields"]["tile_max_time_dt"]["max"].timetuple())
* 1000
+ stats["start"] =
self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["min"])
+ stats["end"] =
self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["max"])
stats["minValue"] =
g["stats"]["stats_fields"]["tile_min_val_d"]["min"]
stats["maxValue"] =
g["stats"]["stats_fields"]["tile_max_val_d"]["max"]
stats["availableDates"] = []
- for dt in response.facet_counts["facet_fields"]["tile_max_time_dt"]:
- stats["availableDates"].append(time.mktime(datetime.strptime(dt,
"%Y-%m-%dT%H:%M:%SZ").timetuple()) * 1000)
+ for dt in response.facets["facet_fields"]["tile_max_time_dt"][::2]:
+ stats["availableDates"].append(self.convert_iso_to_timestamp(dt))
stats["availableDates"] = sorted(stats["availableDates"])
@@ -244,9 +248,9 @@ class SolrProxy(object):
],
'rows': 0,
'facet': 'true',
- 'facet_field': 'tile_min_time_dt',
- 'facet_mincount': '1',
- 'facet_limit': '-1'
+ 'facet.field': 'tile_min_time_dt',
+ 'facet.mincount': '1',
+ 'facet.limit': '-1'
}
self._merge_kwargs(additionalparams, **kwargs)
@@ -255,7 +259,7 @@ class SolrProxy(object):
daysinrangeasc = sorted(
[(datetime.strptime(a_date, '%Y-%m-%dT%H:%M:%SZ') -
datetime.utcfromtimestamp(0)).total_seconds() for a_date
- in
response.facet_counts['facet_fields']['tile_min_time_dt'].keys()])
+ in response.facets['facet_fields']['tile_min_time_dt'][::2]])
return daysinrangeasc
@@ -391,7 +395,7 @@ class SolrProxy(object):
response = self.do_query_raw(*(search, None, None, False, None),
**additionalparams)
- distinct_bounds = [wkt.loads(key).bounds for key in
response.facet_counts["facet_fields"]["geo_s"].keys()]
+ distinct_bounds = [wkt.loads(key).bounds for key in
response.facets["facet_fields"]["geo_s"][::2]]
return distinct_bounds
@@ -604,23 +608,15 @@ class SolrProxy(object):
response = self.do_query_raw(*args, **params)
- return response.results, response.start, response.numFound
+ return response.docs, response.raw_response['response']['start'],
response.hits
def do_query_raw(self, *args, **params):
- # fl only works when passed as the second argument to solrcon.select
- if 'fl' in params.keys():
- fl = params['fl']
- del (params['fl'])
- else:
- fl = args[1]
+ if 'fl' not in params.keys() and args[1]:
+ params['fl'] = args[1]
- # sort only works when passed as the fourth argument to solrcon.select
- if 'sort' in params.keys():
- s = ','.join(params['sort'])
- del (params['sort'])
- else:
- s = args[4]
+ if 'sort' not in params.keys() and args[4]:
+ params['sort'] = args[4]
# If dataset_s is specified as the search term,
# add the _route_ parameter to limit the search to the correct shard
@@ -628,10 +624,8 @@ class SolrProxy(object):
ds = args[0].split(':')[-1]
params['shard_keys'] = ds + '!'
- args = (args[0],) + (fl,) + (args[2:4]) + (s,)
-
with SOLR_CON_LOCK:
- response = self.solrcon.select(*args, **params)
+ response = self.solrcon.search(args[0], **params)
return response
@@ -640,19 +634,25 @@ class SolrProxy(object):
results = []
response = self.do_query_raw(*args, **params)
- results.extend(response.results)
+ results.extend(response.docs)
- limit = min(params.get('limit', float('inf')), response.numFound)
+ limit = min(params.get('limit', float('inf')), response.hits)
while len(results) < limit:
params['start'] = len(results)
response = self.do_query_raw(*args, **params)
- results.extend(response.results)
+ results.extend(response.docs)
assert len(results) == limit
return results
+ def convert_iso_to_datetime(self, date):
+ return datetime.strptime(date,
"%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC)
+
+ def convert_iso_to_timestamp(self, date):
+ return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() *
1000
+
def ping(self):
solrAdminPing = 'http://%s/solr/%s/admin/ping' % (self.solrUrl,
self.solrCore)
try:
diff --git a/data-access/nexustiles/nexustiles.py
b/data-access/nexustiles/nexustiles.py
index c3b08cd..81f086f 100644
--- a/data-access/nexustiles/nexustiles.py
+++ b/data-access/nexustiles/nexustiles.py
@@ -25,7 +25,7 @@ import dao.CassandraProxy
import dao.S3Proxy
import dao.DynamoProxy
import dao.SolrProxy
-from pytz import timezone
+from pytz import timezone, UTC
from shapely.geometry import MultiPolygon, box
from model.nexusmodel import Tile, BBox, TileStats
@@ -446,12 +446,12 @@ class NexusTileService(object):
pass
try:
- tile.min_time = solr_doc['tile_min_time_dt']
+ tile.min_time =
datetime.strptime(solr_doc['tile_min_time_dt'],
"%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC)
except KeyError:
pass
try:
- tile.max_time = solr_doc['tile_max_time_dt']
+ tile.max_time =
datetime.strptime(solr_doc['tile_max_time_dt'],
"%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC)
except KeyError:
pass
diff --git a/data-access/requirements.txt b/data-access/requirements.txt
index 4975e61..1901db2 100644
--- a/data-access/requirements.txt
+++ b/data-access/requirements.txt
@@ -36,6 +36,6 @@ Shapely==1.5.17
simplegeneric==0.8.1
six==1.10.0
solrcloudpy==2.4.1
-solrpy==0.9.7
+pysolr==3.7.0
traitlets==4.3.2
wcwidth==0.1.7
diff --git a/data-access/setup.py b/data-access/setup.py
index ee1201d..532dede 100644
--- a/data-access/setup.py
+++ b/data-access/setup.py
@@ -34,7 +34,7 @@ setuptools.setup(
setup_requires=['cython'],
install_requires=[
'cassandra-driver==3.5.0',
- 'solrpy==0.9.7',
+ 'pysolr==3.7.0',
'requests',
'nexusproto==1.0.0-SNAPSHOT',
'shapely'
diff --git a/data-access/tests/solrproxy_test.py
b/data-access/tests/solrproxy_test.py
index 73936a7..8eaa655 100644
--- a/data-access/tests/solrproxy_test.py
+++ b/data-access/tests/solrproxy_test.py
@@ -83,3 +83,9 @@ class TestQuery(unittest.TestCase):
metadata=['granule_s:19811114120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.0.nc'])
print(tile_count)
+
+ def test_get_data_series_stats(self):
+ print(self.proxy.get_data_series_stats('AVHRR_OI_L4_GHRSST_NCEI'))
+
+ def test_find_days_in_range_asc(self):
+ print(self.proxy.find_days_in_range_asc(-90, 90, -180, 180,
'AVHRR_OI_L4_GHRSST_NCEI', 1, time.time()))
--
To stop receiving notification emails like this one, please contact
[email protected].