This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
The following commit(s) were added to refs/heads/dev by this push:
new 6f48d8a SDAP-270: Fix bad solr endpoints in solr history manager (#8)
6f48d8a is described below
commit 6f48d8a5da137e06ceb2284f5a7e5002a0a22113
Author: Eamon Ford <[email protected]>
AuthorDate: Mon Aug 3 19:06:18 2020 -0700
SDAP-270: Fix bad solr endpoints in solr history manager (#8)
Co-authored-by: Eamon Ford <[email protected]>
---
.../history_manager/SolrIngestionHistory.py | 40 ++++++++++------------
1 file changed, 18 insertions(+), 22 deletions(-)
diff --git
a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
index 1ae7156..319fa3e 100644
---
a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
+++
b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
@@ -33,10 +33,10 @@ class SolrIngestionHistory(IngestionHistory):
def __init__(self, solr_url: str, dataset_id: str, signature_fun=None):
try:
- self._solr_url = solr_url
+ self._url_prefix = f"{solr_url.strip('/')}/solr"
self._create_collection_if_needed()
- self._solr_granules = pysolr.Solr('/'.join([solr_url.strip('/'),
self._granule_collection_name]))
- self._solr_datasets = pysolr.Solr('/'.join([solr_url.strip('/'),
self._dataset_collection_name]))
+ self._solr_granules =
pysolr.Solr(f"{self._url_prefix}/{self._granule_collection_name}")
+ self._solr_datasets =
pysolr.Solr(f"{self._url_prefix}/{self._dataset_collection_name}")
self._dataset_id = dataset_id
self._signature_fun = md5sum_from_filepath if signature_fun is
None else signature_fun
self._latest_ingested_file_update = self._get_latest_file_update()
@@ -63,7 +63,7 @@ class SolrIngestionHistory(IngestionHistory):
self._solr_datasets.add([{
'id': self._dataset_id,
'dataset_s': self._dataset_id,
- 'latest_update_l': self._latest_ingested_file_update}])
+ 'latest_update_l': int(self._latest_ingested_file_update)}])
self._solr_datasets.commit()
def _get_latest_file_update(self):
@@ -87,8 +87,8 @@ class SolrIngestionHistory(IngestionHistory):
self._req_session = requests.session()
payload = {'action': 'CLUSTERSTATUS'}
- result =
self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin',
'collections']),
- params=payload)
+ collections_endpoint = f"{self._url_prefix}/admin/collections"
+ result = self._req_session.get(collections_endpoint,
params=payload)
response = result.json()
node_number = len(response['cluster']['live_nodes'])
@@ -100,17 +100,15 @@ class SolrIngestionHistory(IngestionHistory):
'name': self._granule_collection_name,
'numShards': node_number
}
- result =
self._req_session.get('/'.join([self._solr_url.strip("/"), 'admin',
'collections']),
- params=payload)
+ result = self._req_session.get(collections_endpoint,
params=payload)
response = result.json()
logger.info(f"solr collection created {response}")
+
# Update schema
- schema_url = '/'.join([self._solr_url.strip('/'),
self._granule_collection_name, 'schema'])
- # granule_s # dataset_s so that all the granule of a dataset
are less likely to be on the same shard
- # self.add_unique_key_field(schema_url, "uniqueKey_s",
"StrField")
- self._add_field(schema_url, "dataset_s", "StrField")
- self._add_field(schema_url, "granule_s", "StrField")
- self._add_field(schema_url, "granule_signature_s", "StrField")
+            schema_endpoint =
f"{self._url_prefix}/{self._granule_collection_name}/schema"
+ self._add_field(schema_endpoint, "dataset_s", "string")
+ self._add_field(schema_endpoint, "granule_s", "string")
+ self._add_field(schema_endpoint, "granule_signature_s",
"string")
else:
logger.info(f"collection {self._granule_collection_name}
already exists")
@@ -121,16 +119,14 @@ class SolrIngestionHistory(IngestionHistory):
'name': self._dataset_collection_name,
'numShards': node_number
}
- result =
self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin',
'collections']),
- params=payload)
+ result = self._req_session.get(collections_endpoint,
params=payload)
response = result.json()
logger.info(f"solr collection created {response}")
+
# Update schema
- #
http://localhost:8983/solr/nexusdatasets/schema?_=1588555874864&wt=json
- schema_url = '/'.join([self._solr_url.strip('/'),
self._dataset_collection_name, 'schema'])
- # self.add_unique_key_field(schema_url, "uniqueKey_s",
"StrField")
- self._add_field(schema_url, "dataset_s", "StrField")
- self._add_field(schema_url, "latest_update_l", "TrieLongField")
+ schema_endpoint =
f"{self._url_prefix}/{self._dataset_collection_name}/schema"
+ self._add_field(schema_endpoint, "dataset_s", "string")
+ self._add_field(schema_endpoint, "latest_update_l",
"TrieLongField")
else:
logger.info(f"collection {self._dataset_collection_name}
already exists")
@@ -154,7 +150,7 @@ class SolrIngestionHistory(IngestionHistory):
"stored": False
}
}
- result = self._req_session.post(schema_url,
data=add_field_payload.__str__())
+ return self._req_session.post(schema_url,
data=str(add_field_payload).encode('utf-8'))
class DatasetIngestionHistorySolrException(Exception):