This is an automated email from the ASF dual-hosted git repository.

tloubrieu pushed a commit to branch ascending_latitudes
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
commit bf8c911f809428542296def9bf74c27c913d0faa
Author: Eamon Ford <[email protected]>
AuthorDate: Mon Aug 3 19:06:18 2020 -0700

    SDAP-270: Fix bad solr endpoints in solr history manager (#8)

    Co-authored-by: Eamon Ford <[email protected]>
---
 .../history_manager/SolrIngestionHistory.py | 40 ++++++++++------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
index 1ae7156..319fa3e 100644
--- a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
+++ b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
@@ -33,10 +33,10 @@ class SolrIngestionHistory(IngestionHistory):
 
     def __init__(self, solr_url: str, dataset_id: str, signature_fun=None):
         try:
-            self._solr_url = solr_url
+            self._url_prefix = f"{solr_url.strip('/')}/solr"
             self._create_collection_if_needed()
-            self._solr_granules = pysolr.Solr('/'.join([solr_url.strip('/'), self._granule_collection_name]))
-            self._solr_datasets = pysolr.Solr('/'.join([solr_url.strip('/'), self._dataset_collection_name]))
+            self._solr_granules = pysolr.Solr(f"{self._url_prefix}/{self._granule_collection_name}")
+            self._solr_datasets = pysolr.Solr(f"{self._url_prefix}/{self._dataset_collection_name}")
             self._dataset_id = dataset_id
             self._signature_fun = md5sum_from_filepath if signature_fun is None else signature_fun
             self._latest_ingested_file_update = self._get_latest_file_update()
@@ -63,7 +63,7 @@ class SolrIngestionHistory(IngestionHistory):
             self._solr_datasets.add([{
                 'id': self._dataset_id,
                 'dataset_s': self._dataset_id,
-                'latest_update_l': self._latest_ingested_file_update}])
+                'latest_update_l': int(self._latest_ingested_file_update)}])
             self._solr_datasets.commit()
 
     def _get_latest_file_update(self):
@@ -87,8 +87,8 @@ class SolrIngestionHistory(IngestionHistory):
             self._req_session = requests.session()
 
             payload = {'action': 'CLUSTERSTATUS'}
-            result = self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin', 'collections']),
-                                           params=payload)
+            collections_endpoint = f"{self._url_prefix}/admin/collections"
+            result = self._req_session.get(collections_endpoint, params=payload)
             response = result.json()
 
             node_number = len(response['cluster']['live_nodes'])
@@ -100,17 +100,15 @@ class SolrIngestionHistory(IngestionHistory):
                     'name': self._granule_collection_name,
                     'numShards': node_number
                 }
-                result = self._req_session.get('/'.join([self._solr_url.strip("/"), 'admin', 'collections']),
-                                               params=payload)
+                result = self._req_session.get(collections_endpoint, params=payload)
                 response = result.json()
                 logger.info(f"solr collection created {response}")
+
                 # Update schema
-                schema_url = '/'.join([self._solr_url.strip('/'), self._granule_collection_name, 'schema'])
-                # granule_s # dataset_s so that all the granule of a dataset are less likely to be on the same shard
-                # self.add_unique_key_field(schema_url, "uniqueKey_s", "StrField")
-                self._add_field(schema_url, "dataset_s", "StrField")
-                self._add_field(schema_url, "granule_s", "StrField")
-                self._add_field(schema_url, "granule_signature_s", "StrField")
+                schema_endpoint = f"{self._url_prefix}/{self._granule_collection_name}/schema"
+                self._add_field(schema_endpoint, "dataset_s", "string")
+                self._add_field(schema_endpoint, "granule_s", "string")
+                self._add_field(schema_endpoint, "granule_signature_s", "string")
             else:
                 logger.info(f"collection {self._granule_collection_name} already exists")
 
@@ -121,16 +119,14 @@ class SolrIngestionHistory(IngestionHistory):
                     'name': self._dataset_collection_name,
                     'numShards': node_number
                 }
-                result = self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin', 'collections']),
-                                               params=payload)
+                result = self._req_session.get(collections_endpoint, params=payload)
                 response = result.json()
                 logger.info(f"solr collection created {response}")
+
                 # Update schema
-                # http://localhost:8983/solr/nexusdatasets/schema?_=1588555874864&wt=json
-                schema_url = '/'.join([self._solr_url.strip('/'), self._dataset_collection_name, 'schema'])
-                # self.add_unique_key_field(schema_url, "uniqueKey_s", "StrField")
-                self._add_field(schema_url, "dataset_s", "StrField")
-                self._add_field(schema_url, "latest_update_l", "TrieLongField")
+                schema_endpoint = f"{self._url_prefix}/{self._dataset_collection_name}/schema"
+                self._add_field(schema_endpoint, "dataset_s", "string")
+                self._add_field(schema_endpoint, "latest_update_l", "TrieLongField")
             else:
                 logger.info(f"collection {self._dataset_collection_name} already exists")
 
@@ -154,7 +150,7 @@ class SolrIngestionHistory(IngestionHistory):
                 "stored": False
             }
         }
-        result = self._req_session.post(schema_url, data=add_field_payload.__str__())
+        return self._req_session.post(schema_url, data=str(add_field_payload).encode('utf-8'))
 
 
 class DatasetIngestionHistorySolrException(Exception):
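The heart of the change: every Solr endpoint is now derived from a single _url_prefix that appends the /solr context path to the configured base URL, instead of each call site joining path segments onto the bare solr_url. A minimal sketch of how the corrected endpoints compose; the base URL and the granule collection name below are illustrative assumptions rather than values from the commit (nexusdatasets appears in a comment the diff removes):

# Sketch only: shows how the corrected Solr endpoints are composed.
# solr_url and the collection names are assumed example values.
solr_url = "http://localhost:8983"          # hypothetical base URL passed to __init__
url_prefix = f"{solr_url.strip('/')}/solr"  # -> http://localhost:8983/solr

granule_collection = "nexusgranules"        # assumed for illustration
dataset_collection = "nexusdatasets"        # name seen in a comment the diff removes

endpoints = {
    "collections admin":  f"{url_prefix}/admin/collections",
    "granule collection": f"{url_prefix}/{granule_collection}",
    "granule schema":     f"{url_prefix}/{granule_collection}/schema",
    "dataset schema":     f"{url_prefix}/{dataset_collection}/schema",
}

for name, url in endpoints.items():
    print(f"{name}: {url}")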

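For context, _add_field targets Solr's Schema API: each call posts an add-field command to a collection's /schema endpoint. Below is a hedged, standalone sketch of that request; the endpoint and field are example values only, and it sends the command as JSON, whereas the code in the diff posts a stringified Python dict:

# Illustration of the Schema API call that _add_field wraps; not code from the commit.
import requests

schema_endpoint = "http://localhost:8983/solr/nexusdatasets/schema"  # hypothetical
add_field_payload = {
    "add-field": {
        "name": "dataset_s",   # example field name
        "type": "string",      # field type name, matching the new code's usage
        "stored": False
    }
}

# The Schema API accepts the add-field command body as JSON.
response = requests.post(schema_endpoint, json=add_field_payload)
print(response.status_code, response.text)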