This is an automated email from the ASF dual-hosted git repository. skperez pushed a commit to branch SDAP-467 in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit d7fc917ffa01fe14e314142cd45635f6b1fa4bca Author: skorper <[email protected]> AuthorDate: Wed Jun 28 14:38:16 2023 -0700 pagination --- CHANGELOG.md | 1 + analysis/webservice/algorithms/doms/BaseDomsHandler.py | 11 ++++++++++- .../webservice/algorithms/doms/ResultsRetrieval.py | 6 ++++-- analysis/webservice/algorithms/doms/ResultsStorage.py | 18 +++++++++--------- data-access/requirements.txt | 1 + 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fad2a2..0496e78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added +- SDAP-467: Added pagination to cdmsresults endpoint - SDAP-461: Added 4 remaining Saildrone insitu datasets. ### Changed - SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`. diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index a31666d..0e6b308 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -85,7 +85,7 @@ class DomsEncoder(json.JSONEncoder): class DomsQueryResults(NexusResults): def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None, - executionId=None, status_code=200): + executionId=None, status_code=200, page_num=None, page_size=None): NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions, status_code=status_code) self.__args = args @@ -94,6 +94,10 @@ class DomsQueryResults(NexusResults): self.__details = details self.__executionId = str(executionId) + # Add page num and size to details block + self.__details['pageNum'] = page_num + self.__details['pageSize'] = page_size + def toJson(self): bounds = self.__bounds.toMap() if self.__bounds is not None else 
{} return json.dumps( @@ -276,6 +280,9 @@ class DomsCSVFormatter: {"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)}, {"Global Attribute": "URI_Matchup", "Value": "https://doms.jpl.nasa.gov/domsresults?id=" + executionId + "&output=CSV"}, # TODO how to replace with actual req URL + + {"Global Attribute": "CDMS_page_num", "Value": details["pageNum"]}, + {"Global Attribute": "CDMS_page_size", "Value": details["pageSize"]}, ] writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys())) @@ -326,6 +333,8 @@ class DomsNetCDFFormatter: dataset.CDMS_primary = params["primary"] dataset.CDMS_time_to_complete = details["timeToComplete"] dataset.CDMS_time_to_complete_units = "seconds" + dataset.CDMS_page_num = details["pageNum"] + dataset.CDMS_page_size = details["pageSize"] insituDatasets = params["matchup"] insituLinks = set() diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py index c3b95b0..f03c1ca 100644 --- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py +++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py @@ -35,6 +35,8 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): def calc(self, computeOptions, **args): execution_id = computeOptions.get_argument("id", None) + page_num = computeOptions.get_int_arg('pageNum', default=1) + page_size = computeOptions.get_int_arg('pageSize', default=1000) try: execution_id = uuid.UUID(execution_id) @@ -44,7 +46,7 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): simple_results = computeOptions.get_boolean_arg("simpleResults", default=False) with ResultsStorage.ResultsRetrieval(self.config) as storage: - params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results) + params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size) return 
BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=len(data), - computeOptions=None, executionId=execution_id) + computeOptions=None, executionId=execution_id, page_num=page_num, page_size=page_size) diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 7a9a48d..98409d1 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -26,7 +26,7 @@ import pkg_resources from cassandra.auth import PlainTextAuthProvider from cassandra.cluster import Cluster from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy -from cassandra.query import BatchStatement +from cassandra.query import BatchStatement, SimpleStatement from pytz import UTC from webservice.algorithms.doms.BaseDomsHandler import DomsEncoder from webservice.webmodel import NexusProcessingException @@ -274,17 +274,17 @@ class ResultsRetrieval(AbstractResultsContainer): def __init__(self, config=None): AbstractResultsContainer.__init__(self, config) - def retrieveResults(self, execution_id, trim_data=False): + def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1000): if isinstance(execution_id, str): execution_id = uuid.UUID(execution_id) params = self.retrieveParams(execution_id) stats = self.__retrieveStats(execution_id) - data = self.__retrieveData(execution_id, trim_data=trim_data) + data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size) return params, stats, data - def __retrieveData(self, id, trim_data=False): - dataMap = self.__retrievePrimaryData(id, trim_data=trim_data) + def __retrieveData(self, id, trim_data=False, page_num=1, page_size=1000): + dataMap = self.__retrievePrimaryData(id, trim_data=trim_data, page_num=page_num, page_size=page_size) self.__enrichPrimaryDataWithMatches(id, dataMap, trim_data=trim_data) data = 
[dataMap[name] for name in dataMap] return data @@ -302,12 +302,12 @@ class ResultsRetrieval(AbstractResultsContainer): else: print(row) - def __retrievePrimaryData(self, id, trim_data=False): - cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true" - rows = self._session.execute(cql, (id,)) + def __retrievePrimaryData(self, id, trim_data=False, page_num=1, page_size=1000): + cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true limit %s" + rows = self._session.execute(cql, [id, page_num * page_size]) dataMap = {} - for row in rows: + for row in rows[(page_num-1)*page_size:page_num*page_size]: entry = self.__rowToDataEntry(row, trim_data=trim_data) dataMap[row.value_id] = entry return dataMap diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 5127018..d2ffd3f 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -20,3 +20,4 @@ urllib3==1.26.2 requests nexusproto Shapely +numpy==1.24.3
