http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetatomresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/datasetatomresponse.py b/src/main/python/libraries/edge/opensearch/datasetatomresponse.py new file mode 100644 index 0000000..dc11a93 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/datasetatomresponse.py @@ -0,0 +1,85 @@ +import datetime +import urllib + +from edge.opensearch.atomresponsebysolr import AtomResponseBySolr +from edge.dateutility import DateUtility + +class DatasetAtomResponse(AtomResponseBySolr): + def __init__(self, portalUrl, host, url, datasets): + super(DatasetAtomResponse, self).__init__() + self.portalUrl = portalUrl + self.host = host + self.url = url + self.datasets = datasets + + def _populateChannel(self, solrResponse): + self.variables.append({'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-granule-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }}) + + def _populateItem(self, solrResponse, doc, item): + persistentId = doc['Dataset-PersistentId'][0] + idTuple = ('datasetId', persistentId) + if persistentId == '': + idTuple = ('shortName', doc['Dataset-ShortName'][0]) + item.append({'name': 'title', 'value': doc['Dataset-LongName'][0]}) + item.append({'name': 'content', 'value': doc['Dataset-Description'][0]}) + + item.append({'name': 'link', 'attribute': {'href': self.url + self.searchBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('full', 'true')])), 'rel': 'enclosure', 'type': 'application/atom+xml', 'title': 'PO.DAAC Metadata' }}) + item.append({'name': 'link', 'attribute': {'href': self.url + self.metadataBasePath + 'dataset?' 
+ urllib.urlencode(dict([idTuple, ('format', 'iso')])), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'ISO-19115 Metadata' }}) + item.append({'name': 'link', 'attribute': {'href': self.url + self.metadataBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('format', 'gcmd')])), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'GCMD Metadata' }}) + + #Only generate granule search link if dataset has granules + if (doc['Dataset-ShortName'][0] in self.datasets): + supportedGranuleParams = dict([(key,value) for key,value in self.parameters.iteritems() if key in ['bbox', 'startTime', 'endTime']]) + if persistentId == '': + supportedGranuleParams['shortName'] = doc['Dataset-ShortName'][0] + else: + supportedGranuleParams['datasetId'] = persistentId + item.append({'name': 'link', 'attribute': {'href': self.url + self.searchBasePath + 'granule?' + urllib.urlencode(supportedGranuleParams), 'rel': 'search', 'type': 'application/atom+xml', 'title': 'Granule Search' }}) + + if 'Dataset-ImageUrl' in doc and doc['Dataset-ImageUrl'][0] != '': + item.append({'name': 'link', 'attribute': {'href': doc['Dataset-ImageUrl'][0], 'rel': 'enclosure', 'type': 'image/jpg', 'title': 'Thumbnail' }}) + + if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc: + url = dict(zip(doc['DatasetLocationPolicy-Type'], doc['DatasetLocationPolicy-BasePath'])) + if 'LOCAL-OPENDAP' in url: + item.append({'name': 'link', 'attribute': {'href': url['LOCAL-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }}) + elif 'REMOTE-OPENDAP' in url: + item.append({'name': 'link', 'attribute': {'href': url['REMOTE-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }}) + if 'LOCAL-FTP' in url: + item.append({'name': 'link', 'attribute': {'href': url['LOCAL-FTP'], 'rel': 'enclosure', 'type': 'text/plain', 'title': 'FTP URL' }}) + elif 'REMOTE-FTP' in url: + item.append({'name': 'link', 'attribute': {'href': url['REMOTE-FTP'], 'rel': 
'enclosure', 'type': 'text/plain', 'title': 'FTP URL' }}) + if doc['DatasetPolicy-ViewOnline'][0] == 'Y' and doc['DatasetPolicy-AccessType-Full'][0] in ['OPEN', 'PREVIEW', 'SIMULATED', 'REMOTE']: + portalUrl = self.portalUrl+'/'+doc['Dataset-ShortName'][0] + item.append({'name': 'link', 'attribute': {'href': portalUrl, 'rel': 'enclosure', 'type': 'text/html', 'title': 'Dataset Information' }}) + updated = None + if 'DatasetMetaHistory-LastRevisionDateLong' in doc and doc['DatasetMetaHistory-LastRevisionDateLong'][0] != '': + updated = DateUtility.convertTimeLongToIso(doc['DatasetMetaHistory-LastRevisionDateLong'][0]) + else: + updated = datetime.datetime.utcnow().isoformat()+'Z' + + item.append({'name': 'updated', 'value': updated}) + item.append({'name': 'id', 'value': persistentId}) + item.append({'namespace': 'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]}) + item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]}) + + if doc['DatasetCoverage-WestLon'][0] != '' and doc['DatasetCoverage-SouthLat'][0] != '' and doc['DatasetCoverage-EastLon'][0] != '' and doc['DatasetCoverage-NorthLat'][0] != '': + item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 'lowerCorner', 'value': ' '.join([doc['DatasetCoverage-WestLon'][0], doc['DatasetCoverage-SouthLat'][0]]) }, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['DatasetCoverage-EastLon'][0], doc['DatasetCoverage-NorthLat'][0]])}]}}) + + if 'DatasetCoverage-StartTimeLong' in doc and doc['DatasetCoverage-StartTimeLong'][0] != '': + item.append({'namespace': 'time', 'name': 'start', 'value': DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StartTimeLong'][0])}) + + if 'DatasetCoverage-StopTimeLong' in doc and doc['DatasetCoverage-StopTimeLong'][0] != '': + item.append({'namespace': 'time', 'name': 'end', 'value': 
DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StopTimeLong'][0])}) + + if 'full' in self.parameters and self.parameters['full']: + if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc: + for i, x in enumerate(doc['DatasetLocationPolicy-Type']): + item.append({'namespace': 'podaac', 'name': self._camelCaseStripHyphen(x.title()), 'value': doc['DatasetLocationPolicy-BasePath'][i]}) + del doc['DatasetLocationPolicy-Type'] + del doc['DatasetLocationPolicy-BasePath'] + + multiValuedElementsKeys = ('DatasetRegion-', 'DatasetCharacter-', 'DatasetCitation-', 'DatasetContact-Contact-', 'DatasetDatetime-', + 'DatasetInteger-', 'DatasetParameter-', 'DatasetProject-', 'DatasetReal-', 'DatasetResource-', + 'DatasetSoftware-', 'DatasetSource-', 'DatasetVersion-', 'Collection-') + self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys)
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py b/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py new file mode 100644 index 0000000..002bdc9 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py @@ -0,0 +1,11 @@ +from edge.opensearch.gcmdresponsebysolr import GcmdResponseBySolr + +class DatasetGcmdResponse(GcmdResponseBySolr): + def __init__(self, configuration): + super(DatasetGcmdResponse, self).__init__(configuration) + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + pass http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py b/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py new file mode 100644 index 0000000..f9c62a1 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py @@ -0,0 +1,233 @@ +from types import * +import logging +import urllib +import urlparse +import httplib +from xml.dom.minidom import Document +import json +import xml.sax.saxutils +import datetime +import codecs + +from edge.opensearch.responsewriter import ResponseWriter +from edge.dateutility import DateUtility +from edge.httputility import HttpUtility +from edge.spatialsearch import SpatialSearch +import re + +class DatasetGranuleWriter(ResponseWriter): + def __init__(self, configFilePath, requiredParams = None): + super(DatasetGranuleWriter, self).__init__(configFilePath, requiredParams) + self.solrGranuleResponse = None + + def get(self, requestHandler): + 
super(DatasetGranuleWriter, self).get(requestHandler) + #logging.debug('uri: '+str(requestHandler.request.headers)) + + startIndex = 0 + try: + startIndex = requestHandler.get_argument('startIndex') + except: + pass + + entriesPerPage = self._configuration.getint('solr', 'entriesPerPage') + try: + entriesPerPage = requestHandler.get_argument('itemsPerPage') + #cap entries per age at 400 + if (int(entriesPerPage) > 400): + entriesPerPage = 400 + except: + pass + + #pretty = True + try: + if requestHandler.get_argument('pretty').lower() == 'false': + self.pretty = False + except: + pass + + parameters = ['startTime', 'endTime', 'keyword', 'granuleName', 'datasetId', 'shortName', 'bbox', 'sortBy'] + #variables = {} + for parameter in parameters: + try: + value = requestHandler.get_argument(parameter) + self.variables[parameter] = value + except: + pass + + if 'keyword' in self.variables: + self.variables['keyword'] = self.variables['keyword'].replace('*', '') + self.variables['keyword'] = self.variables['keyword'].lower() + """ + if 'bbox' in variables: + points = variables['bbox'].split(',') + if len(points) == 4: + spatialSearch = SpatialSearch( + self._configuration.get('service', 'database') + ) + spatialResult = spatialSearch.searchGranules( + int(startIndex), + int(entriesPerPage), + float(points[0]), + float(points[1]), + float(points[2]), + float(points[3]) + ) + if len(spatialResult[0]) > 0: + variables['granuleIds'] = spatialResult[0] + variables['granuleIdsFound'] = spatialResult[1] + + del variables['bbox'] + """ + try: + self._getSolrResponse(startIndex, entriesPerPage, self.variables) + """ + solrJson = json.loads(solrResponse) + if len(solrJson['response']['docs']) >= 1: + dataset = solrJson['response']['docs'][0]['Dataset-ShortName'][0]; + logging.debug('Getting solr response for dataset ' + dataset) + solrDatasetResponse = self._getSingleSolrDatasetResponse({'shortName' : dataset}) + """ + except: + logging.exception('Failed to get solr response.') + 
""" + if 'granuleIdsFound' in variables: + #solrJson = json.loads(solrResponse) + numFound = solrJson['response']['numFound'] + solrJson['response']['numFound'] = int(variables['granuleIdsFound']) + solrJson['response']['start'] = int(startIndex) + solrJson['responseHeader']['params']['rows'] = numFound + solrResponse = json.dumps(solrJson) + + searchText = '' + if 'keyword' in variables: + searchText = variables['keyword'] + try: + openSearchResponse = self._generateOpenSearchResponse( + solrResponse, + solrDatasetResponse, + searchText, + self._configuration.get('service', 'url')+requestHandler.request.uri, + pretty + ) + requestHandler.set_header("Content-Type", "application/xml") + requestHandler.write(openSearchResponse) + except Exception as exception: + logging.exception(exception) + requestHandler.set_status(404) + requestHandler.write('ERROR - ' + str(exception)) + """ + + def _getSolrResponse(self, startIndex, entriesPerPage, variables): + query = self._constructSolrQuery(startIndex, entriesPerPage, variables) + url = self._configuration.get('solr', 'granuleUrl') + + httpUtility = HttpUtility() + httpUtility.getResponse(url+'/select/?'+query, self._onSolrGranuleResponse) + + def _constructSolrQuery(self, startIndex, entriesPerPage, variables): + #set default sort order + sort='Granule-StartTimeLong+desc' + queries = [] + for key, value in variables.iteritems(): + #query = '' + if key == 'startTime': + startTime = DateUtility.convertISOToUTCTimestamp(value) + if startTime is not None: + query = 'Granule-StartTimeLong:' + query += '['+str(startTime)+'%20TO%20*]' + queries.append(query) + elif key == 'endTime': + stopTime = DateUtility.convertISOToUTCTimestamp(value) + if stopTime is not None: + query = 'Granule-StartTimeLong:' + query += '[*%20TO%20'+str(stopTime)+']' + queries.append(query) + elif key == 'keyword': + newValue = urllib.quote(value) + + query = 'SearchableText-LowerCased:('+newValue+')' + queries.append(query) + elif key == 'datasetId': + 
query = 'Dataset-PersistentId:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'shortName': + query = 'Dataset-ShortName-Full:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'granuleName': + query = 'Granule-Name-Full:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'granuleIds': + granuleIds = [] + for granuleId in value: + granuleIds.append(str(granuleId)) + query = 'Granule-Id:('+'+OR+'.join(granuleIds)+')' + queries.append(query) + + startIndex = 0 + elif key == 'sortBy': + sortByMapping = {'timeAsc': 'Granule-StartTimeLong+asc', 'archiveTimeDesc': 'Granule-ArchiveTimeLong+desc'} + if value in sortByMapping.keys(): + sort = sortByMapping[value] + elif key == 'archiveTime': + query = 'Granule-ArchiveTimeLong:['+str(value)+'%20TO%20*]' + queries.append(query) + #if query != '': + # queries.append('%2B'+query) + + if len(queries) == 0: + queries.append('*') + + query = 'q='+'+AND+'.join(queries)+'&fq=Granule-AccessType:(OPEN+OR+PREVIEW+OR+SIMULATED+OR+REMOTE)+AND+Granule-Status:ONLINE&version=2.2&start='+str(startIndex)+'&rows='+str(entriesPerPage)+'&indent=on&wt=json&sort='+sort + logging.debug('solr query: '+query) + + return query + + def _readTemplate(self, path): + file = codecs.open(path, encoding='utf-8') + data = file.read() + file.close() + + return data + + def _generateOpenSearchResponse(self, solrGranuleResponse, solrDatasetResponse, pretty): + pass + + def _onSolrGranuleResponse(self, response): + if response.error: + self._handleException(str(response.error)) + else: + self.solrGranuleResponse = response.body + solrJson = json.loads(response.body) + if len(solrJson['response']['docs']) >= 1: + dataset = solrJson['response']['docs'][0]['Dataset-ShortName'][0]; + logging.debug('Getting solr response for dataset ' + dataset) + self._getSingleSolrDatasetResponse({'shortName' : dataset}, self._onSolrDatasetResponse) + else: + try: + openSearchResponse = 
self._generateOpenSearchResponse( + None, + None, + self.pretty + ) + self.requestHandler.set_header("Content-Type", "application/xml") + self.requestHandler.write(openSearchResponse) + self.requestHandler.finish() + except BaseException as exception: + self._handleException(str(exception)) + + def _onSolrDatasetResponse(self, response): + if response.error: + self._handleException(str(response.error)) + else: + try: + openSearchResponse = self._generateOpenSearchResponse( + self.solrGranuleResponse, + response.body, + self.pretty + ) + self.requestHandler.set_header("Content-Type", "application/xml") + self.requestHandler.write(openSearchResponse) + self.requestHandler.finish() + except BaseException as exception: + self._handleException(str(exception)) http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetisoresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/datasetisoresponse.py b/src/main/python/libraries/edge/opensearch/datasetisoresponse.py new file mode 100644 index 0000000..823d24a --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/datasetisoresponse.py @@ -0,0 +1,11 @@ +from edge.opensearch.isoresponsebysolr import IsoResponseBySolr + +class DatasetIsoResponse(IsoResponseBySolr): + def __init__(self): + super(DatasetIsoResponse, self).__init__() + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + pass http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py b/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py new file mode 100644 index 0000000..53e89ae --- /dev/null +++ 
b/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py @@ -0,0 +1,14 @@ +from edge.opensearch.responsebysolr import ResponseBySolr + +class DatasetResponseBySolr(ResponseBySolr): + def __init__(self, portalUrl): + super(DatasetResponseBySolr, self).__init__() + self.portalUrl = portalUrl + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + item.append({'name': 'title', 'value': doc['Dataset-LongName'][0]}) + item.append({'name': 'description', 'value': doc['Dataset-Description'][0]}) + item.append({'name': 'link', 'value': self.portalUrl+'/'+doc['Dataset-ShortName'][0]}) http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetrssresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/datasetrssresponse.py b/src/main/python/libraries/edge/opensearch/datasetrssresponse.py new file mode 100644 index 0000000..e9194bc --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/datasetrssresponse.py @@ -0,0 +1,85 @@ +import urllib +from edge.opensearch.rssresponsebysolr import RssResponseBySolr +from edge.dateutility import DateUtility + +class DatasetRssResponse(RssResponseBySolr): + def __init__(self, portalUrl, url, datasets): + super(DatasetRssResponse, self).__init__() + self.portalUrl = portalUrl + self.url = url + self.datasets = datasets + + def _populateChannel(self, solrResponse): + self.variables.append({'namespace': 'atom', 'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-granule-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }}) + + def _populateItem(self, solrResponse, doc, item): + persistentId = doc['Dataset-PersistentId'][0] + idTuple = ('datasetId', persistentId) + if persistentId == '': + idTuple = ('shortName', doc['Dataset-ShortName'][0]) + portalUrl = "" + if 
doc['DatasetPolicy-ViewOnline'][0] == 'Y' and doc['DatasetPolicy-AccessType-Full'][0] in ['OPEN', 'PREVIEW', 'SIMULATED', 'REMOTE']: + portalUrl = self.portalUrl+'/'+doc['Dataset-ShortName'][0] + item.append({'name': 'enclosure', 'attribute': {'url': portalUrl, 'type': 'text/html', 'length': '0'}}) + item.append({'name': 'title', 'value': doc['Dataset-LongName'][0]}) + item.append({'name': 'description', 'value': doc['Dataset-Description'][0]}) + item.append({'name': 'link', 'value': portalUrl}) + + item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.searchBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('full', 'true'), ('format', 'rss')])), 'type': 'application/rss+xml', 'length': '0'}}) + item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.metadataBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('format', 'iso')])), 'type': 'text/xml', 'length': '0'}}) + item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.metadataBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('format', 'gcmd')])), 'type': 'text/xml', 'length': '0'}}) + + #Only generate granule search link if dataset has granules + if (doc['Dataset-ShortName'][0] in self.datasets): + supportedGranuleParams = dict([(key,value) for key,value in self.parameters.iteritems() if key in ['bbox', 'startTime', 'endTime', 'format']]) + if persistentId == '': + supportedGranuleParams['shortName'] = doc['Dataset-ShortName'][0] + else: + supportedGranuleParams['datasetId'] = persistentId + item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.searchBasePath + 'granule?' 
+ urllib.urlencode(supportedGranuleParams), 'type': 'application/rss+xml', 'length': '0'}}) + + if 'Dataset-ImageUrl' in doc and doc['Dataset-ImageUrl'][0] != '': + item.append({'name': 'enclosure', 'attribute': {'url': doc['Dataset-ImageUrl'][0], 'type': 'image/jpg', 'length': '0'}}) + + if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc: + url = dict(zip(doc['DatasetLocationPolicy-Type'], doc['DatasetLocationPolicy-BasePath'])) + if 'LOCAL-OPENDAP' in url: + item.append({'name': 'enclosure', 'attribute': {'url': url['LOCAL-OPENDAP'], 'type': 'text/html', 'length': '0'}}) + elif 'REMOTE-OPENDAP' in url: + item.append({'name': 'enclosure', 'attribute': {'url': url['REMOTE-OPENDAP'], 'type': 'text/html', 'length': '0'}}) + if 'LOCAL-FTP' in url: + item.append({'name': 'enclosure', 'attribute': {'url': url['LOCAL-FTP'], 'type': 'text/plain', 'length': '0'}}) + elif 'REMOTE-FTP' in url: + item.append({'name': 'enclosure', 'attribute': {'url': url['REMOTE-FTP'], 'type': 'text/plain', 'length': '0'}}) + + updated = None + if 'DatasetMetaHistory-LastRevisionDateLong' in doc and doc['DatasetMetaHistory-LastRevisionDateLong'][0] != '': + updated = DateUtility.convertTimeLongToIso(doc['DatasetMetaHistory-LastRevisionDateLong'][0]) + else: + updated = datetime.datetime.utcnow().isoformat()+'Z' + + item.append({'name': 'pubDate', 'value': updated}) + item.append({'name': 'guid', 'value': persistentId}) + item.append({'namespace': 'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]}) + item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]}) + + if doc['DatasetCoverage-WestLon'][0] != '' and doc['DatasetCoverage-SouthLat'][0] != '' and doc['DatasetCoverage-EastLon'][0] != '' and doc['DatasetCoverage-NorthLat'][0] != '': + item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 'lowerCorner', 'value': ' 
'.join([doc['DatasetCoverage-WestLon'][0], doc['DatasetCoverage-SouthLat'][0]]) }, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['DatasetCoverage-EastLon'][0], doc['DatasetCoverage-NorthLat'][0]])}]}}) + + if 'DatasetCoverage-StartTimeLong' in doc and doc['DatasetCoverage-StartTimeLong'][0] != '': + item.append({'namespace': 'time', 'name': 'start', 'value': DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StartTimeLong'][0])}) + + if 'DatasetCoverage-StopTimeLong' in doc and doc['DatasetCoverage-StopTimeLong'][0] != '': + item.append({'namespace': 'time', 'name': 'end', 'value': DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StopTimeLong'][0])}) + + if 'full' in self.parameters and self.parameters['full']: + if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc: + for i, x in enumerate(doc['DatasetLocationPolicy-Type']): + item.append({'namespace': 'podaac', 'name': self._camelCaseStripHyphen(x.title()), 'value': doc['DatasetLocationPolicy-BasePath'][i]}) + del doc['DatasetLocationPolicy-Type'] + del doc['DatasetLocationPolicy-BasePath'] + + multiValuedElementsKeys = ('DatasetRegion-', 'DatasetCharacter-', 'DatasetCitation-', 'DatasetContact-Contact-', 'DatasetDatetime-', + 'DatasetInteger-', 'DatasetParameter-', 'DatasetProject-', 'DatasetReal-', 'DatasetResource-', + 'DatasetSoftware-', 'DatasetSource-', 'DatasetVersion-', 'Collection-') + self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys) http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetwriter.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/datasetwriter.py b/src/main/python/libraries/edge/opensearch/datasetwriter.py new file mode 100644 index 0000000..3ec56cb --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/datasetwriter.py @@ -0,0 +1,192 @@ +from types import * 
+import json +import logging +import urllib + +import requestresponder +from edge.dateutility import DateUtility +from edge.httputility import HttpUtility +from edge.opensearch.responsewriter import ResponseWriter +import re + +class DatasetWriter(ResponseWriter): + def __init__(self, configFilePath): + super(DatasetWriter, self).__init__(configFilePath) + self.datasets = [] + + def get(self, requestHandler): + super(DatasetWriter, self).get(requestHandler) + #searchParameters = {} + #logging.debug('uri: '+str(requestHandler.request.headers)) + + startIndex = 0 + try: + startIndex = requestHandler.get_argument('startIndex') + except: + pass + + entriesPerPage = self._configuration.getint('solr', 'entriesPerPage') + try: + entriesPerPage = requestHandler.get_argument('itemsPerPage') + #cap entries per age at 400 + if (int(entriesPerPage) > 400): + entriesPerPage = 400 + self.searchParameters['itemsPerPage'] = entriesPerPage + except: + pass + + #pretty = True + try: + if requestHandler.get_argument('pretty').lower() == 'false': + self.pretty = False + self.searchParameters['pretty'] = 'false' + except: + pass + + try: + if requestHandler.get_argument('full').lower() == 'true': + self.searchParameters['full'] = 'true' + except: + pass + + try: + self.searchParameters['format'] = requestHandler.get_argument('format') + except: + pass + + parameters = ['startTime', 'endTime', 'keyword', 'datasetId', 'shortName', 'instrument', 'satellite', 'fileFormat', 'status', 'processLevel', 'sortBy', 'bbox', 'allowNone'] + #variables = {} + for parameter in parameters: + try: + value = requestHandler.get_argument(parameter) + self.variables[parameter] = value + self.searchParameters[parameter] = value + except: + pass + + if 'keyword' in self.variables: + self.variables['keyword'] = self.variables['keyword'].replace('*', '') + self.variables['keyword'] = self.variables['keyword'].lower() + """ + else: + variables['keyword'] = '""' + """ + #If generating OpenSearch response, need to 
make additional call to solr + #to determine which datasets have granules + try: + if 'search' in requestHandler.request.path: + callback = self._getSolrHasGranuleResponseCallback(startIndex, entriesPerPage) + self._getSolrHasGranuleResponse(callback) + else: + self._getSolrResponse(startIndex, entriesPerPage, self.variables) + except: + logging.exception('Failed to get solr response.') + """ + searchText = '' + if 'keyword' in variables: + searchText = variables['keyword'] + openSearchResponse = self._generateOpenSearchResponse( + solrResponse, + searchText, + self._configuration.get('service', 'url') + requestHandler.request.path, + searchParameters, + pretty + ) + + requestHandler.set_header("Content-Type", "application/xml") + #requestHandler.set_header("Content-Type", "application/rss+xml") + #requestHandler.write(solrResponse) + requestHandler.write(openSearchResponse) + """ + + def _getSolrResponse(self, startIndex, entriesPerPage, variables): + query = self._constructSolrQuery(startIndex, entriesPerPage, variables) + url = self._configuration.get('solr', 'datasetUrl') + + httpUtility = HttpUtility() + httpUtility.getResponse(url+'/select/?'+query, self._onSolrResponse) + + def _constructSolrQuery(self, startIndex, entriesPerPage, variables): + queries = [] + sort = None + filterQuery = None + for key, value in variables.iteritems(): + #query = '' + if key == 'startTime': + startTime = DateUtility.convertISOToUTCTimestamp(value) + if startTime is not None: + query = 'DatasetCoverage-StopTimeLong-Long:' + query += '['+str(startTime)+'%20TO%20*]' + queries.append(query) + elif key == 'endTime': + stopTime = DateUtility.convertISOToUTCTimestamp(value) + if stopTime is not None: + query = 'DatasetCoverage-StartTimeLong-Long:' + query += '[*%20TO%20'+str(stopTime)+']' + queries.append(query) + elif key == 'keyword': + newValue = urllib.quote(value) + + query = 'SearchableText-LowerCased:('+newValue+')' + queries.append(query) + elif key == 'datasetId': + query = 
'Dataset-PersistentId:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'shortName': + query = 'Dataset-ShortName-Full:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'satellite': + query = 'DatasetSource-Source-ShortName-Full:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'instrument': + query = 'DatasetSource-Sensor-ShortName-Full:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'fileFormat': + query = 'DatasetPolicy-DataFormat-LowerCased:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'status': + query = 'DatasetPolicy-AccessType-LowerCased:'+self._urlEncodeSolrQueryValue(value) + queries.append(query) + elif key == 'processLevel': + query = 'Dataset-ProcessingLevel-LowerCased:'+value + queries.append(query) + elif key == 'sortBy': + sortByMapping = {'timeDesc': 'DatasetCoverage-StartTimeLong-Long+desc', 'timeAsc': 'DatasetCoverage-StartTimeLong-Long+asc', + 'popularityDesc': 'Dataset-AllTimePopularity+desc', 'popularityAsc': 'Dataset-AllTimePopularity+asc'} + if value in sortByMapping.keys(): + sort = sortByMapping[value] + elif key == 'bbox': + filterQuery = self._constructBoundingBoxQuery(value) + + #if query != '': + # queries.append('%2B'+query) + + if len(queries) == 0: + queries.append('*') + + query = 'q='+'+AND+'.join(queries)+'&fq=DatasetPolicy-AccessType-Full:(OPEN+OR+PREVIEW+OR+SIMULATED+OR+REMOTE)+AND+DatasetPolicy-ViewOnline:Y&version=2.2&start='+str(startIndex)+'&rows='+str(entriesPerPage)+'&indent=on&wt=json' + if sort is not None: + query += '&sort=' + sort + if filterQuery is not None: + query += '&' + filterQuery + logging.debug('solr query: '+query) + + return query + + def _getSolrHasGranuleResponse(self, callback): + url = self._configuration.get('solr', 'granuleUrl') + + httpUtility = HttpUtility() + return 
httpUtility.getResponse(url+'/select?q=*:*&facet=true&facet.field=Dataset-ShortName-Full&facet.limit=-1&rows=0&indent=on&wt=json&version=2.2', callback) + + def _getSolrHasGranuleResponseCallback(self, startIndex, entriesPerPage): + def onSolrHasGranuleResponse(response): + try: + solrJson = json.loads(response.body) + logging.debug("Got response for dataset facet") + datasetCounter = solrJson['facet_counts']['facet_fields']['Dataset-ShortName-Full'] + self.datasets = [datasetCounter[i] for i in range(len(datasetCounter)) if i % 2 == 0] + self._getSolrResponse(startIndex, entriesPerPage, self.variables) + except: + logging.exception('Failed to get solr response.') + return onSolrHasGranuleResponse http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/fgdcresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/fgdcresponse.py b/src/main/python/libraries/edge/opensearch/fgdcresponse.py new file mode 100644 index 0000000..c8738ce --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/fgdcresponse.py @@ -0,0 +1,56 @@ +import logging + +from jinja2 import Environment, Template +import re +import xml.etree.ElementTree + +from edge.opensearch.response import Response + +class FgdcResponse(Response): + def __init__(self): + self.namespaces = {} + self.env = Environment() + self.env.trim_blocks = True + self.env.autoescape = True + self.variables = {} + + def setTemplate(self, template): + self.template = self.env.from_string(template.replace('>\n<', '><')) + + def addNamespace(self, name, uri): + self.namespaces[name] = uri + + def removeNamespace(self, name): + del self.namespaces[name] + + def generate(self, pretty=False, xmlDeclaration=""): + logging.debug('FgdcResponse.generate is called.') + fgdcStr = self.template.render(self.variables).encode('utf-8') + if fgdcStr != "" and pretty: + #xmlDeclaration ="<?xml 
version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<!DOCTYPE metadata SYSTEM \"http://www.fgdc.gov/metadata/fgdc-std-001-1998.dtd\">\n" + tree = xml.etree.ElementTree.fromstring(fgdcStr) + self._indent(tree) + + for namespace in self.namespaces.keys(): + xml.etree.ElementTree.register_namespace(namespace, self.namespaces[namespace]) + + return xmlDeclaration + xml.etree.ElementTree.tostring(tree, encoding='utf-8') + else: + return fgdcStr + + # Provided by http://effbot.org/zone/element-lib.htm#prettyprint + def _indent(self, elem, level=0): + i = "\n" + level * " " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + self._indent(elem, level + 1) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py b/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py new file mode 100644 index 0000000..562dc08 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py @@ -0,0 +1,141 @@ +import json +import logging + +from edge.opensearch.fgdcresponse import FgdcResponse +from datetime import datetime + +class FgdcResponseBySolr(FgdcResponse): + def __init__(self): + super(FgdcResponseBySolr, self).__init__() + + def generate(self, solrDatasetResponse, solrGranuleResponse = None, pretty=False): + self._populate(solrDatasetResponse, solrGranuleResponse) + return super(FgdcResponseBySolr, self).generate(pretty, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<!DOCTYPE metadata SYSTEM \"http://www.fgdc.gov/metadata/fgdc-std-001-1998.dtd\">\n") + + def _populate(self, 
solrDatasetResponse, solrGranuleResponse = None): + if solrDatasetResponse is not None: + solrJson = json.loads(solrDatasetResponse) + + logging.debug('dataset count: '+str(len(solrJson['response']['docs']))) + + if len(solrJson['response']['docs']) == 1: + # ok now populate variables! + doc = solrJson['response']['docs'][0] + + self.variables['doc'] = doc + + # Round spatial to 3 decimal places + doc['DatasetCoverage-WestLon'][0] = '%.3f' % round(float(doc['DatasetCoverage-WestLon'][0]), 3) + doc['DatasetCoverage-EastLon'][0] = '%.3f' % round(float(doc['DatasetCoverage-EastLon'][0]), 3) + doc['DatasetCoverage-NorthLat'][0] = '%.3f' % round(float(doc['DatasetCoverage-NorthLat'][0]), 3) + doc['DatasetCoverage-SouthLat'][0] = '%.3f' % round(float(doc['DatasetCoverage-SouthLat'][0]), 3) + + # Base on the value of Dataset-ProcessingLevel, we query the SOLR differently. + # For 2 or 2P, we look for these 2 attributes: + # + # ACROSS_TRACK_RESOLUTION NUMBER + # ALONG_TRACK_RESOLUTION NUMBER + # + # Because the units of 2 and 2P products are in meters, we have to convert to decimal degrees. 
+ # + # The formula is: + # + # 1 degree = 111.16 km or 111160.0 meters + # + # Calculate latitude and longitude resolution for 2 and 2P products + if (doc['Dataset-ProcessingLevel'][0] == '2' or doc['Dataset-ProcessingLevel'][0] == '2P'): + self.variables['Dataset_LatitudeResolution'] = '%.17f' % round(float(doc['Dataset-AlongTrackResolution'][0]) / 111160.0, 17) + self.variables['Dataset_LongitudeResolution'] = '%.17f' % round(float(doc['Dataset-AcrossTrackResolution'][0]) / 111160.0, 17) + # For value of Dataset-ProcessingLevel of 3 or 4, we look for different attributes: + # + # LATIUDE_RESOLUTION + # LONGITUDE RESOLUTION + elif (doc['Dataset-ProcessingLevel'][0] == '3' or doc['Dataset-ProcessingLevel'][0] == '4'): + self.variables['Dataset_LatitudeResolution'] = doc['Dataset-LatitudeResolution'][0] + self.variables['Dataset_LongitudeResolution'] = doc['Dataset-LongitudeResolution'][0] + + # Format dates + try: + self.variables['DatasetCitation_ReleaseDateTime'] = self._convertTimeLongToString(doc['DatasetCitation-ReleaseDateLong'][0]) + self.variables['DatasetCitation_ReleaseDate'] = datetime.utcfromtimestamp(float(doc['DatasetCitation-ReleaseDateLong'][0]) / 1000).strftime('%Y%m%d') + self.variables['DatasetCitation_ReleaseTime'] = datetime.utcfromtimestamp(float(doc['DatasetCitation-ReleaseDateLong'][0]) / 1000).strftime('%H%M%S')+'Z' + self.variables['DatasetCoverage_StartTime'] = self._convertTimeLongToString(doc['DatasetCoverage-StartTimeLong'][0]) + except: + pass + + # Create list of unique dataset sensor + self.variables['UniqueDatasetSensor'] = {} + for i, x in enumerate(doc['DatasetSource-Sensor-ShortName']): + self.variables['UniqueDatasetSensor'][x] = i + self.variables['UniqueDatasetSensor'] = self.variables['UniqueDatasetSensor'].values() + + # Create list of unique dataset source + self.variables['UniqueDatasetSource'] = {} + for i, x in enumerate(doc['DatasetSource-Source-ShortName']): + self.variables['UniqueDatasetSource'][x] = i + 
self.variables['UniqueDatasetSource'] = self.variables['UniqueDatasetSource'].values() + + # Create dictionary for dataset_resource + self.variables['DatasetResource'] = dict(zip(doc['DatasetResource-Type'], doc['DatasetResource-Path'])) + + # Get index of dataset Technical Contact + self.variables['TechnicalContactIndex'] = -1 + for i, x in enumerate(doc['DatasetContact-Contact-Role']): + if (x.upper() == 'TECHNICAL CONTACT'): + logging.debug('tech contact is ' + str(i)) + self.variables['TechnicalContactIndex'] = i + break; + + if 'Dataset-Provider-ProviderResource-Path' not in doc: + doc['Dataset-Provider-ProviderResource-Path'] = [''] + else: + raise Exception('No dataset found') + + else: + raise Exception('No dataset found') + + if solrGranuleResponse is not None: + solrGranuleJson = json.loads(solrGranuleResponse) + + logging.debug('granule count: '+str(len(solrGranuleJson['response']['docs']))) + if (len(solrGranuleJson['response']['docs']) == 0): + raise Exception('No granules found') + + for doc in solrGranuleJson['response']['docs']: + self._populateItem(solrGranuleResponse, doc, None) + + doc['Granule-StartTimeLong'][0] = self._convertTimeLongToString(doc['Granule-StartTimeLong'][0]) + doc['Granule-StopTimeLong'][0] = self._convertTimeLongToString(doc['Granule-StopTimeLong'][0]) + + # Create dictionary for bounding box extent + ''' + if ('GranuleReal-Value' in doc and 'GranuleReal-DatasetElement-Element-ShortName' in doc): + # Round real value to 3 decimal places + doc['GranuleReal-Value'] = ['%.3f' % round(float(value), 3) for value in doc['GranuleReal-Value']] + doc['GranuleBoundingBox'] = dict(zip(doc['GranuleReal-DatasetElement-Element-ShortName'], doc['GranuleReal-Value'])) + ''' + if 'GranuleSpatial-NorthLat' in doc and 'GranuleSpatial-EastLon' in doc and 'GranuleSpatial-SouthLat' in doc and 'GranuleSpatial-WestLon' in doc: + doc['GranuleBoundingBox'] = dict([('southernmostLatitude', '%.3f' % round(float(doc['GranuleSpatial-SouthLat'][0]), 3)), + 
('northernmostLatitude', '%.3f' % round(float(doc['GranuleSpatial-NorthLat'][0]), 3)), + ('westernmostLongitude', '%.3f' % round(float(doc['GranuleSpatial-WestLon'][0]), 3)), + ('easternmostLongitude', '%.3f' % round(float(doc['GranuleSpatial-EastLon'][0]), 3))]) + else: + # Encounter granule with no bounding box so raise an exception + raise Exception('granule ' + doc['Granule-Name'][0] + ' has no bounding box') + self.variables['granules'] = solrGranuleJson['response']['docs'] + else: + raise Exception('No granules found') + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + pass + + def _convertTimeLongToString(self, time): + isoTime = '' + try: + isoTime = datetime.utcfromtimestamp(float(time) / 1000).strftime('%Y%m%dT%H%M%SZ') + except ValueError: + pass + return isoTime http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py b/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py new file mode 100644 index 0000000..588fc4a --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py @@ -0,0 +1,123 @@ +import json +import logging + +from edge.opensearch.isoresponse import IsoResponse +from datetime import date, datetime + +class GcmdResponseBySolr(IsoResponse): + def __init__(self, configuration): + super(GcmdResponseBySolr, self).__init__() + self._configuration = configuration + + def generate(self, solrResponse, pretty=False, allowNone=False): + self._populate(solrResponse, allowNone) + return super(GcmdResponseBySolr, self).generate(pretty) + + def _populate(self, solrResponse, allowNone): + if solrResponse is not None: + solrJson = json.loads(solrResponse) + + logging.debug('dataset count: '+str(len(solrJson['response']['docs']))) + + if 
len(solrJson['response']['docs']) == 1: + # ok now populate variables! + doc = solrJson['response']['docs'][0] + + #self.variables['Dataset_ShortName'] = doc['Dataset-ShortName'][0] + #self.variables['Dataset_ShortName'] = u'unko' + + #Filter response from solr, if value contains none, N/A, null set to empty string + if not allowNone: + for key, value in doc.iteritems(): + if key not in ['DatasetPolicy-AccessConstraint', 'DatasetPolicy-UseConstraint'] and isinstance(value[0], basestring) and len(value[0].strip()) <= 4 and value[0].strip().lower() in ['none', 'na', 'n/a', 'null']: + doc[key][0] = "" + + self.variables['doc'] = doc + + # Entry_ID + self.variables['Entry_ID'] = doc['Dataset-PersistentId'][0] if doc['Dataset-PersistentId'][0] != "" else doc['Dataset-ShortName'][0] + + # Entry_Title + self.variables['Entry_Title'] = doc['Dataset-LongName'][0] + + # Dataset_Citation + datasetCitationCol = ['Dataset_Creator', 'Dataset_Title', 'Dataset_Series_Name', 'Dataset_Release_Date', 'Dataset_Release_Place', 'Dataset_Publisher', 'Version', 'Other_Citation_Details', 'Online_Resource'] + if 'DatasetCitation-Creator' in doc: + for i, x in enumerate(doc['DatasetCitation-ReleaseDateLong']): + try: + doc['DatasetCitation-ReleaseDateLong'][i] = datetime.utcfromtimestamp(float(x) / 1000).strftime('%Y-%m-%d') + except: + pass + self.variables['Dataset_Citation'] = [dict(zip(datasetCitationCol,x)) for x in zip(doc['DatasetCitation-Creator'], doc['DatasetCitation-Title'], doc['DatasetCitation-SeriesName'], doc['DatasetCitation-ReleaseDateLong'], doc['DatasetCitation-ReleasePlace'], doc['DatasetCitation-Publisher'], doc['DatasetCitation-Version'], doc['DatasetCitation-CitationDetail'], doc['DatasetCitation-OnlineResource'])] + + # Personnel + datasetPersonnelCol = ['Role', 'First_Name', 'Middle_Name', 'Last_Name', 'Email', 'Phone', 'Fax', 'Provider_Short_Name'] + if 'DatasetContact-Contact-Role' in doc: + self.variables['Personnel'] = [dict(zip(datasetPersonnelCol, x)) for x in 
zip(doc['DatasetContact-Contact-Role'], doc['DatasetContact-Contact-FirstName'], doc['DatasetContact-Contact-MiddleName'], doc['DatasetContact-Contact-LastName'], doc['DatasetContact-Contact-Email'], doc['DatasetContact-Contact-Phone'], doc['DatasetContact-Contact-Fax'], doc['DatasetContact-Contact-Provider-ShortName'])] + + # Locate dataset provider contact + self.variables['Provider_Personnel'] = next((item for item in self.variables['Personnel'] if item["Provider_Short_Name"] == doc['Dataset-Provider-ShortName'][0]), None) + + # Parameter + datasetParameterCol = ['Category', 'Topic', 'Term', 'Variable_Level_1', 'Detailed_Variable'] + if 'DatasetParameter-Category' in doc: + # Replace all none, None values with empty string + doc['DatasetParameter-VariableDetail'] = [self._filterString(variableDetail) for variableDetail in doc['DatasetParameter-VariableDetail']] + self.variables['Parameters'] = [dict(zip(datasetParameterCol, x)) for x in zip(doc['DatasetParameter-Category'], doc['DatasetParameter-Topic'], doc['DatasetParameter-Term'], doc['DatasetParameter-Variable'], doc['DatasetParameter-VariableDetail'])] + + # Format dates + try: + self.variables['Start_Date'] = datetime.utcfromtimestamp(float(doc['DatasetCoverage-StartTimeLong'][0]) / 1000).strftime('%Y-%m-%d') + self.variables['Stop_Date'] = datetime.utcfromtimestamp(float(doc['DatasetCoverage-StopTimeLong'][0]) / 1000).strftime('%Y-%m-%d') + except: + pass + + + # Project + projectCol = ['Short_Name', 'Long_Name'] + if 'DatasetProject-Project-ShortName' in doc: + self.variables['Project'] = [dict(zip(projectCol, x)) for x in zip(doc['DatasetProject-Project-ShortName'], doc['DatasetProject-Project-LongName'])] + + # Create list of unique dataset sensor + self.variables['UniqueDatasetSensor'] = {} + if 'DatasetSource-Sensor-ShortName' in doc: + for i, x in enumerate(doc['DatasetSource-Sensor-ShortName']): + self.variables['UniqueDatasetSensor'][x] = i + self.variables['UniqueDatasetSensor'] = 
self.variables['UniqueDatasetSensor'].values() + + # Create list of unique dataset source + self.variables['UniqueDatasetSource'] = {} + if 'DatasetSource-Source-ShortName' in doc: + for i, x in enumerate(doc['DatasetSource-Source-ShortName']): + self.variables['UniqueDatasetSource'][x] = i + self.variables['UniqueDatasetSource'] = self.variables['UniqueDatasetSource'].values() + + # Last_DIF_Revision_Date + self.variables['Last_DIF_Revision_Date'] = datetime.utcfromtimestamp(float(doc['DatasetMetaHistory-LastRevisionDateLong'][0]) / 1000).strftime('%Y-%m-%d') + + # DIF_Revision_History + self.variables['DIF_Revision_History'] = doc['DatasetMetaHistory-RevisionHistory'][0] + + + + # DIF_Creation_Date + self.variables['DIF_Creation_Date'] = datetime.utcnow().strftime('%Y-%m-%d') + + # Set configurable DIF Author contact information + self.variables['author'] = dict(self._configuration.items('author')) + + # Set configurable PO.DAAC and NODC contact information + self.variables['podaac'] = dict(self._configuration.items('podaac')) + self.variables['nodc'] = dict(self._configuration.items('nodc')) + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + pass + + def _filterString(self, str): + if str.lower() == 'none': + return '' + else: + return str http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuleatomresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/granuleatomresponse.py b/src/main/python/libraries/edge/opensearch/granuleatomresponse.py new file mode 100644 index 0000000..9b38347 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/granuleatomresponse.py @@ -0,0 +1,110 @@ +import datetime +import urllib + +from edge.opensearch.atomresponsebysolr import AtomResponseBySolr +from edge.dateutility import DateUtility + +class 
GranuleAtomResponse(AtomResponseBySolr): + def __init__(self, linkToGranule, host, url): + super(GranuleAtomResponse, self).__init__() + + self.linkToGranule = linkToGranule.split(',') + self.host = host + self.url = url + + def _populateChannel(self, solrResponse): + self.variables.append({'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-dataset-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }}) + + def _populateItem(self, solrResponse, doc, item): + item.append({'name': 'title', 'value': doc['Granule-Name'][0]}) + #item.append({'name': 'content', 'value': doc['Granule-Name'][0]}) + + updated = None + startTime = None + if 'Granule-StartTimeLong' in doc and doc['Granule-StartTimeLong'][0] != '': + updated = DateUtility.convertTimeLongToIso(doc['Granule-StartTimeLong'][0]) + startTime = updated + else: + updated = datetime.datetime.utcnow().isoformat()+'Z' + + item.append({'name': 'updated', 'value': updated}) + item.append({'name': 'id', 'value': doc['Dataset-PersistentId'][0] + ':' + doc['Granule-Name'][0]}) + + parameters = {'datasetId': doc['Dataset-PersistentId'][0], 'granuleName': doc['Granule-Name'][0]} + parameters['full'] = 'true' + item.append({'name': 'link', 'attribute': {'href': self.url+self.searchBasePath + 'granule?' + urllib.urlencode(parameters), 'rel': 'enclosure', 'type': 'application/atom+xml', 'title': 'PO.DAAC Metadata' }}) + del parameters['full'] + parameters['format'] = 'iso' + item.append({'name': 'link', 'attribute': {'href': self.url+self.metadataBasePath + 'granule?' + urllib.urlencode(parameters), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'ISO-19115 Metadata' }}) + parameters['format'] = 'fgdc' + item.append({'name': 'link', 'attribute': {'href': self.url+self.metadataBasePath + 'granule?' 
+ urllib.urlencode(parameters), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'FGDC Metadata' }}) + + #item.append({'name': 'description', 'value': doc['Dataset-Description'][0]}) + #item.append({'name': 'link', 'value': self.portalUrl+'/'+doc['Dataset-ShortName'][0]}) + #link = self._getLinkToGranule(doc) + #if link['href'] is not None: + # item.append({'name': 'link', 'attribute': link}) + if 'GranuleReference-Type' in doc: + if 'Granule-DataFormat' in doc: + type = 'application/x-' + doc['Granule-DataFormat'][0].lower() + else: + type = 'text/plain' + #Look for ONLINE reference only + granuleRefDict = dict([(doc['GranuleReference-Type'][i], doc['GranuleReference-Path'][i]) for i,x in enumerate(doc['GranuleReference-Status']) if x=="ONLINE"]) + if 'LOCAL-OPENDAP' in granuleRefDict: + item.append({'name': 'link', 'attribute': {'href': granuleRefDict['LOCAL-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }}) + elif 'REMOTE-OPENDAP' in granuleRefDict: + item.append({'name': 'link', 'attribute': {'href': granuleRefDict['REMOTE-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }}) + if 'LOCAL-FTP' in granuleRefDict: + item.append({'name': 'link', 'attribute': {'href': granuleRefDict['LOCAL-FTP'], 'rel': 'enclosure', 'type': type, 'title': 'FTP URL' }}) + elif 'REMOTE-FTP' in granuleRefDict: + item.append({'name': 'link', 'attribute': {'href': granuleRefDict['REMOTE-FTP'], 'rel': 'enclosure', 'type': type, 'title': 'FTP URL' }}) + + item.append({'namespace': 'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]}) + item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]}) + + if 'GranuleSpatial-NorthLat' in doc and 'GranuleSpatial-EastLon' in doc and 'GranuleSpatial-SouthLat' in doc and 'GranuleSpatial-WestLon' in doc: + item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 
'lowerCorner', 'value': ' '.join([doc['GranuleSpatial-WestLon'][0], doc['GranuleSpatial-SouthLat'][0]])}, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['GranuleSpatial-EastLon'][0], doc['GranuleSpatial-NorthLat'][0]])}]}}) + + if startTime is not None: + item.append({'namespace': 'time', 'name': 'start', 'value': startTime}) + + if 'Granule-StopTimeLong' in doc and doc['Granule-StopTimeLong'][0] != '': + item.append({'namespace': 'time', 'name': 'end', 'value': DateUtility.convertTimeLongToIso(doc['Granule-StopTimeLong'][0])}) + + if 'full' in self.parameters and self.parameters['full']: + multiValuedElementsKeys = ('GranuleArchive-', 'GranuleReference-') + self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys) + + ''' + def _getLinkToGranule(self, doc): + attr = {} + link = None + + if 'GranuleReference-Type' in doc and len(self.linkToGranule) > 0: + granuleRefDict = dict(zip(doc['GranuleReference-Type'], zip(doc['GranuleReference-Path'], doc['GranuleReference-Status']))) + + for type in self.linkToGranule: + # check if reference type exists + if type in granuleRefDict: + # check if reference is online + if granuleRefDict[type][1] == 'ONLINE': + link = granuleRefDict[type][0] + break + if link is not None: + attr['rel'] = 'http://esipfed.org/ns/discovery/1.1/data#' + attr['title'] = 'Granule File' + + if 'GranuleArchive-Name' in doc and 'GranuleArchive-Type' in doc and 'GranuleArchive-FileSize': + granuleArchiveDict = dict(zip(doc['GranuleArchive-Type'], zip(doc['GranuleArchive-Name'], doc['GranuleArchive-FileSize']))) + if link.endswith(granuleArchiveDict['DATA'][0]): + attr['size'] = granuleArchiveDict['DATA'][1] + + if 'Granule-DataFormat' in doc: + attr['type'] = 'application/x-' + doc['Granule-DataFormat'][0].lower() + else: + #No link to granule download provided so create link back to opensearch to retrieve granule metadata + link = "http://" + self.host + "/granule/opensearch.atom?granule=" + doc['Granule-Name'][0] + 
attr['href'] = link + return attr + ''' http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py b/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py new file mode 100644 index 0000000..24b5dc0 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py @@ -0,0 +1,41 @@ +import logging + +from edge.dateutility import DateUtility +from edge.opensearch.datacastingresponsebysolr import DatacastingResponseBySolr + +class GranuleDatacastingResponse(DatacastingResponseBySolr): + def __init__(self, portalUrl, linkToGranule, archivedWithin): + super(GranuleDatacastingResponse, self).__init__(portalUrl, archivedWithin) + + self.linkToGranule = linkToGranule.split(',') + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + doc['Granule-StartTimeLong'][0] = DateUtility.convertTimeLongToRFC822(doc['Granule-StartTimeLong'][0]) + doc['Granule-StopTimeLong'][0] = DateUtility.convertTimeLongToRFC822(doc['Granule-StopTimeLong'][0]) + doc['Granule-ArchiveTimeLong'][0] = DateUtility.convertTimeLongToRFC822(doc['Granule-ArchiveTimeLong'][0]) + + doc['GranuleLink'] = self._getLinkToGranule(doc) + + doc['GranuleFileSize'] = dict(zip(doc['GranuleArchive-Type'], doc['GranuleArchive-FileSize'])) + + if 'GranuleReference-Type' in doc: + doc['GranuleReference'] = dict([(doc['GranuleReference-Type'][i], doc['GranuleReference-Path'][i]) for i,x in enumerate(doc['GranuleReference-Status']) if x=="ONLINE"]) + + def _getLinkToGranule(self, doc): + link = None + + if 'GranuleReference-Type' in doc and len(self.linkToGranule) > 0: + granuleRefDict = dict(zip(doc['GranuleReference-Type'], zip(doc['GranuleReference-Path'], doc['GranuleReference-Status']))) 
+ + for type in self.linkToGranule: + # check if reference type exists + if type in granuleRefDict: + # check if reference is online + if granuleRefDict[type][1] == 'ONLINE': + link = granuleRefDict[type][0] + break + + return link http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py b/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py new file mode 100644 index 0000000..0582f60 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py @@ -0,0 +1,13 @@ +import datetime + +from edge.opensearch.fgdcresponsebysolr import FgdcResponseBySolr + +class GranuleFgdcResponse(FgdcResponseBySolr): + def __init__(self): + super(GranuleFgdcResponse, self).__init__() + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + pass http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuleisoresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/granuleisoresponse.py b/src/main/python/libraries/edge/opensearch/granuleisoresponse.py new file mode 100644 index 0000000..7b9b0a7 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/granuleisoresponse.py @@ -0,0 +1,33 @@ +import datetime + +from edge.opensearch.isoresponsebysolr import IsoResponseBySolr + +class GranuleIsoResponse(IsoResponseBySolr): + def __init__(self, linkToGranule): + super(GranuleIsoResponse, self).__init__() + + self.linkToGranule = linkToGranule.split(',') + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + link = self._getLinkToGranule(doc) + if link is not None: + doc['link'] = link + 
+ def _getLinkToGranule(self, doc): + link = None + + if 'GranuleReference-Type' in doc and len(self.linkToGranule) > 0: + granuleRefDict = dict(zip(doc['GranuleReference-Type'], zip(doc['GranuleReference-Path'], doc['GranuleReference-Status']))) + + for type in self.linkToGranule: + # check if reference type exists + if type in granuleRefDict: + # check if reference is online + if granuleRefDict[type][1] == 'ONLINE': + link = granuleRefDict[type][0] + break + + return link http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py b/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py new file mode 100644 index 0000000..1b1ca80 --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py @@ -0,0 +1,37 @@ +from edge.opensearch.responsebysolr import ResponseBySolr + +class GranuleResponseBySolr(ResponseBySolr): + def __init__(self, linkToGranule): + super(GranuleResponseBySolr, self).__init__() + + self.linkToGranule = linkToGranule + + def _populateChannel(self, solrResponse): + pass + + def _populateItem(self, solrResponse, doc, item): + item.append({'name': 'title', 'value': doc['Granule-Name'][0]}) + item.append({'name': 'description', 'value': doc['Granule-Name'][0]}) + #item.append({'name': 'description', 'value': doc['Dataset-Description'][0]}) + #item.append({'name': 'link', 'value': self.portalUrl+'/'+doc['Dataset-ShortName'][0]}) + link = self._getLinkToGranule(doc) + if link is not None: + item.append({'name': 'link', 'value': link}) + + def _getLinkToGranule(self, doc): + link = None + + if 'GranuleReference-Type' in doc: + types = doc['GranuleReference-Type'] + + typeIndex = -1 + for index, type in enumerate(types): + if type == self.linkToGranule: + typeIndex = index + break + + if 
typeIndex >= 0: + if ('GranuleReference-Path' in doc) and (len(doc['GranuleReference-Path']) > typeIndex): + link = doc['GranuleReference-Path'][typeIndex] + + return link http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granulerssresponse.py ---------------------------------------------------------------------- diff --git a/src/main/python/libraries/edge/opensearch/granulerssresponse.py b/src/main/python/libraries/edge/opensearch/granulerssresponse.py new file mode 100644 index 0000000..a514cca --- /dev/null +++ b/src/main/python/libraries/edge/opensearch/granulerssresponse.py @@ -0,0 +1,96 @@ +import urllib +from edge.opensearch.rssresponsebysolr import RssResponseBySolr +from edge.dateutility import DateUtility + +class GranuleRssResponse(RssResponseBySolr): + def __init__(self, linkToGranule, host, url): + super(GranuleRssResponse, self).__init__() + + self.linkToGranule = linkToGranule.split(',') + self.host = host + self.url = url + + def _populateChannel(self, solrResponse): + self.variables.append({'namespace':'atom', 'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-dataset-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }}) + + def _populateItem(self, solrResponse, doc, item): + item.append({'name': 'title', 'value': doc['Granule-Name'][0]}) + item.append({'name': 'description', 'value': doc['Granule-Name'][0]}) + #item.append({'name': 'description', 'value': doc['Dataset-Description'][0]}) + #item.append({'name': 'link', 'value': self.portalUrl+'/'+doc['Dataset-ShortName'][0]}) + + updated = None + startTime = None + if 'Granule-StartTimeLong' in doc and doc['Granule-StartTimeLong'][0] != '': + updated = DateUtility.convertTimeLongToIso(doc['Granule-StartTimeLong'][0]) + startTime = updated + else: + updated = datetime.datetime.utcnow().isoformat()+'Z' + + item.append({'name': 'pubDate', 'value': updated}) + item.append({'name': 'guid', 
'value': doc['Dataset-PersistentId'][0] + ':' + doc['Granule-Name'][0]}) + + link = self._getLinkToGranule(doc) + if link is not None: + item.append({'name': 'link', 'value': link}) + + parameters = {'datasetId': doc['Dataset-PersistentId'][0], 'granuleName': doc['Granule-Name'][0]} + parameters['full'] = 'true' + parameters['format'] = 'rss' + item.append({'name': 'enclosure', 'attribute': {'url': self.url+self.searchBasePath + 'granule?' + urllib.urlencode(parameters), 'type': 'application/rss+xml', 'length': '0'}}) + del parameters['full'] + parameters['format'] = 'iso' + item.append({'name': 'enclosure', 'attribute': {'url': self.url+self.metadataBasePath + 'granule?' + urllib.urlencode(parameters), 'type': 'text/xml', 'length': '0'}}) + parameters['format'] = 'fgdc' + item.append({'name': 'enclosure', 'attribute': {'url': self.url+self.metadataBasePath + 'granule?' + urllib.urlencode(parameters), 'type': 'text/xml', 'length': '0'}}) + + if 'GranuleReference-Type' in doc: + if 'Granule-DataFormat' in doc: + type = 'application/x-' + doc['Granule-DataFormat'][0].lower() + else: + type = 'text/plain' + #Look for ONLINE reference only + granuleRefDict = dict([(doc['GranuleReference-Type'][i], doc['GranuleReference-Path'][i]) for i,x in enumerate(doc['GranuleReference-Status']) if x=="ONLINE"]) + if 'LOCAL-OPENDAP' in granuleRefDict: + item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['LOCAL-OPENDAP'], 'type': 'text/html', 'length': '0'}}) + elif 'REMOTE-OPENDAP' in granuleRefDict: + item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['REMOTE-OPENDAP'], 'type': 'text/html', 'length': '0'}}) + if 'LOCAL-FTP' in granuleRefDict: + item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['LOCAL-FTP'], 'type': type, 'length': '0'}}) + elif 'REMOTE-FTP' in granuleRefDict: + item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['REMOTE-FTP'], 'type': type, 'length': '0'}}) + + item.append({'namespace': 
'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]}) + item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]}) + + if 'GranuleSpatial-NorthLat' in doc and 'GranuleSpatial-EastLon' in doc and 'GranuleSpatial-SouthLat' in doc and 'GranuleSpatial-WestLon' in doc: + item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 'lowerCorner', 'value': ' '.join([doc['GranuleSpatial-WestLon'][0], doc['GranuleSpatial-SouthLat'][0]])}, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['GranuleSpatial-EastLon'][0], doc['GranuleSpatial-NorthLat'][0]])}]}}) + + if 'Granule-StartTimeLong' in doc and doc['Granule-StartTimeLong'][0] != '': + item.append({'namespace': 'time', 'name': 'start', 'value': DateUtility.convertTimeLongToIso(doc['Granule-StartTimeLong'][0])}) + + if 'Granule-StopTimeLong' in doc and doc['Granule-StopTimeLong'][0] != '': + item.append({'namespace': 'time', 'name': 'end', 'value': DateUtility.convertTimeLongToIso(doc['Granule-StopTimeLong'][0])}) + + if 'full' in self.parameters and self.parameters['full']: + multiValuedElementsKeys = ('GranuleArchive-', 'GranuleReference-') + self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys) + + def _getLinkToGranule(self, doc): + link = None + + if 'GranuleReference-Type' in doc and len(self.linkToGranule) > 0: + granuleRefDict = dict(zip(doc['GranuleReference-Type'], zip(doc['GranuleReference-Path'], doc['GranuleReference-Status']))) + + for type in self.linkToGranule: + # check if reference type exists + if type in granuleRefDict: + # check if reference is online + if granuleRefDict[type][1] == 'ONLINE': + link = granuleRefDict[type][0] + break + else: + #No link to granule download provided so create link back to opensearch to retrieve granule metadata + link = "http://" + self.host + "/granule/opensearch.rss?granule=" + 
from types import *
import logging
import urllib
import json

from edge.opensearch.responsewriter import ResponseWriter
from edge.dateutility import DateUtility
from edge.httputility import HttpUtility
from edge.spatialsearch import SpatialSearch
import re


class GranuleWriter(ResponseWriter):
    """Response writer for granule searches.

    A request is handled in two steps: ``get`` collects the search arguments
    and asks Solr for the dataset named by ``datasetId``/``shortName``; the
    callback ``_onSolrDetermineProcessLevelResponse`` then answers the search
    through the Solr granule index, the configured L2 search service, or the
    spatial-search database, depending on the dataset's processing level and
    on whether a ``bbox`` was supplied.
    """

    # Upper bound applied to the client-supplied 'itemsPerPage' argument.
    _MAX_ENTRIES_PER_PAGE = 400

    def __init__(self, configFilePath, requiredParams = None):
        """Initialise paging defaults.

        ``entriesPerPage`` defaults to the integer option ``entriesPerPage``
        of the ``solr`` section of the configuration; ``startIndex`` to 0.
        Both arguments are passed unchanged to the base class.
        """
        super(GranuleWriter, self).__init__(configFilePath, requiredParams)
        self.startIndex = 0
        self.entriesPerPage = self._configuration.getint('solr', 'entriesPerPage')

    @staticmethod
    def _getOptionalArgument(requestHandler, name):
        """Return request argument ``name``, or None when it cannot be read.

        ``get_argument`` signals a missing argument by raising; the exact
        exception class is not visible from this module (presumably a tornado
        HTTP error -- confirm), hence the broad ``Exception``.
        """
        try:
            return requestHandler.get_argument(name)
        except Exception:
            return None

    @staticmethod
    def _isInteger(value):
        """Return True when ``int(value)`` succeeds."""
        try:
            int(value)
        except (TypeError, ValueError):
            return False
        return True

    def get(self, requestHandler):
        """Collect the search arguments and start the dataset lookup.

        Every argument is optional; absent or malformed ones are ignored and
        the current value is kept. The dataset lookup result is delivered to
        ``_onSolrDetermineProcessLevelResponse``.
        """
        super(GranuleWriter, self).get(requestHandler)

        # Paging values are validated BEFORE being stored so that a
        # non-numeric value can never reach the Solr query or a later int().
        startIndex = self._getOptionalArgument(requestHandler, 'startIndex')
        if self._isInteger(startIndex):
            self.startIndex = startIndex

        entriesPerPage = self._getOptionalArgument(requestHandler, 'itemsPerPage')
        if self._isInteger(entriesPerPage):
            #cap entries per page
            if int(entriesPerPage) > self._MAX_ENTRIES_PER_PAGE:
                entriesPerPage = self._MAX_ENTRIES_PER_PAGE
            self.entriesPerPage = entriesPerPage
            self.searchParameters['itemsPerPage'] = entriesPerPage

        pretty = self._getOptionalArgument(requestHandler, 'pretty')
        if pretty is not None and pretty.lower() == 'false':
            self.pretty = False
            self.searchParameters['pretty'] = 'false'

        full = self._getOptionalArgument(requestHandler, 'full')
        if full is not None and full.lower() == 'true':
            self.searchParameters['full'] = 'true'

        responseFormat = self._getOptionalArgument(requestHandler, 'format')
        if responseFormat is not None:
            self.searchParameters['format'] = responseFormat

        parameters = ['startTime', 'endTime', 'keyword', 'granuleName', 'datasetId', 'shortName', 'bbox', 'sortBy']
        for parameter in parameters:
            value = self._getOptionalArgument(requestHandler, parameter)
            if value is not None:
                self.variables[parameter] = value
                self.searchParameters[parameter] = value

        if 'keyword' in self.variables:
            # Wildcards are removed and the keyword lower-cased; it is later
            # matched against the 'SearchableText-LowerCased' field.
            self.variables['keyword'] = self.variables['keyword'].replace('*', '').lower()

        #Fetch dataset metadata from Solr
        datasetVariables = {}
        for key in ('datasetId', 'shortName'):
            if key in self.variables:
                datasetVariables[key] = self.variables[key]
        self._getSingleSolrDatasetResponse(datasetVariables, self._onSolrDetermineProcessLevelResponse)

    def _getSolrResponse(self, startIndex, entriesPerPage, variables):
        """Query the granule Solr core; the response goes to ``_onSolrResponse``."""
        query = self._constructSolrQuery(startIndex, entriesPerPage, variables)
        url = self._configuration.get('solr', 'granuleUrl')

        httpUtility = HttpUtility()
        httpUtility.getResponse(url+'/select/?'+query, self._onSolrResponse)

    def _constructSolrQuery(self, startIndex, entriesPerPage, variables):
        """Build the query string for the Solr ``select`` handler.

        ``variables`` maps search-parameter names to values. Recognised keys
        are startTime, endTime, keyword, datasetId, shortName, granuleName,
        granuleIds, sortBy and bbox; other keys are ignored. Time values that
        ``DateUtility.convertISOToUTCTimestamp`` turns into None are skipped.
        Returns the query string without a leading '?'.
        """
        #set default sort order
        sort = 'Granule-StartTimeLong+desc'
        sortByMapping = {'timeAsc': 'Granule-StartTimeLong+asc'}
        # Parameters matched verbatim against a single Solr field.
        exactMatchFields = {
            'datasetId': 'Dataset-PersistentId:',
            'shortName': 'Dataset-ShortName-Full:',
            'granuleName': 'Granule-Name-Full:',
        }
        filterQuery = None
        queries = []
        for key, value in variables.items():
            if key == 'startTime':
                # A granule matches when it stops at or after the start time.
                startTime = DateUtility.convertISOToUTCTimestamp(value)
                if startTime is not None:
                    queries.append('Granule-StopTimeLong:[' + str(startTime) + '%20TO%20*]')
            elif key == 'endTime':
                # ... and starts at or before the end time.
                stopTime = DateUtility.convertISOToUTCTimestamp(value)
                if stopTime is not None:
                    queries.append('Granule-StartTimeLong:[*%20TO%20' + str(stopTime) + ']')
            elif key == 'keyword':
                queries.append('SearchableText-LowerCased:(' + urllib.quote(value) + ')')
            elif key in exactMatchFields:
                queries.append(exactMatchFields[key] + self._urlEncodeSolrQueryValue(value))
            elif key == 'granuleIds':
                granuleIds = [str(granuleId) for granuleId in value]
                queries.append('Granule-Id:(' + '+OR+'.join(granuleIds) + ')')
                # NOTE(review): presumably the ids already represent the
                # requested page, so Solr must not skip any of them -- confirm.
                startIndex = 0
            elif key == 'sortBy':
                # Unknown sort keys keep the default order.
                sort = sortByMapping.get(value, sort)
            elif key == 'bbox':
                filterQuery = self._constructBoundingBoxQuery(value)

        if len(queries) == 0:
            queries.append('*')

        query = 'q='+'+AND+'.join(queries)+'&fq=Granule-AccessType:(OPEN+OR+PREVIEW+OR+SIMULATED+OR+REMOTE)+AND+Granule-Status:ONLINE&version=2.2&start='+str(startIndex)+'&rows='+str(entriesPerPage)+'&indent=on&wt=json&sort='+sort
        if filterQuery is not None:
            query += '&' + filterQuery
        logging.debug('solr query: '+query)

        return query

    def _buildSolrJson(self, numFound, docs):
        """Return a Solr-shaped result dict for the current paging values."""
        return {
            'responseHeader': {'params': {'rows': int(self.entriesPerPage)}},
            'response': {
                'numFound': numFound,
                'start': int(self.startIndex),
                'docs': docs,
            },
        }

    def _searchViaSolr(self):
        """Run the search against the granule Solr core."""
        try:
            self._getSolrResponse(self.startIndex, self.entriesPerPage, self.variables)
        except Exception:
            # NOTE(review): as before, a failure here is only logged and no
            # response is written from this method -- confirm that is intended.
            logging.exception('Failed to get solr response.')

    def _searchViaL2Service(self):
        """Forward the original query string to the configured L2 service."""
        url = self._configuration.get('service', 'l2') + '?'
        if 'format' not in self.requestHandler.request.arguments:
            url += 'format=atom&'
        url += self.requestHandler.request.query
        logging.debug("Calling L2 Service: " + url)
        HttpUtility().getResponse(url, self._onL2Response)

    def _searchViaSpatialDatabase(self):
        """Answer a bbox search from the spatial-search database.

        Raises ValueError when ``bbox`` does not consist of exactly four
        comma-separated numbers (previously this surfaced as an unrelated
        KeyError on 'granuleIdsFound').
        """
        points = self.variables['bbox'].split(',')
        if len(points) != 4:
            raise ValueError('bbox must contain exactly four comma-separated numbers: ' + self.variables['bbox'])

        spatialSearch = SpatialSearch(
            self._configuration.get('service', 'database')
        )
        spatialResult = spatialSearch.searchGranules(
            int(self.startIndex),
            int(self.entriesPerPage),
            float(points[0]),
            float(points[1]),
            float(points[2]),
            float(points[3])
        )
        logging.debug("Granule spatial search returned")
        self.variables['granuleIds'] = spatialResult[0]
        self.variables['granuleIdsFound'] = spatialResult[1]

        del self.variables['bbox']
        # The result is wrapped in a Solr-shaped document so that the regular
        # response generator can render it.
        docs = [{'Granule-Name': [name]} for name in self.variables['granuleIds']]
        solrResponse = json.dumps(self._buildSolrJson(int(self.variables['granuleIdsFound']), docs))

        searchText = ''
        if 'keyword' in self.variables:
            searchText = self.variables['keyword']
        openSearchResponse = self._generateOpenSearchResponse(
            solrResponse,
            searchText,
            self._configuration.get('service', 'url')+self.requestHandler.request.path,
            self.searchParameters,
            self.pretty
        )

        self.requestHandler.set_header("Content-Type", "application/xml")
        self.requestHandler.write(openSearchResponse)
        self.requestHandler.finish()

    def _onSolrDetermineProcessLevelResponse(self, response):
        """Callback for the dataset lookup; selects the search back-end.

        * no dataset document  -> an empty result is written;
        * no ``bbox``, or the processing level does not contain '2' -> Solr;
        * otherwise the L2 service when option ``bbox`` of section
          ``service`` equals 'l2', else the spatial-search database.

        Any failure is logged and reported through ``_handleException``.
        """
        try:
            solrJson = json.loads(response.body)
            docs = solrJson['response']['docs']
            if len(docs) < 1:
                #Dataset metadata cannot be retrieved so return empty search result
                self._writeResponse(json.dumps(self._buildSolrJson(0, [])))
                return

            #Determine dataset processing level (only relevant for bbox searches)
            processingLevel = None
            if 'bbox' in self.variables:
                processingLevel = docs[0]['Dataset-ProcessingLevel-Full'][0]

            if processingLevel is None or processingLevel.find('2') == -1:
                #Not a bounding box search, or not an L2 dataset: handle search via Solr
                self._searchViaSolr()
            elif self._configuration.get('service', 'bbox') == 'l2':
                self._searchViaL2Service()
            else:
                self._searchViaSpatialDatabase()
        except Exception as exception:
            logging.exception('Failed to determine dataset processing level for bbox search ' + str(exception))
            self._handleException(str(exception))

    def _onL2Response(self, response):
        """Relay the L2 service response to the client.

        Content-Type and Content-Length are copied on a best-effort basis;
        as soon as one of them cannot be copied the remaining ones are
        skipped and the body is still written.
        """
        if response.error:
            self._handleException(str(response.error))
            return

        try:
            for header in ('Content-Type', 'Content-Length'):
                logging.debug('header: ' + header + ' ' + response.headers[header])
                self.requestHandler.set_header(header, response.headers[header])
        except Exception:
            logging.debug('L2 response headers not fully copied', exc_info=True)
        self.requestHandler.write(response.body)
        self.requestHandler.finish()
