CLIMATE-519 - Rework ESGF integration module structuring. The ESGF module is now part of the OCW package.
Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/7d3c237c Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/7d3c237c Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/7d3c237c Branch: refs/heads/master Commit: 7d3c237c57305629e588e84bd2f5c46e6dc87edd Parents: 22192f5 Author: Michael Joyce <[email protected]> Authored: Mon Sep 15 08:01:22 2014 -0700 Committer: Michael Joyce <[email protected]> Committed: Sun Oct 19 14:17:08 2014 -0700 ---------------------------------------------------------------------- esgf/README | 17 ----- esgf/src/esgf/__init__.py | 18 ------ esgf/src/esgf/rcmes/__init__.py | 18 ------ esgf/src/esgf/rcmes/constants.py | 35 ---------- esgf/src/esgf/rcmes/download.py | 67 -------------------- esgf/src/esgf/rcmes/logon.py | 44 ------------- esgf/src/esgf/rcmes/logon2.py | 43 ------------- esgf/src/esgf/rcmes/main.py | 116 ---------------------------------- esgf/src/esgf/rcmes/search.py | 89 -------------------------- ocw/esgf/__init__.py | 18 ++++++ ocw/esgf/constants.py | 35 ++++++++++ ocw/esgf/download.py | 67 ++++++++++++++++++++ ocw/esgf/logon.py | 44 +++++++++++++ ocw/esgf/logon2.py | 43 +++++++++++++ ocw/esgf/main.py | 116 ++++++++++++++++++++++++++++++++++ ocw/esgf/search.py | 89 ++++++++++++++++++++++++++ 16 files changed, 412 insertions(+), 447 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/README ---------------------------------------------------------------------- diff --git a/esgf/README b/esgf/README deleted file mode 100644 index 019eb24..0000000 --- a/esgf/README +++ /dev/null @@ -1,17 +0,0 @@ -PROTOTYPE PYTHON PACKAGE FOR ESGF-RCMES INTEGRATION - -Python dependencies: - - Python 2.7 - - PyOpenSSL (sudo easy_install PyOpenSSL==0.10 - NOTE: latest PyOpenSSL 0.13 needs latest SSL installed) - - myproxyclient (sudo easy_install MyProxyClient) - 
- esgf-pyclient (sudo easy_install esgf-pyclient) - -Pre-requisites: - - user must be registered with ESGF and be assigned an OpenID to logon - - user must have been granted membership in group CMIP5 Research to be able to download CMIP5 datasets - -Usage: - - install package under some directory: <INSTALL_DIR> - - export PYTHONPATH=<INSTALL_DIR>/src - - edit <INSTALL_DIR>/src/esgf/rcmes/main.py to configure openid, password, output directory - - python <INSTALL_DIR>/src/esgf/rcmes/main.py \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/__init__.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/__init__.py b/esgf/src/esgf/__init__.py deleted file mode 100644 index 2872cad..0000000 --- a/esgf/src/esgf/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/rcmes/__init__.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/rcmes/__init__.py b/esgf/src/esgf/rcmes/__init__.py deleted file mode 100644 index 2872cad..0000000 --- a/esgf/src/esgf/rcmes/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/rcmes/constants.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/rcmes/constants.py b/esgf/src/esgf/rcmes/constants.py deleted file mode 100644 index a6baf9f..0000000 --- a/esgf/src/esgf/rcmes/constants.py +++ /dev/null @@ -1,35 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -'''Module containing constant parameters for ESGF RCMES integration.''' - -# default location of ESGF user credentials -ESGF_CREDENTIALS = "~/.esg/credentials.pem" - -# URL for ESGF certificate service -#CERT_SERVICE_URL = "https://localhost:8443/esgf-idp/idp/getcert.htm" -CERT_SERVICE_URL = "https://esg-datanode.jpl.nasa.gov/esgf-idp/idp/getcert.htm" - -# Basic authentication realm -REALM = "ESGF" - -# DN of JPL MyProxy server (needs to be explicitely set somtimes) -JPL_MYPROXY_SERVER_DN = "/O=ESGF/OU=esg-datanode.jpl.nasa.gov/CN=host/esg-vm.jpl.nasa.gov" - -# URL of ESGF search service to contact -JPL_SEARCH_SERVICE_URL = "http://esg-datanode.jpl.nasa.gov/esg-search/search" http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/rcmes/download.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/rcmes/download.py b/esgf/src/esgf/rcmes/download.py deleted file mode 100644 index 389b982..0000000 --- a/esgf/src/esgf/rcmes/download.py +++ /dev/null @@ -1,67 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -''' -RCMES module to download a file from ESGF. - -''' - -import urllib2, httplib -from os.path import expanduser, join - -from esgf.rcmes.constants import ESGF_CREDENTIALS - -class HTTPSClientAuthHandler(urllib2.HTTPSHandler): - ''' - HTTP handler that transmits an X509 certificate as part of the request - ''' - - def __init__(self, key, cert): - urllib2.HTTPSHandler.__init__(self) - self.key = key - self.cert = cert - def https_open(self, req): - return self.do_open(self.getConnection, req) - def getConnection(self, host, timeout=300): - return httplib.HTTPSConnection(host, key_file=self.key, cert_file=self.cert) - -def download(url, toDirectory="/tmp"): - ''' - Function to download a single file from ESGF. - - :param url: the URL of the file to download - :param toDirectory: target directory where the file will be written - ''' - - # setup HTTP handler - certFile = expanduser(ESGF_CREDENTIALS) - opener = urllib2.build_opener(HTTPSClientAuthHandler(certFile,certFile)) - opener.add_handler(urllib2.HTTPCookieProcessor()) - - # download file - localFilePath = join(toDirectory,url.split('/')[-1]) - print "\nDownloading url: %s to local path: %s ..." % (url, localFilePath) - localFile=open( localFilePath, 'w') - webFile=opener.open(url) - localFile.write(webFile.read()) - - # cleanup - localFile.close() - webFile.close() - opener.close() - print "... 
done" http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/rcmes/logon.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/rcmes/logon.py b/esgf/src/esgf/rcmes/logon.py deleted file mode 100644 index 0a6f4f7..0000000 --- a/esgf/src/esgf/rcmes/logon.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -''' -RCMES module to logon onto the ESGF. -''' - -from pyesgf.logon import LogonManager -import os - -from esgf.rcmes.constants import JPL_MYPROXY_SERVER_DN - -def logon(openid, password): - ''' - Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF. - The certificate is written in the location ~/.esg/credentials.pem. - The trusted CA certificates are written in the directory ~/.esg/certificates. 
- ''' - - # Must configure the DN of the JPL MyProxy server if using a JPL openid - if "esg-datanode.jpl.nasa.gov" in openid: - os.environ['MYPROXY_SERVER_DN'] = JPL_MYPROXY_SERVER_DN - - lm = LogonManager() - lm.logon_with_openid(openid,password) - return lm.is_logged_on() - - - http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/rcmes/logon2.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/rcmes/logon2.py b/esgf/src/esgf/rcmes/logon2.py deleted file mode 100644 index 11a808a..0000000 --- a/esgf/src/esgf/rcmes/logon2.py +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -''' -RCMES module to logon onto the ESGF by contacting the IdP RESTful service. -''' - -from esgf.rcmes.constants import ESGF_CREDENTIALS, CERT_SERVICE_URL, REALM - -import urllib2 -from os.path import expanduser - -def logon2(openid, password): - ''' - Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF. - The certificate is written in the location specified by ESGF_CREDENTIALS. 
- ''' - - password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() - password_mgr.add_password(REALM, CERT_SERVICE_URL, openid, password) - handler = urllib2.HTTPBasicAuthHandler(password_mgr) - opener = urllib2.build_opener(urllib2.HTTPHandler, handler) - request = opener.open(CERT_SERVICE_URL) - #print request.read() - - localFilePath = expanduser(ESGF_CREDENTIALS) - certFile=open(localFilePath, 'w') - certFile.write(request.read()) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/rcmes/main.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/rcmes/main.py b/esgf/src/esgf/rcmes/main.py deleted file mode 100644 index 4ffadcc..0000000 --- a/esgf/src/esgf/rcmes/main.py +++ /dev/null @@ -1,116 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -''' -Example main program for ESGF-RCMES integration. 
- -''' - -# constant parameters -USER_OPENID = "https://esg-datanode.jpl.nasa.gov/esgf-idp/openid/lucacinquini" -USER_PASSWORD = "*****" -DATA_DIRECTORY = "/tmp" - -from esgf.rcmes.logon import logon -from esgf.rcmes.logon2 import logon2 -from esgf.rcmes.search import SearchClient -from esgf.rcmes.download import download - -def main(): - '''Example driver program''' - - # step 1: obtain short-term certificate - print 'Retrieving ESGF certificate...' - # logon using client-side MyProxy libraries - #if logon(USER_OPENID, USER_PASSWORD): - # print "...done." - # logon through server-side MyProxy service - if logon2(USER_OPENID, USER_PASSWORD): - print "...done" - - # step 2: execute faceted search for files - urls = main_obs4mips() - #urls = main_cmip5() - - # step 3: download file(s) - for i, url in enumerate(urls): - if i>=1: - break - download(url, toDirectory=DATA_DIRECTORY) - - -def main_cmip5(): - ''' - Example workflow to search for CMIP5 files - ''' - - searchClient = SearchClient(searchServiceUrl="http://pcmdi9.llnl.gov/esg-search/search", distrib=False) - - print '\nAvailable projects=%s' % searchClient.getFacets('project') - searchClient.setConstraint(project='CMIP5') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - print '\nAvailable models=%s' % searchClient.getFacets('model') - searchClient.setConstraint(model='INM-CM4') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - print '\nAvailable experiments=%s' % searchClient.getFacets('experiment') - searchClient.setConstraint(experiment='historical') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - print '\nAvailable time frequencies=%s' % searchClient.getFacets('time_frequency') - searchClient.setConstraint(time_frequency='mon') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - print '\nAvailable CF standard names=%s' % searchClient.getFacets('cf_standard_name') - 
searchClient.setConstraint(cf_standard_name='air_temperature') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - urls = searchClient.getFiles() - return urls - - -def main_obs4mips(): - ''' - Example workflow to search for obs4MIPs files. - ''' - - searchClient = SearchClient(distrib=False) - - # obs4MIPs - print '\nAvailable projects=%s' % searchClient.getFacets('project') - searchClient.setConstraint(project='obs4MIPs') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - print '\nAvailable variables=%s' % searchClient.getFacets('variable') - searchClient.setConstraint(variable='hus') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - print '\nAvailable time frequencies=%s' % searchClient.getFacets('time_frequency') - searchClient.setConstraint(time_frequency='mon') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - print '\nAvailable models=%s' % searchClient.getFacets('model') - searchClient.setConstraint(model='Obs-MLS') - print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() - - urls = searchClient.getFiles() - return urls - -if __name__ == '__main__': - main() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/esgf/src/esgf/rcmes/search.py ---------------------------------------------------------------------- diff --git a/esgf/src/esgf/rcmes/search.py b/esgf/src/esgf/rcmes/search.py deleted file mode 100644 index 3a81f24..0000000 --- a/esgf/src/esgf/rcmes/search.py +++ /dev/null @@ -1,89 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -''' -RCMES module to execute a faceted search for ESGF files. - -''' - -from pyesgf.search import SearchConnection - -from esgf.rcmes.constants import JPL_SEARCH_SERVICE_URL - -class SearchClient(): - """ - Simple ESGF search client for RCMES. - This class is a thin layer on top of the esgfpy-client package. - Note: this class always searches for latest versions, no replicas. - """ - - def __init__(self, searchServiceUrl=JPL_SEARCH_SERVICE_URL, distrib=True): - """ - :param searchServiceUrl: URL of ESGF search service to query - :param distrib: True to execute a federation-wide search, - False to search only the specified search service - """ - connection = SearchConnection(searchServiceUrl, distrib=distrib) - - # dictionary of query constraints - self.constraints = { "latest":True, "replica":False, "distrib":distrib } - - # initial search context - self.context = connection.new_context( **self.constraints ) - - - def setConstraint(self, **constraints): - """ - Sets one or more facet constraints. - :param constraints: dictionary of (facet name, facet value) constraints. - """ - for key in constraints: - print 'Setting constraint: %s=%s' % (key, constraints[key]) - self.constraints[key] = constraints[key] - self.context = self.context.constrain(**constraints) - - def getNumberOfDatasets(self): - """ - :return: the number of datasets matching the current constraints. - """ - return self.context.hit_count - - def getFacets(self, facet): - """ - :return: a dictionary of (facet value, facet count) for the specified facet and current constraints. 
- Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7} - """ - return self.context.facet_counts[facet] - - def getFiles(self): - """ - Executes a search for files with the current constraints. - :return: list of file download URLs. - """ - datasets = self.context.search() - urls = [] - for dataset in datasets: - print "\nSearching files for dataset=%s with constraints: %s" % (dataset.dataset_id, self.constraints) - files = dataset.file_context().search(**self.constraints) - for file in files: - print 'Found file=%s' % file.download_url - urls.append(file.download_url) - return urls - - - http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/ocw/esgf/__init__.py ---------------------------------------------------------------------- diff --git a/ocw/esgf/__init__.py b/ocw/esgf/__init__.py new file mode 100644 index 0000000..2872cad --- /dev/null +++ b/ocw/esgf/__init__.py @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/ocw/esgf/constants.py ---------------------------------------------------------------------- diff --git a/ocw/esgf/constants.py b/ocw/esgf/constants.py new file mode 100644 index 0000000..a6baf9f --- /dev/null +++ b/ocw/esgf/constants.py @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +'''Module containing constant parameters for ESGF RCMES integration.''' + +# default location of ESGF user credentials +ESGF_CREDENTIALS = "~/.esg/credentials.pem" + +# URL for ESGF certificate service +#CERT_SERVICE_URL = "https://localhost:8443/esgf-idp/idp/getcert.htm" +CERT_SERVICE_URL = "https://esg-datanode.jpl.nasa.gov/esgf-idp/idp/getcert.htm" + +# Basic authentication realm +REALM = "ESGF" + +# DN of JPL MyProxy server (needs to be explicitely set somtimes) +JPL_MYPROXY_SERVER_DN = "/O=ESGF/OU=esg-datanode.jpl.nasa.gov/CN=host/esg-vm.jpl.nasa.gov" + +# URL of ESGF search service to contact +JPL_SEARCH_SERVICE_URL = "http://esg-datanode.jpl.nasa.gov/esg-search/search" http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/ocw/esgf/download.py ---------------------------------------------------------------------- diff --git a/ocw/esgf/download.py b/ocw/esgf/download.py new file mode 100644 index 0000000..389b982 --- /dev/null +++ b/ocw/esgf/download.py @@ -0,0 +1,67 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +''' +RCMES module to download a file from ESGF. 
+ +''' + +import urllib2, httplib +from os.path import expanduser, join + +from esgf.rcmes.constants import ESGF_CREDENTIALS + +class HTTPSClientAuthHandler(urllib2.HTTPSHandler): + ''' + HTTP handler that transmits an X509 certificate as part of the request + ''' + + def __init__(self, key, cert): + urllib2.HTTPSHandler.__init__(self) + self.key = key + self.cert = cert + def https_open(self, req): + return self.do_open(self.getConnection, req) + def getConnection(self, host, timeout=300): + return httplib.HTTPSConnection(host, key_file=self.key, cert_file=self.cert) + +def download(url, toDirectory="/tmp"): + ''' + Function to download a single file from ESGF. + + :param url: the URL of the file to download + :param toDirectory: target directory where the file will be written + ''' + + # setup HTTP handler + certFile = expanduser(ESGF_CREDENTIALS) + opener = urllib2.build_opener(HTTPSClientAuthHandler(certFile,certFile)) + opener.add_handler(urllib2.HTTPCookieProcessor()) + + # download file + localFilePath = join(toDirectory,url.split('/')[-1]) + print "\nDownloading url: %s to local path: %s ..." % (url, localFilePath) + localFile=open( localFilePath, 'w') + webFile=opener.open(url) + localFile.write(webFile.read()) + + # cleanup + localFile.close() + webFile.close() + opener.close() + print "... done" http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/ocw/esgf/logon.py ---------------------------------------------------------------------- diff --git a/ocw/esgf/logon.py b/ocw/esgf/logon.py new file mode 100644 index 0000000..0a6f4f7 --- /dev/null +++ b/ocw/esgf/logon.py @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +''' +RCMES module to logon onto the ESGF. +''' + +from pyesgf.logon import LogonManager +import os + +from esgf.rcmes.constants import JPL_MYPROXY_SERVER_DN + +def logon(openid, password): + ''' + Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF. + The certificate is written in the location ~/.esg/credentials.pem. + The trusted CA certificates are written in the directory ~/.esg/certificates. + ''' + + # Must configure the DN of the JPL MyProxy server if using a JPL openid + if "esg-datanode.jpl.nasa.gov" in openid: + os.environ['MYPROXY_SERVER_DN'] = JPL_MYPROXY_SERVER_DN + + lm = LogonManager() + lm.logon_with_openid(openid,password) + return lm.is_logged_on() + + + http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/ocw/esgf/logon2.py ---------------------------------------------------------------------- diff --git a/ocw/esgf/logon2.py b/ocw/esgf/logon2.py new file mode 100644 index 0000000..11a808a --- /dev/null +++ b/ocw/esgf/logon2.py @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +''' +RCMES module to logon onto the ESGF by contacting the IdP RESTful service. +''' + +from esgf.rcmes.constants import ESGF_CREDENTIALS, CERT_SERVICE_URL, REALM + +import urllib2 +from os.path import expanduser + +def logon2(openid, password): + ''' + Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF. + The certificate is written in the location specified by ESGF_CREDENTIALS. + ''' + + password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() + password_mgr.add_password(REALM, CERT_SERVICE_URL, openid, password) + handler = urllib2.HTTPBasicAuthHandler(password_mgr) + opener = urllib2.build_opener(urllib2.HTTPHandler, handler) + request = opener.open(CERT_SERVICE_URL) + #print request.read() + + localFilePath = expanduser(ESGF_CREDENTIALS) + certFile=open(localFilePath, 'w') + certFile.write(request.read()) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/ocw/esgf/main.py ---------------------------------------------------------------------- diff --git a/ocw/esgf/main.py b/ocw/esgf/main.py new file mode 100644 index 0000000..4ffadcc --- /dev/null +++ b/ocw/esgf/main.py @@ -0,0 +1,116 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +''' +Example main program for ESGF-RCMES integration. + +''' + +# constant parameters +USER_OPENID = "https://esg-datanode.jpl.nasa.gov/esgf-idp/openid/lucacinquini" +USER_PASSWORD = "*****" +DATA_DIRECTORY = "/tmp" + +from esgf.rcmes.logon import logon +from esgf.rcmes.logon2 import logon2 +from esgf.rcmes.search import SearchClient +from esgf.rcmes.download import download + +def main(): + '''Example driver program''' + + # step 1: obtain short-term certificate + print 'Retrieving ESGF certificate...' + # logon using client-side MyProxy libraries + #if logon(USER_OPENID, USER_PASSWORD): + # print "...done." 
+ # logon through server-side MyProxy service + if logon2(USER_OPENID, USER_PASSWORD): + print "...done" + + # step 2: execute faceted search for files + urls = main_obs4mips() + #urls = main_cmip5() + + # step 3: download file(s) + for i, url in enumerate(urls): + if i>=1: + break + download(url, toDirectory=DATA_DIRECTORY) + + +def main_cmip5(): + ''' + Example workflow to search for CMIP5 files + ''' + + searchClient = SearchClient(searchServiceUrl="http://pcmdi9.llnl.gov/esg-search/search", distrib=False) + + print '\nAvailable projects=%s' % searchClient.getFacets('project') + searchClient.setConstraint(project='CMIP5') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + print '\nAvailable models=%s' % searchClient.getFacets('model') + searchClient.setConstraint(model='INM-CM4') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + print '\nAvailable experiments=%s' % searchClient.getFacets('experiment') + searchClient.setConstraint(experiment='historical') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + print '\nAvailable time frequencies=%s' % searchClient.getFacets('time_frequency') + searchClient.setConstraint(time_frequency='mon') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + print '\nAvailable CF standard names=%s' % searchClient.getFacets('cf_standard_name') + searchClient.setConstraint(cf_standard_name='air_temperature') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + urls = searchClient.getFiles() + return urls + + +def main_obs4mips(): + ''' + Example workflow to search for obs4MIPs files. 
+ ''' + + searchClient = SearchClient(distrib=False) + + # obs4MIPs + print '\nAvailable projects=%s' % searchClient.getFacets('project') + searchClient.setConstraint(project='obs4MIPs') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + print '\nAvailable variables=%s' % searchClient.getFacets('variable') + searchClient.setConstraint(variable='hus') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + print '\nAvailable time frequencies=%s' % searchClient.getFacets('time_frequency') + searchClient.setConstraint(time_frequency='mon') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + print '\nAvailable models=%s' % searchClient.getFacets('model') + searchClient.setConstraint(model='Obs-MLS') + print "Number of Datasets=%d" % searchClient.getNumberOfDatasets() + + urls = searchClient.getFiles() + return urls + +if __name__ == '__main__': + main() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/climate/blob/7d3c237c/ocw/esgf/search.py ---------------------------------------------------------------------- diff --git a/ocw/esgf/search.py b/ocw/esgf/search.py new file mode 100644 index 0000000..3a81f24 --- /dev/null +++ b/ocw/esgf/search.py @@ -0,0 +1,89 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +''' +RCMES module to execute a faceted search for ESGF files. + +''' + +from pyesgf.search import SearchConnection + +from esgf.rcmes.constants import JPL_SEARCH_SERVICE_URL + +class SearchClient(): + """ + Simple ESGF search client for RCMES. + This class is a thin layer on top of the esgfpy-client package. + Note: this class always searches for latest versions, no replicas. + """ + + def __init__(self, searchServiceUrl=JPL_SEARCH_SERVICE_URL, distrib=True): + """ + :param searchServiceUrl: URL of ESGF search service to query + :param distrib: True to execute a federation-wide search, + False to search only the specified search service + """ + connection = SearchConnection(searchServiceUrl, distrib=distrib) + + # dictionary of query constraints + self.constraints = { "latest":True, "replica":False, "distrib":distrib } + + # initial search context + self.context = connection.new_context( **self.constraints ) + + + def setConstraint(self, **constraints): + """ + Sets one or more facet constraints. + :param constraints: dictionary of (facet name, facet value) constraints. + """ + for key in constraints: + print 'Setting constraint: %s=%s' % (key, constraints[key]) + self.constraints[key] = constraints[key] + self.context = self.context.constrain(**constraints) + + def getNumberOfDatasets(self): + """ + :return: the number of datasets matching the current constraints. + """ + return self.context.hit_count + + def getFacets(self, facet): + """ + :return: a dictionary of (facet value, facet count) for the specified facet and current constraints. + Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7} + """ + return self.context.facet_counts[facet] + + def getFiles(self): + """ + Executes a search for files with the current constraints. + :return: list of file download URLs. 
+ """ + datasets = self.context.search() + urls = [] + for dataset in datasets: + print "\nSearching files for dataset=%s with constraints: %s" % (dataset.dataset_id, self.constraints) + files = dataset.file_context().search(**self.constraints) + for file in files: + print 'Found file=%s' % file.download_url + urls.append(file.download_url) + return urls + + +
