Hi Markus,

I spent some time this morning exploring the PubChem Power User Gateway
(PUG) and discovered that it has a SOAP interface:
http://pubchem.ncbi.nlm.nih.gov/pug_soap/pug_soap_help.html
They even include a bit of help and some examples for using that
interface from Python:
http://pubchem.ncbi.nlm.nih.gov/pug_soap/examples/python_zsi/python_zsi.html

This provides a pretty straightforward way of interacting with the PUG
from Python.

I made some minor modifications to one of the sample scripts provided
in order to come up with the attached script. It shows how to convert
a list of PubChem CIDs into an RDKit SDMolSupplier. You will need to
have ZSI installed and to have run wsdl2py as described on the page above.

This is, perhaps, a place to start.

-greg
#!/usr/bin/env python
#
# General Python/ZSI hints for PUG:
#  http://pubchem.ncbi.nlm.nih.gov/pug_soap/examples/python_zsi/python_zsi.html
# adapted from: 
#  http://pubchem.ncbi.nlm.nih.gov/pug_soap/examples/python_zsi/DownloadCIDs.py

# bring in the ZSI-generated interface
from PUG_services import *

# other modules/functions
from time import sleep
import tempfile,gzip,os
from urllib import urlretrieve
from RDLogger import logger
logger=logger()
import Chem

def CIDsToSupplier(cids):
    """Fetch PubChem compounds by CID and return them as an SDMolSupplier.

    The CIDs are submitted to the PUG SOAP service, a gzip-compressed SDF
    download is requested, and the operation status is polled every 10
    seconds until it is no longer queued or running.

    Arguments:
      - cids: a sequence of integer PubChem compound IDs

    Returns:
      a Chem.SDMolSupplier loaded with the downloaded SD data, or None if
      the operation did not finish with 'eStatus_Success' (in which case
      the status message reported by the server is logged as an error).
    """
    # get a PUG SOAP port instance
    loc = PUGLocator()
    port = loc.getPUGSoap()

    # start with a list of CIDs
    req = InputListSoapIn()
    req.set_element_ids(req.new_ids())
    req.get_element_ids().set_element_int(cids)
    req.set_element_idType('eID_CID')
    listKey = port.InputList(req).get_element_ListKey()

    # request download in SDF format, gzip-compressed
    req = DownloadSoapIn()
    req.set_element_ListKey(listKey)
    req.set_element_eFormat('eFormat_SDF')
    req.set_element_eCompress('eCompress_GZip')
    downloadKey = port.Download(req).get_element_DownloadKey()

    # call GetOperationStatus until the operation is finished
    req = GetOperationStatusSoapIn()
    req.set_element_AnyKey(downloadKey)
    status = port.GetOperationStatus(req).get_element_status()
    while status in ('eStatus_Queued', 'eStatus_Running'):
        logger.info('Waiting for operation to finish...')
        sleep(10)
        status = port.GetOperationStatus(req).get_element_status()

    if status != 'eStatus_Success':
        # status indicates error: see if there's some explanatory message
        req = GetStatusMessageSoapIn()
        req.set_element_AnyKey(downloadKey)
        logger.error(port.GetStatusMessage(req).get_element_message())
        return None

    # get the url of the prepared file
    req = GetDownloadUrlSoapIn()
    req.set_element_DownloadKey(downloadKey)
    url = port.GetDownloadUrl(req).get_element_url()

    # mkstemp() creates the file atomically, avoiding the race inherent in
    # mktemp(); we only need the name, so the descriptor is closed at once.
    fd, fname = tempfile.mkstemp(suffix='.gz')
    os.close(fd)
    try:
        urlretrieve(url, fname)
        gz = gzip.open(fname)
        try:
            data = gz.read()
        finally:
            # close explicitly so the file can be removed on every platform
            gz.close()
    finally:
        # never leave the temporary download behind, even on failure
        os.unlink(fname)

    supplier = Chem.SDMolSupplier()
    supplier.SetData(data)
    return supplier

if __name__ == '__main__':
    import sys
    # every command-line argument is interpreted as an integer PubChem CID
    suppl = CIDsToSupplier([int(x) for x in sys.argv[1:]])
    if suppl is None:
        # CIDsToSupplier returns None (after logging the server's message)
        # when the download fails; iterating over None would raise TypeError.
        sys.exit(1)
    for mol in suppl:
        # skip empty entries (presumably records that could not be parsed)
        if not mol:
            continue
        # single-argument print with %-formatting: same output as the old
        # "print a, b" statement, and also valid under Python 3 syntax
        print('%s %s' % (mol.GetProp('_Name'), Chem.MolToSmiles(mol, True)))

Reply via email to