Hi Markus, I did a bit of exploring of the PubChem Power User Gateway (PUG) this morning and discovered that it has a SOAP interface: http://pubchem.ncbi.nlm.nih.gov/pug_soap/pug_soap_help.html. They even include a bit of help and some examples for using that interface from Python: http://pubchem.ncbi.nlm.nih.gov/pug_soap/examples/python_zsi/python_zsi.html
This provides a pretty straightforward way of interacting with the PUG from Python. I made some minor modifications to one of the sample scripts provided there to come up with the attached script. It shows how to convert a list of PubChem CIDs into an RDKit SDMolSupplier. You will need to have ZSI installed and to run wsdl2py as indicated on the above page. This is, perhaps, a place to start. -greg
#!/usr/bin/env python
#
# General Python/ZSI hints for PUG:
#  http://pubchem.ncbi.nlm.nih.gov/pug_soap/examples/python_zsi/python_zsi.html
# adapted from:
#  http://pubchem.ncbi.nlm.nih.gov/pug_soap/examples/python_zsi/DownloadCIDs.py

# bring in the ZSI-generated interface
from PUG_services import *

# other modules/functions
from time import sleep
import tempfile,gzip,os
from urllib import urlretrieve
from RDLogger import logger
logger=logger()
import Chem


def CIDsToSupplier(cids, pollInterval=10):
    """Download the given PubChem CIDs as SDF and wrap them in an SDMolSupplier.

    The CIDs are submitted to the PUG SOAP service as an input list, an
    SDF/gzip download is requested for that list, and the operation status
    is polled until it is no longer queued or running.

    Arguments:
      - cids: sequence of integer PubChem compound IDs
      - pollInterval: seconds to sleep between status checks (default 10)

    Returns:
      a Chem.SDMolSupplier loaded with the downloaded SD data, or None if
      the operation did not finish with 'eStatus_Success' (the status
      message reported by the service is logged as an error in that case).
    """
    # get a PUG SOAP port instance
    loc = PUGLocator()
    port = loc.getPUGSoap()

    # start with a list of CIDs
    req = InputListSoapIn()
    req.set_element_ids(req.new_ids())
    req.get_element_ids().set_element_int(cids)
    req.set_element_idType('eID_CID')
    listKey = port.InputList(req).get_element_ListKey()

    # request download in SDF format, gzip-compressed
    req = DownloadSoapIn()
    req.set_element_ListKey(listKey)
    req.set_element_eFormat('eFormat_SDF')
    req.set_element_eCompress('eCompress_GZip')
    downloadKey = port.Download(req).get_element_DownloadKey()

    # call GetOperationStatus until the operation is finished
    req = GetOperationStatusSoapIn()
    req.set_element_AnyKey(downloadKey)
    status = port.GetOperationStatus(req).get_element_status()
    while status in ('eStatus_Queued', 'eStatus_Running'):
        logger.info('Waiting for operation to finish...')
        sleep(pollInterval)
        status = port.GetOperationStatus(req).get_element_status()

    if status != 'eStatus_Success':
        # status indicates error; see if there's some explanatory message
        req = GetStatusMessageSoapIn()
        req.set_element_AnyKey(downloadKey)
        logger.error(port.GetStatusMessage(req).get_element_message())
        return None

    # get the url of the prepared file
    req = GetDownloadUrlSoapIn()
    req.set_element_DownloadKey(downloadKey)
    url = port.GetDownloadUrl(req).get_element_url()

    # mkstemp creates the file atomically (mktemp only picks a name, which is
    # race-prone); we only need the path, so close the descriptor right away.
    fd, fname = tempfile.mkstemp('.gz')
    os.close(fd)
    try:
        urlretrieve(url, fname)
        gz = gzip.open(fname)
        try:
            data = gz.read()
        finally:
            # close explicitly so the file can be removed below
            gz.close()
    finally:
        # always clean up the temporary download, even on failure
        os.unlink(fname)

    supplier = Chem.SDMolSupplier()
    supplier.SetData(data)
    return supplier


if __name__ == '__main__':
    import sys
    suppl = CIDsToSupplier([int(x) for x in sys.argv[1:]])
    if suppl is None:
        # the failure reason has already been logged by CIDsToSupplier
        sys.exit(1)
    for mol in suppl:
        if not mol:
            continue
        print('%s %s' % (mol.GetProp('_Name'), Chem.MolToSmiles(mol, True)))

