OK, I am almost there!

First, I tried the AllChem.ConstrainedEmbed(qmol, core) function to
generate conformers, where core was a mol object created from the MCS with
3D coordinates copied from template's MCS. But is seems that this functions
works only when core is an intact molecule, because I get this error:

[15:48:22] Explicit valence for atom # 8 C, 5, is greater than permitted
> Traceback (most recent call last):
>   File "/usr/local/bin/align_lig.py", line 392, in <module>
>     embed_conformers2(qmolname, refmolname, writer)
>   File "/usr/local/bin/align_lig.py", line 295, in embed_conformers2
>     Chem.AllChem.EmbedMultipleConfs(patt)
> ValueError: Sanitization error: Explicit valence for atom # 8 C, 5, is
> greater than permitted
>


So I ended up working with distance restraints during optimization. At the
end of the email is the relevant part of the code. The problem is that most
of the geometries are wrong, namely distorted rings, out-of-plane
hydrogens, etc. At first, my code selected as the most representative
conformer of each query compound that one with the lowest RMSD and highest
shape similarity value to the template. But in most of the cases the
selected conformer had wrong geometry. How can I rule out such conformers?
One thought was to measure the energy and select the lowest energy
conformer, but this does not necessarily mean that it will be the one
resembling the template ligand the most.

Lastly and most importantly (!!!), RDKit fails to generate conformers for
many ligands. The error I get is:

[16:29:30] Could not triangle bounds smooth molecule.
> WARNING: No conformations generated for molecule erk000036
>

What does this mean? Am I using an old version of RDKit (2015.03.1, the one
in Ubuntu repositories) ?


Thanks in advance,
Thomas


############### THE RELEVANT CODE ####################


def embed_conformers1(qmolname, refmolname, writer):
>
>     global qmolname2qmol_dict, refmolname2refmol_dict,
> qmolname_refmolname_fullscaffMCS_multidict, rmsthreshold, shapethreshold,
> nconf
>     global forcetol, RMSD_cutoff
>
>     qmol = qmolname2qmol_dict[qmolname]
>     refmolname = qmolname_bestRefmolname_dict[qmolname] # the best
> reference ligand
>     qmol.SetProp('refmolname', refmolname)  # save in its properties the
> name of the template ligand
>     refmol = refmolname2refmol_dict[refmolname]
>     refconf = refmol.GetConformer() # the original reference conformer
>     qconf = qmol.GetConformer() # the original query conformer
>
>     # nsuccess = 0    # number of query compound conformations that passed
> the RMSD and SHAPE SIM threshold criteria
>
>     #
>     # : matchValences=True,completeRingsOnly=True,bondCompare="bondtypes"
>     mcs = qmolname_refmolname_fullscaffMCS_multidict[qmolname][refmolname]
>     patt  = Chem.MolFromSmarts(mcs.smartsString)
>     refatoms = refmol.GetSubstructMatch(patt)  # reference ligand atoms of
> the MCS
>     qatoms  = qmol.GetSubstructMatch(patt)    # query compound atoms of
> the MCS
>
>     aliatoms = []   # the atomMap for the restrained conformer generation
> (include hydrogens)
>     coordmap = {}   # the coordinates of the restrained atoms in conformer
> generation
>
>     # AVENUE4.1: copy the coordinates of the MCS of the reference to the
> query 1st conformer, generate N conformers using distance restraints and
> optimize
>     # their geometry using distance restraints again.
>     for k in range(0, len(refatoms) ):
>         pt3 = refconf.GetAtomPosition(refatoms[k])
>         qconf.SetAtomPosition(qatoms[k],pt3)
>         coordmap[qatoms[k]] = pt3
>         aliatoms.append( [qatoms[k], refatoms[k]] )
>
>     qmol.AddConformer(qconf, assignId=0)    # replace the original query
> conformer with the one which has a MCS with the reference ligand coordinates
>     # Now generate N query conformer and save their conformer IDs
>     newconfIDs = AllChem.EmbedMultipleConfs(qmol, coordMap=coordmap,
> forceTol=forcetol, numConfs=args.N, pruneRmsThresh=RMSD_cutoff)
>
>     if(len(newconfIDs) == 0):
>         print "WARNING: No conformations generated for molecule %s"
> %(qmolname)
>     else:
>         bestshape_sim = -1
>         bestrms = 1e10
>         bestconfID    = -1
>
>         confID_isvalid_dict = []    # confID -> 1 if the conformer is not
> too similar to the other conformers, or 0 otherwise
>         # loop over all generated conformations
>         confIDEnergy_list = []
>         for qconfID in range(0, len(newconfIDs)):
>             # print "DEBUG: optimizing confomrer", qconfID
>
>             confID_isvalid_dict.append(1)
>             # Optimize current conformation
>             # from ConstrainedEmbed() , minimization using constraints
> defined by MCS-matching atoms
>             ff = AllChem.UFFGetMoleculeForceField(qmol, confId=qconfID)
>             # Add distance constraints for minimization
>             for k in range(0, len(refatoms) ):
>                 pt3 = refconf.GetAtomPosition(refatoms[k])
>                 pIdx = ff.AddExtraPoint(pt3.x, pt3.y, pt3.z, fixed=True)
> -1 # -1 because atom indexing starts from 0
>                 # ff.AddDistanceConstraint(pIdx, qatoms[k], 0, 0, 100.) #
> Deprecated function
>                 ff.UFFAddDistanceConstraint(pIdx, qatoms[k], False, 0, 0,
> 1000.)
>             # Perform minimization
>             ff.Initialize()
>             n = 4
>             more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
>             while more and n:
>                 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
>                 n-=1
>             confIDEnergy_list.append( (qconfID, ff.CalcEnergy()) )
>
>             # compare current conformation with previously optimized
> conformations
>             # and ignore it if it is too similar to any other conformation
>             for d in range(0, qconfID): # iterate over all optimized
> conformations so far
>                 if(confID_isvalid_dict[qconfID] != 0 and qconfID != d):
>                     rms = AllChem.AlignMol(qmol, qmol, qconfID, d)
>                     if(rms <= RMSD_cutoff):
>                         confID_isvalid_dict[qconfID] = 0
>                         break
>
>             if(confID_isvalid_dict[qconfID] == 1):
>                 # re-align current conformation to the reference ligand
>                 rms = Chem.rdMolAlign.AlignMol(qmol, refmol,
> atomMap=aliatoms, prbCid=qconfID, refCid=0)
>                 shape_sim = AllChem.ShapeTanimotoDist(refmol, qmol,
> confId2=qconfID)
>
>                 # # write out all conformations which are below at least
> one threshold
>                 # if(rms < rmsthreshold or shape_sim > shapethreshold):
>                 #     print "Writing conformation with RMS %f and SHAPE
> SIM %f to the reference ligand %s and energy %f" %(rms, shape_sim,
> refmolname, confID2energy_dict[qconfID])
>                 #     writer.write(qmol, confId=qconfID)
>                 #     # nsuccess = nsuccess + 1
>
>                 # if shape_sim, rmsd are higher & lower respectively than
> the best conformer so far -> store the current conformer as the best
>                 if(shape_sim > bestshape_sim and rms < bestrms):
>                     bestshape_sim=shape_sim
>                     bestrms=rms
>                     bestconfID=qconfID
>
>         # # Save the best aligned query conformation
>         # if(bestconfID >= 0 and confID_isvalid_dict[bestconfID] != 0 ):
>         #     if(rmsthreshold < 0 and shapethreshold > 1):
>         #         print "Writing best conformation shape-sim %.3f rms
> %.3f" %(bestshape_sim, bestrms)
>         #         writer.write(qmol, confId=bestconfID)
>         # Save the aligned conformer that satisfies the RMSD & shape-sim
> threshold criteria and has the lowest energy
>         confIDEnergy_list.sort(key=itemgetter(1))
>         # print "DEBUG: confIDEnergy_list=", confIDEnergy_list
>         bestconfID = confIDEnergy_list[0][0]
>         bestconfenergy = confIDEnergy_list[0][1]
>         rms = Chem.rdMolAlign.AlignMol(qmol, refmol, atomMap=aliatoms,
> prbCid=bestconfID, refCid=0)
>         shape_sim = AllChem.ShapeTanimotoDist(refmol, qmol,
> confId2=bestconfID)
>         print "Writing best conformation of %s aligned to %s, with energy
> %f, RMSD %f and shape-similarity %f." % (qmolname, refmolname,
> bestconfenergy, rms, shape_sim)
>         writer.write(qmol, confId=bestconfID)
>
>         #         nsuccess = 1
>         # elif (nsuccess == 0):
>         #     print "Error when generating conformation"
>
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to