I am using AllChem.EmbedMultipleConfs to generate conformers.  I noticed
that conformers in the result set are very similar to each other.  I wrote
a test script to calculate RMS for the conformers and may have found a
bug.  Looks like AllChem.EmbedMultipleConfs is calculating RMS using all
atoms, including Hs, when pruning.  The documents says pruning is based on
heavy atoms RMS.

Attached is my test script and an input file that illustrates the problem.
In this script, 50 conformers are generated and pruneRmsThresh is 0.5.
Pairwise RMS between conformers are >0.5 when H atoms are included.
Pairwise RMS are <0.5 for many conformers when only heavy atoms are


JW Feng, Ph.D.
Denali Therapeutics Inc.
151 Oyster Point Blvd, 2nd Floor, South San Francisco, CA 94080 | (650)

Attachment: 21843_confs_output.sdf
Description: Binary data

Attachment: 21843_input.sdf
Description: Binary data

#!/usr/bin/env python
from __future__ import print_function
from __future__ import division
import sys
import os
import argparse
from rdkit import Chem
from rdkit.Chem import AllChem

# check to see if there are invalid properties
def main(argv=None):
    parser = argparse.ArgumentParser()

    # optional requirements, "required=True" makes it NOT optional
    parser.add_argument("-in", dest="infile", required=True, help="input file")
    parser.add_argument("-out", dest="outfile", required=True, help="output file")
    parser.add_argument("-rmsd", dest="prune_rmsd", type=float, default=0.5,
                        help="RMSD criteria for generating unique confomers."
                             " default=0.5")
    parser.add_argument("-confs", dest="confs", type=int, default=50, help="number of confs to generate, default=50")
    args = None
        args = parser.parse_args(argv)
        # useful parser functions
        sys.stderr.write("Input parameters were incorrect, please check help messages\n")
        return 2

    suppl = Chem.ForwardSDMolSupplier(args.infile)
    sd_writer = Chem.SDWriter(args.outfile)

    for mol in suppl:
        if mol is None:
            print("skipping mol", file=sys.stderr)

        mol = Chem.AddHs(mol)
        #EmbedMultipleConfs((Mol) mol[, (int) numConfs = 10[, (int) maxAttempts = 0[, (int) randomSeed = -1[, (bool)
        #  clearConfs = True[, (bool) useRandomCoords = False[, (float) boxSizeMult = 2.0[, (bool) randNegEig = True[, (int)
        #  numZeroFail = 1[, (float) pruneRmsThresh = -1.0[, (dict) coordMap = {}[, (float) forceTol = 0.001[, (bool)
        #  ignoreSmoothingFailures = False[, (bool) enforceChirality = True[, (int) numThreads = 1[, (bool)
        #  useExpTorsionAnglePrefs = False[, (bool) useBasicKnowledge = False[, (bool) printExpTorsionAngles = False
        # ]]]]]]]]]]]]]]]]]) -> _vecti:
        conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=args.confs, maxAttempts=500, pruneRmsThresh=args.prune_rmsd,
                                              randomSeed=1, numThreads=0, enforceChirality=True,
                                              useExpTorsionAnglePrefs=True, useBasicKnowledge=True)
        # calculate RMSD between conformers, all should be greater than args.prune_rmsd

        sys.stderr.write("RMSD calculated over all atoms, including H\n")
        for id1 in conf_ids:
            for id2 in conf_ids:
                if id1 != id2:
                    #rms = AllChem.GetBestRMS(mol, mol, id1, id2, maps)
                    rms = AllChem.GetConformerRMS(mol, id1, id2)
                    if rms < args.prune_rmsd:
                        sys.stderr.write("RMSD between conf %d and %d: %.2f\n" % (id1, id2, rms))

        sys.stderr.write("RMSD calculated over heavy atoms\n")
        mol = AllChem.RemoveHs(mol)
        for id1 in conf_ids:
            for id2 in conf_ids:
                if id1 != id2:
                    #rms = AllChem.GetBestRMS(mol, mol, id1, id2, maps)
                    rms = AllChem.GetConformerRMS(mol, id1, id2)
                    if rms < args.prune_rmsd:
                        sys.stderr.write("Heavy atom RMSD between conf %d and %d: %.2f\n" % (id1, id2, rms))

        for id in conf_ids:
            sd_writer.write(mol, confId=id)
        sys.stderr.write("Generated %d conformers\n" % len(conf_ids))

if __name__ == "__main__":
    # Let main()'s return value specify the exit status.
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today.http://sdm.link/intel
Rdkit-discuss mailing list

Reply via email to