I'm trying RDKit to calculate 3D descriptors, but I get significant different descriptors if I read molecules from a SMILES file (and clean/optimize the 3D structure before calculating the descriptors) or if I read the SDF file obtained from exactly the same SMILES file using exactly the same code to optimize the structures.
Scripts attached. Running smiltodesc_check.py produces descr_myfile.txt Running gen3D_check.py and then descr_from_sdf_check.py produces myfile_descr.txt But the two files are significantly different. Why aren't they the same? Which is wrong? JSousa
myfile.smi
Description: application/smil
import rdkit from rdkit import Chem from rdkit.Chem import Descriptors, rdMolDescriptors from rdkit.ML.Descriptors import MoleculeDescriptors from rdkit.Chem.MolStandardize import rdMolStandardize from rdkit.Chem import AllChem md = rdMolStandardize.MetalDisconnector() lfc = rdMolStandardize.LargestFragmentChooser(preferOrganic=True) u = rdMolStandardize.Uncharger() suppl = Chem.rdmolfiles.SmilesMolSupplier('myfile.smi', nameColumn=-1, titleLine=False) foutput = open("descr_myfile.txt", "w") foutput.close() for mol in suppl: mol = md.Disconnect(mol) mol = lfc.choose(mol) mol = u.uncharge(mol) rdMolStandardize.Cleanup(mol) mol = Chem.AddHs(mol) Chem.SanitizeMol(mol) AllChem.EmbedMolecule(mol,useRandomCoords=True) AllChem.MMFFOptimizeMolecule(mol,'MMFF94s',maxIters=5000) descriptorsRDF=rdMolDescriptors.CalcRDF(mol) descriptorsMORSE=rdMolDescriptors.CalcMORSE(mol) descriptorsMW=rdMolDescriptors.CalcExactMolWt(mol) descriptorsWHIM=rdMolDescriptors.CalcWHIM(mol) descriptorsAUTOCORR3D=rdMolDescriptors.CalcAUTOCORR3D(mol) descriptorsGETAWAY=rdMolDescriptors.CalcGETAWAY(mol) descriptorsPEOE=rdMolDescriptors.PEOE_VSA_(mol) descriptorsSMR=rdMolDescriptors.SMR_VSA_(mol) foutput = open("descr_myfile.txt", "a") for item in descriptorsRDF: foutput.write("%s," % item) for item in descriptorsMORSE: foutput.write("%s," % item) for item in descriptorsWHIM: foutput.write("%s," % item) for item in descriptorsAUTOCORR3D: foutput.write("%s," % item) for item in descriptorsGETAWAY: foutput.write("%s," % item) for item in descriptorsPEOE: foutput.write("%s," % item) for item in descriptorsSMR: foutput.write("%s," % item) foutput.write("{:f},".format(descriptorsMW)) mol = Chem.RemoveHs(mol) foutput.write(Chem.MolToSmiles(mol)) foutput.write("\n") foutput.close()
import rdkit from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem.MolStandardize import rdMolStandardize md = rdMolStandardize.MetalDisconnector() lfc = rdMolStandardize.LargestFragmentChooser(preferOrganic=True) u = rdMolStandardize.Uncharger() suppl = Chem.rdmolfiles.SmilesMolSupplier('myfile.smi', nameColumn=-1, titleLine=False) w = Chem.SDWriter('myfile_3D.sdf') for mol in suppl: mol = md.Disconnect(mol) mol = lfc.choose(mol) mol = u.uncharge(mol) rdMolStandardize.Cleanup(mol) mol = Chem.AddHs(mol) Chem.SanitizeMol(mol) AllChem.EmbedMolecule(mol,useRandomCoords=True) AllChem.MMFFOptimizeMolecule(mol,'MMFF94s',maxIters=5000) w.write(mol)
import rdkit from rdkit import Chem from rdkit.Chem import Descriptors, rdMolDescriptors from rdkit.ML.Descriptors import MoleculeDescriptors suppl = Chem.SDMolSupplier('myfile_3D.sdf') foutput = open("myfile_descr.txt", "w") foutput.close() for mol in suppl: descriptorsRDF=rdMolDescriptors.CalcRDF(mol) descriptorsMORSE=rdMolDescriptors.CalcMORSE(mol) descriptorsMW=rdMolDescriptors.CalcExactMolWt(mol) descriptorsWHIM=rdMolDescriptors.CalcWHIM(mol) descriptorsAUTOCORR3D=rdMolDescriptors.CalcAUTOCORR3D(mol) descriptorsGETAWAY=rdMolDescriptors.CalcGETAWAY(mol) descriptorsPEOE=rdMolDescriptors.PEOE_VSA_(mol) descriptorsSMR=rdMolDescriptors.SMR_VSA_(mol) foutput = open("myfile_descr.txt", "a") for item in descriptorsRDF: foutput.write("%s," % item) for item in descriptorsMORSE: foutput.write("%s," % item) for item in descriptorsWHIM: foutput.write("%s," % item) for item in descriptorsAUTOCORR3D: foutput.write("%s," % item) for item in descriptorsGETAWAY: foutput.write("%s," % item) for item in descriptorsPEOE: foutput.write("%s," % item) for item in descriptorsSMR: foutput.write("%s," % item) foutput.write("{:f},".format(descriptorsMW)) mol = Chem.RemoveHs(mol) foutput.write(Chem.MolToSmiles(mol)) foutput.write("\n") foutput.close()
_______________________________________________ Rdkit-discuss mailing list Rdkit-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/rdkit-discuss