I'm trying RDKit to calculate 3D descriptors, but I get significant
different descriptors if I read molecules from a SMILES file (and
clean/optimize the 3D structure before calculating the descriptors) or if I
read the SDF file obtained from exactly the same SMILES file using exactly
the same code to optimize the structures.

Scripts attached.

Running smiltodesc_check.py produces descr_myfile.txt

Running gen3D_check.py and then descr_from_sdf_check.py produces
myfile_descr.txt

But the two files are significantly different.

Why aren't they the same? Which is wrong?

JSousa

Attachment: myfile.smi
Description: application/smil

import rdkit
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem import AllChem

md = rdMolStandardize.MetalDisconnector()
lfc = rdMolStandardize.LargestFragmentChooser(preferOrganic=True)
u = rdMolStandardize.Uncharger()

suppl = Chem.rdmolfiles.SmilesMolSupplier('myfile.smi', nameColumn=-1, titleLine=False)

foutput = open("descr_myfile.txt", "w")
foutput.close()

for mol in suppl:
     mol = md.Disconnect(mol)
     mol = lfc.choose(mol)
     mol = u.uncharge(mol)
     rdMolStandardize.Cleanup(mol)
     mol = Chem.AddHs(mol)
     Chem.SanitizeMol(mol)
     AllChem.EmbedMolecule(mol,useRandomCoords=True)
     AllChem.MMFFOptimizeMolecule(mol,'MMFF94s',maxIters=5000)
     descriptorsRDF=rdMolDescriptors.CalcRDF(mol)
     descriptorsMORSE=rdMolDescriptors.CalcMORSE(mol)
     descriptorsMW=rdMolDescriptors.CalcExactMolWt(mol)
     descriptorsWHIM=rdMolDescriptors.CalcWHIM(mol)
     descriptorsAUTOCORR3D=rdMolDescriptors.CalcAUTOCORR3D(mol)
     descriptorsGETAWAY=rdMolDescriptors.CalcGETAWAY(mol)
     descriptorsPEOE=rdMolDescriptors.PEOE_VSA_(mol)
     descriptorsSMR=rdMolDescriptors.SMR_VSA_(mol)
     foutput = open("descr_myfile.txt", "a")
     for item in descriptorsRDF:
         foutput.write("%s," % item)
     for item in descriptorsMORSE:
         foutput.write("%s," % item)
     for item in descriptorsWHIM:
         foutput.write("%s," % item)
     for item in descriptorsAUTOCORR3D:
         foutput.write("%s," % item)
     for item in descriptorsGETAWAY:
         foutput.write("%s," % item)
     for item in descriptorsPEOE:
         foutput.write("%s," % item)
     for item in descriptorsSMR:
         foutput.write("%s," % item)
     foutput.write("{:f},".format(descriptorsMW))
     mol = Chem.RemoveHs(mol)
     foutput.write(Chem.MolToSmiles(mol))
     foutput.write("\n")
     foutput.close()
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.MolStandardize import rdMolStandardize

md = rdMolStandardize.MetalDisconnector()
lfc = rdMolStandardize.LargestFragmentChooser(preferOrganic=True)
u = rdMolStandardize.Uncharger()

suppl = Chem.rdmolfiles.SmilesMolSupplier('myfile.smi', nameColumn=-1, titleLine=False)

w = Chem.SDWriter('myfile_3D.sdf')

for mol in suppl:
     mol = md.Disconnect(mol)
     mol = lfc.choose(mol)
     mol = u.uncharge(mol)
     rdMolStandardize.Cleanup(mol)
     mol = Chem.AddHs(mol)
     Chem.SanitizeMol(mol)
     AllChem.EmbedMolecule(mol,useRandomCoords=True)
     AllChem.MMFFOptimizeMolecule(mol,'MMFF94s',maxIters=5000)
     w.write(mol)

import rdkit
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
from rdkit.ML.Descriptors import MoleculeDescriptors

suppl = Chem.SDMolSupplier('myfile_3D.sdf')

foutput = open("myfile_descr.txt", "w")
foutput.close()

for mol in suppl:
     descriptorsRDF=rdMolDescriptors.CalcRDF(mol)
     descriptorsMORSE=rdMolDescriptors.CalcMORSE(mol)
     descriptorsMW=rdMolDescriptors.CalcExactMolWt(mol)
     descriptorsWHIM=rdMolDescriptors.CalcWHIM(mol)
     descriptorsAUTOCORR3D=rdMolDescriptors.CalcAUTOCORR3D(mol)
     descriptorsGETAWAY=rdMolDescriptors.CalcGETAWAY(mol)
     descriptorsPEOE=rdMolDescriptors.PEOE_VSA_(mol)
     descriptorsSMR=rdMolDescriptors.SMR_VSA_(mol)
     foutput = open("myfile_descr.txt", "a")
     for item in descriptorsRDF:
         foutput.write("%s," % item)
     for item in descriptorsMORSE:
         foutput.write("%s," % item)
     for item in descriptorsWHIM:
         foutput.write("%s," % item)
     for item in descriptorsAUTOCORR3D:
         foutput.write("%s," % item)
     for item in descriptorsGETAWAY:
         foutput.write("%s," % item)
     for item in descriptorsPEOE:
         foutput.write("%s," % item)
     for item in descriptorsSMR:
         foutput.write("%s," % item)
     foutput.write("{:f},".format(descriptorsMW))
     mol = Chem.RemoveHs(mol)
     foutput.write(Chem.MolToSmiles(mol))
     foutput.write("\n")
     foutput.close()

_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to