Hi Greg,

here the first code prototype of the article mappiing in rdkit: for the moment 
I have some issues with angle the rest start to be ok


Can you look at it and see if my interpretation is ok ?


BR,


Guillaume


---------------
import sys
import re
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdMolTransforms
from numpy import *
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
import matplotlib.pyplot as plt


def Occ(smart,mol):
    func = Chem.MolFromSmarts(smart)
    maps = mol.GetSubstructMatches(func)
    return len(maps)


def Angles(mol):
    tors = 
mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]-[#6,#7,#16]~[#6,#7,#16]-[#6]'))
    conf = mol.GetConformer()
    A60=0
    A90=0
    A102=0
    for tor in tors:
        Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2], tor[3]))
        if Angle<=60:
            A60+=1
        elif Angle>60 and Angle<=90:
            A90+=1
        elif Angle>90 and Angle<=102:
            A102+=1
    return (A60,A90,A102)

# 3 molecules not parse with rdkit ? so can explain some occurence diff
# Guanidinium chloride
# Creatine
# Arginine


def SumdHfCombustionProducts(mol):
#(data extracted from Domalski1 and Skinner2) dHf(kJ/mol:
    dHfH3BO3=-1094.7226
    dHfCO2 =-393.7727
    dHfH2O=-286.0212
    dHfH2SO4=-888.405
    dHfH3PO4=-1279.90476
    dHfSiO2=-847.827
    dHfHF=-316.6435
    dHfHCl=-165.5879
    dHfHBr=-121.4172
    dHfHI=-142.0
    NB=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#5]')))
    NC=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]')) )
    NS=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#16]')))
    NP=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#15]')))
    NSi=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#14]')))
    NF=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#9]')))
    NCl=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#17]')))
    NBr=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#35]')))
    NI=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#53]')))
    
NH2O=(len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#1]')))-3*NB-2*NS-3*NP-NF-NCl-NBr-NI)/2;
    return 
NB*dHfH3BO3+NC*dHfCO2+NH2O*dHfH2O+NS*dHfH2SO4+NP*dHfH3PO4+NSi*dHfSiO2+NF*dHfHF+NCl*dHfHCl+NBr*dHfHBr+NI*dHfHI
# equation to convert Hc to Hf => HeatOfFormation = SumdHfCombustionProducts - 
HeatOfCombustion;


L=['[BX3H0]([#6])([#6])[#6]',
'[CX4H3][#5]',
'[CX4H3][#6]',
'[CX4H3][#7;+0]',
'[CX4H3][#7+]',
'[CX4H3][#8]',
'[CX4H3][#16]',
'[CX4H3][#15]',
'[CX4H3][#14]',
'[CX4H2]([#5])[#6]',
'[CX4H2]([#6])[#6]',
'[CX4H2]([#6])[#7;+0]',
'[CX4H2]([#6])[#7+]',
'[CX4H2]([#6])[#8]',
'[CX4H2]([#6])[#16]',
'[CX4H2]([#6])[#15]',
'[CX4H2]([#6])[#9]',
'[CX4H2]([#6])[#17]',
'[CX4H2]([#6])[#35]',
'[CX4H2]([#6])[#53]',
'[CX4H2]([#6])[#14]',
'[CX4H2]([#7])[#7;+0]',
'[CX4H2]([#7])[#7+]',
'[CX4H2]([#8])[#7]',
'[CX4H2]([#8])[#8]',
'[CX4H2]([#8])[#17]',
'[CX4H2]([#16])[#16]',
'[CX4H1]([#6])([#6])[#6]',
'[CX4H1]([#6])([#6])[#7;+0]',
'[CX4H1]([#6])([#6])[#7+]',
'[CX4H1]([#6])([#6])[#8]',
'[CX4H1]([#6])([#6])[#16]',
'[CX4H1]([#6])([#6])[#9]',
'[CX4H1]([#6])([#6])[#17]',
'[CX4H1]([#6])([#6])[#35]',
'[CX4H1]([#6])([#6])[#53]',
'[CX4H1]([#6])([#7])[#7;+0]',
'[CX4H1]([#6])([#7])[#7+]',
'[CX4H1]([#6])([#8])[#8]',
'[CX4H1]([#6])([#9])[#9]',
'[CX4H1]([#6])([#9])[#17]',
'[CX4H1]([#6])([#17])[#17]',
'[CX4H1]([#6])([#17])[#35]',
'[CX4H1]([#6])([#35])[#35]',
'[CX4H1]([#7])([#7])[#7+]',
'[CX4H1]([#8])([#8])[#8]',
'[CX4H1]([#8])([#9])[#9]',
'[CX4H0]([#6])([#6])([#6])[#6]',
'[CX4H0]([#6])([#6])([#6])[#7;+0]',
'[CX4H0]([#6])([#6])([#6])[#7+]',
'[CX4H0]([#6])([#6])([#6])[#8]',
'[CX4H0]([#6])([#6])([#6])[#16]',
'[CX4H0]([#6])([#6])([#6])[#9]',
'[CX4H0]([#6])([#6])([#6])[#17]',
'[CX4H0]([#6])([#6])([#6])[#35]',
'[CX4H0]([#6])([#6])([#6])[#53]',
'[CX4H0]([#6])([#6])([#7])[#7+]',
'[CX4H0]([#6])([#6])([#8])[#8]',
'[CX4H0]([#6])([#6])([#9])[#9]',
'[CX4H0]([#6])([#6])([#9])[#17]',
'[CX4H0]([#6])([#6])([#17])[#17]',
'[CX4H0]([#6])([#7])([#7])[#7+]',
'[CX4H0]([#6])([#8])([#8])[#8]',
'[CX4H0]([#6])([#8])([#9])[#9]',
'[CX4H0]([#6])([#9])([#9])[#9]',
'[CX4H0]([#6])([#9])([#9])[#17]',
'[CX4H0]([#6])([#9])([#9])[#35]',
'[CX4H0]([#6])([#9])([#17])[#17]',
'[CX4H0]([#6])([#9])([#17])[#35]',
'[CX4H0]([#6])([#17])([#17])[#17]',
'[CX4H0]([#6])([#35])([#35])[#35]',
'[CX4H0]([#7])([#7])([#7])[#7+]',
'[CX4H0]([#8])([#8])([#8])[#8]',
'[CX4H0]([#8])([#9])([#9])[#9]',
'[CX3H2]=[#6]',
'[CX3H2]=[#7]',
'[CX3H1](=[#6])[#6]',
'[CX3H1]([#6])=[#7]',
'[CX3H1]([#6])=[#8]',
'[CX3H1](=[#6])[#7;+0]',
'[CX3H1](=[#6])[#7+]',
'[CX3H1](=[#6])[#8]',
'[CX3H1](=[#6])[#16]',
'[CX3H1](=[#6])[#9]',
'[CX3H1](=[#6])[#17]',
'[CX3H1](=[#6])[#35]',
'[CX3H1](=[#6])[#14]',
'[CX3H1]([#7])=[#7]',
'[CX3H1]([#7])=[#8]',
'[CX3H1](=[#7])[#8]',
'[CX3H1](=[#8])[#8]',
'[CX3H1]([#16])=[#7]',
'[CX3H0](=[#6])([#6])[#6]',
'[CX3H0]([#6])([#6])=[#7]',
'[CX3H0]([#6])([#6])=[#8]',
'[CX3H0](=[#6])([#6])[#7]',
'[CX3H0](=[#6])([#6])[#8]',
'[CX3H0](=[#6])([#6])[#16]',
'[CX3H0](=[#6])([#6])[#9]',
'[CX3H0](=[#6])([#6])[#17]',
'[CX3H0]([#6])([#7])=[#7]',
'[CX3H0]([#6])([#7])=[#8]',
'[CX3H0]([#6])([#7])=[#16]',
'[CX3H0]([#6])(=[#8])[#8]',
'[CX3H0]([#6])(=[#8])[#8-]',
'[CX3H0]([#6])(=[#8])[#16]',
'[CX3H0]([#6])(=[#8])[#9]',
'[CX3H0]([#6])(=[#8])[#17]',
'[CX3H0]([#6])(=[#8])[#35]',
'[CX3H0]([#6])(=[#8])[#53]',
'[CX3H0](=[#6])([#7])[#7]',
'[CX3H0](=[#6])([#7+])[#8]',
'[CX3H0](=[#6])([#8])[#9]',
'[CX3H0](=[#6])([#9])[#9]',
'[CX3H0](=[#6])([#9])[#17]',
'[CX3H0](=[#6])([#17])[#17]',
'[CX3H0](=[#6])([#53])[#53]',
'[CX3H0](=[#7])([#7])[#7]',
'[CX3H0]([#7])([#7])=[#8]',
'[CX3H0]([#7])([#7])=[#16]',
'[CX3H0](=[#7])([#7])[#8]',
'[CX3H0]([#7])(=[#8])[#8]',
'[CX3H0]([#7])(=[#8])[#16]',
'[CX3H0]([#7])(=[#16])[#16]',
'[CX3H0](=[#8])([#8])[#8]',
'[CX3H0](=[#8])([#8])[#17]',
'[cX3H1](:[c]):[c]',
'[cX3H1](:[c]):[n;+0]',
'[cX3H1](:[c]):[n+]',
'[cX3H1](:[n]):[n]',
'[cX3H0]([c])([c])[c]',
'[cX3H0]([C])([c])[c]',
'[cX3H0]([C])([c])[n+0]',
'[cX3H0]([C])([c])[n+]',
'[cX3H0]([c])([c])[N;+0]',
'[cX3H0]([c])([c])[N+]',
'[cX3H0]([c])([c])[n]',
'[cX3H0]([c])([c])[O]',
'[cX3H0]([c])([c])[S]',
'[cX3H0]([c])([c])[F]',
'[cX3H0]([c])([c])[Cl]',
'[cX3H0]([c])([c])[Br]',
'[cX3H0]([c])([c])[I]',
'[cX3H0]([c])([c])[Si]',
'[cX3H0]([c])([n])[N]',
'[cX3H0]([c])([n])[O]',
'[cX3H0]([N])([n])[n]',
'[cX3H0]([n])([n])[n]',
'[cX3H0]([n])([n])[Cl]',
'[CX2H1]#[#6]',
'[CX2H0](#[#6])[#6]',
'[CX2H0]([#6])#[#7]',
'[CX2H0](#[#6])[#7]',
'[CX2H0](#[#6])[#17]',
'[CX2H0](#[#7])[#7]',
'[CX2H0](#[#7])[#8]',
'[CX2H0](=[#6])=[#6]',
'[CX2H0](=[#6])=[#7]',
'[CX2H0](=[#6])=[#8]',
'[CX2H0](=[#7])=[#8]',
'[CX2H0](=[#7])=[#16]',# 161
'[NX3H2;^3][#6;^3]',
'[NX3H2;^2][#6;^2]',
'[NX3H2;^3][#7;^3]',
'[NX3H2;^2][#7;^2]',
'[NX3H2][#16]',# 166
'[NX3H1;^3]([#6;^3])[#6;^3]',
'[NX3H1;^2]([#6;^2])[#6;^3]',
'[NX3H1;^2]([#6;^2,^1])[#6;^2,^1]',
'[NX3H1;^3]([#6;^3])[#7;^3]',
'[$([NX3H1;^2]([#6;^2])[#7;^3]),$([NX3H1;^2]([#6;^3])[#7;^2])]',
'[NX3H1;^2]([#6;^2])[#7;^2]',
'[NX3H1;^2]([#6;^2])[#7+;^2]',
'[NX3H0;^3]([#6;^3])([#6;^3])[#6;^3]',
'[NX3H0;^2]([#6;^2])([#6;^3])[#6;^3]',
'[NX3H0;^2]([#6;^2])([#6;^2])[#6;^3]',
'[NX3H0;^2]([#6;^2])([#6;^2])[#6;^2]',
'[NX3H0;^3]([#6;^3])([#6;^3])[#7;^3]',
'[$([NX3H0;^2]([#6;^2])([#6;^3])[#7;^3]),$([NX3H0;^2]([#6;^3])([#6;^3])[#7;^2])]',
'[$([NX3H0;^2]([#6;^2])([#6;^3])[#7+;^3]),$([NX3H0;^2]([#6;^3])([#6;^3])[#7+;^2])]',
'[$([NX3H0;^2]([#6;^2])([#6;^2])[#7;^3]),$([NX3H0;^2]([#6;^2])([#6;^3])[#7;^2])]',
'[$([NX3H0;^2]([#6;^2])([#6;^2])[#7+;^3]),$([NX3H0;^2]([#6;^2])([#6;^3])[#7+;^2])]',
'[NX3H0;^2]([#6;^2])([#6;^2])[#7;^2]',
'[NX3H0]([#6])([#6])[#14]',# 184
'[NX3H0]([#6])([#6])[#17]',
'[NX3H0]([#6])([#6])[#35]',
'[$([NX3H0;^2]([#6;^2])([#7;^2])[#7]),$([NX3H0;^2]([#6;^3])([#7;^2])[#7;^2])]',
'[NX3H0;^2]([#6;^2])([#7;^2])[#7;^2]',
'[NX2H1]=[#6]',# 189
'[NX2H0](=[#6])[#6]',
'[NX2H0]([#6])=[#7;+0]',
'[NX2H0]([#6])=[#7;+1]',
'[NX2H0](=[#6])[#7;+0]',
'[NX2H0](=[#6])[#7;+1]',
'[NX2H0]([#6])=[#8]',
'[NX2H0](=[#6])[#8]',
'[NX2H0]([#7])=[#7]',
'[NX2H0]([#7])=[#8]',
'[NX2H0]([#8])=[#8]',
'[nX3H2][c+]',# 200
'[nX3H1]([#6]):[c+]',
'[nX3H0](:[#6+])([#6])[#6]',
'[nX2H0](:[#6]):[#6]',
'[nX2H0](:[#6]):[#7]',
'[NH3+;^3][#6]',
'[NH2+;^3]([#6])[#6]',
'[NH+;^3]([#6])([#6])[#6]',
'[N+;^2](=[#8])([#6])[O-]',
'[N+;^2](=[#7])([#6])[O-]',
'[N+;^2](=[#8])([#7])[O-]',
'[N+;^2](=[#8])([#8])[O-]',
'[nH+X3H1]([c])[c]',
'[N+X2H0;^1]([C])#[C-]',
'[N+X2H0;^1](=[#7])=[#7-]',
'[OX2H1;^3][#6;^3]',
'[OX2H1;^2][#6;^2]',
'[OX2H1;^2][#7;^2]',
'[OX2H1][#8]',#218
'[OX2H1][#16]',# 219
'[OX2H1][#14]',# 220
'[OX2H0;^3]([#6;^3])[#6;^3]',
'[OX2H0;^2]([#6;^2])[#6;^3]',
'[OX2H0;^2]([#6;^2])[#6;^2]',
'[$([OX2H0;^2]([#6;^2])[#7;^3]),$([OX2H0;^2]([#6;^3])[#7;^2])]',
'[$([OX2H0;^2]([#6;^2])[#7+;^3]),$([OX2H0;^2]([#6;^3])[#7+;^2])]',
'[OX2H0;^2]([#6;^2])[#7;^2]',
'[OX2H0;^3]([#6;^3])[#8]',
'[OX2H0;^2]([#6;^2])[#8]',
'[OX2H0]([#6])[#16]',# 229
'[OX2H0;^3]([#6;^3])[#15]',
'[OX2H0;^2]([#6;^2])[#15]',
'[OX2H0]([#6])[#14]',#232
'[OX2H0]([#14])[#14]',# 233
'[PX3H0]([#6])([#6])[#6]',# 234-237
'[PX4H0]([#6])([#6])(=[#8])[#8]',
'[PX4H0]([#6])([#6])([#6])=[#8]',
'[PX4H0](=[#8])([#8])([#8])[#8]',
'[#16H1;^3][#6;^3]',
'[#16H1;^3][#6;^2]',
'[#16H0;^3]([#6;^3])[#6;^3]',
'[#16H0;^3]([#6;^2])[#6;^3]',
'[#16H0;^3]([#6;^2])[#6;^2]',
'[#16H0;^3]([#6;^3])[#16]',
'[#16H0;^3]([#6;^2])[#16]',
'[SX3H0]([#6])([#6])=[#8]',# 245-267
'[SX4H0]([#6])([#6])(=[#8])=[#8]',
'[SX4H0]([#6])(=[#8])(=[#8])[#8-]',
'[SX4H0]([#7])(=[#8])(=[#8])[#8]',
'[SX3H0](=[#8])([#8])[#8]',
'[SX4H0](=[#8])(=[#8])([#8])[#8]',
'[SX4H0](=[#8])(=[#8])([#8])[#9]',
'[SX4H0](=[#8])(=[#8])([#8])[#17]',
'[SiX4H3][#6]',
'[SiX4H2]([#6])[#6]',
'[SiX4H1]([#6])([#6])[#6]',
'[SiX4H1]([#6])([#6])[#17]',
'[SiX4H1]([#6])([#17])[#17]',
'[SiX4H1]([#8])([#8])[#8]',
'[SiX4H0]([#6])([#6])([#6])[#6]',
'[SiX4H0]([#6])([#6])([#6])[#7]',
'[SiX4H0]([#6])([#6])([#6])[#8]',
'[SiX4H0]([#6])([#6])([#6])[#17]',
'[SiX4H0]([#6])([#6])([#6])[#35]',
'[SiX4H0]([#6])([#6])([#8])[#8]',
'[SiX4H0]([#6])([#6])([#17])[#17]',
'[SiX4H0]([#6])([#8])([#8])[#8]',
'[SiX4H0]([#6])([#17])([#17])[#17]']


Coef=[-4309.05, 439.88, -773.83, -1199.1, -817.94, -1112.98, -1396.74, 
-1052.64, -1008.77, 553.89, -652.47, -1074.2, -705.22, -980.99, -1274.78,
-852.22, -623.15, -617.4, -623.3, -685.52, -932.85, -1480.52, -807.51, 
-1375.72, -1279.46, -951.95, -1932.88, -529.62, -957.93, -575.78, -850.09,
-1152.31, -504.42, -497.94, -500.7, -558.92, -1363.17, -672.56, -1153.93, 
-433.94, -472.96, -494.62, -518.18, -476.37, -870.19, -1433.08, -729.48,
-403.8, -813.97, -426.89, -730.08, -1023.12, -179.93, -361.21, -362.53, -432.3, 
-626.56, -1004.06, -320.26, -318.84, -356.73, -746.41, -1284.92,
-649.83, -243.86, -302.73, -320.46, -323.43, -275.67, -366.35, -339.39, 
-896.07, -1580.14, -531.65, -702.52, -928.8, -566.63, -762.24, -396.09,
-958.41, -595.08, -747.98, -1161.32, -546.98, -555.33, -573.39, -833.05, 
-1134.46, -762.26, -916.53, -545.94, -1372.72, -433.99, -630.4, -248.77,
-825.51, -602.48, -1031.71, -439.03, -397.75, -991.99, -621.43, -1460.28, 
-389.6, -534.91, -844.48, -174.28, -205.8, -204.22, -281.7, -1249.51,
-678.42, -430.57, -415.97, -359.75, -407.94, -544.25, -1416.91, -1022.83, 
-1839.83, -1202.52, -772.08, -1488.48, -2092.99, -546.67, -338.25,
-543.64, -776.86, -497.1, -1022.16, -407.72, -413.58, -630.62, -361.54, 
-844.05, -494.97, -644.82, -619.12, -1044.73, -401.83, -393.22,
-399.84, -468.14, -686.6, -1064.47, -835.9, -1260.9, -583.18, -828.48, -653.92, 
-506.41, -508.61, -1006.69, -512.21, -912.2, -801.89,
-554.47, -741.19, -323.55, -433.06, -1250.0, 144.43, 191.61, -321.73, -263.0, 
-356.54, 657.84, 707.92, 714.3, 209.21, 254.66, 274.14,
 382.93, 1170.07, 1214.78, 1214.87, 1229.16, 739.41, 781.06, 919.58, 771.9, 
879.1, 787.25, 750.91, 747.48, 769.45, 384.22, 424.65,
-7.7, 550.75, 310.59, 237.35, 119.1, 302.14, 396.97, 192.01, -89.71, -43.12, 
356.35, -122.03, 814.57, 1314.74, 412.85, 134.45,
 259.93, 381.05, 531.03, 116.31, 139.32, -143.13, 436.53, 297.18, -520.51, 
-156.85, 389.05, 283.41, -67.43, -30.25, -2.73, 209.41,
 778.41, 676.08, 540.34, 0.0, 0.0, 242.71, 377.06, 233.2, 309.6, 386.05, 
225.09, 392.6, 34.75, 0.0, -128.03, -172.02, 8.54, -110.34,
-117.44, 629.97, 613.04, 652.85, 25.47, 13.65, 764.28, 1000.31, 113.62, 2.73, 
-121.52, 89.44, -120.52, -114.1, -1004.63, -581.65,
-193.18, -561.69, -414.48, -463.15, 130.9, 0.0, -97.16, 70.09, 57.55, 38.94, 
-0.66, 8.62, -133.9, 1.25, -1.22, -1.09, -38.45, -25.28, -5.65]

def 
findHBonds(m,confId=-1,possiblePartners='[#8,#7,#9]',possibleHs='[#1][#8,#7,#16]',distThresh=2.5):
    conf = m.GetConformer(confId)
    partners =[x[0] for x in  
m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))]
    hs=  [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possibleHs))]
    res = []
    for h in hs:
        ph = conf.GetAtomPosition(h)
        for partner in partners:
            if m.GetBondBetweenAtoms(h,partner) is not None:
                continue
            d = conf.GetAtomPosition(partner).Distance(ph)
            if d<=distThresh:
                res.append((h,partner,d))
    return res

def 
findintramolecularHH(m,confId=-1,possiblePartners='[#1][*]',possibleHs='[#1]',distThresh=2.5):
    conf = m.GetConformer(confId)
    partners =[x[0] for x in  
m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))]
    linkers =[x[1] for x in  
m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))]
    hs=  [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possibleHs))]
    res = []
    for h in hs:
        ph = conf.GetAtomPosition(h)
        for partner,linker in zip(partners,linkers):
            if m.GetBondBetweenAtoms(h,linker) is not None:
                continue
            d = conf.GetAtomPosition(partner).Distance(ph)
            if d<=distThresh:
                res.append((h,partner,d))
    return res


def HeatOfFormation(mol,Heat_of_combustion):
    return SumdHfCombustionProducts(mol)-Heat_of_combustion

def HeatOfCombustion(mol,printing):
    C = zeros(len(Coef))
    for j in range(0,len(L)):
        c= Occ(L[j],mol)
        C[j]=c
    C[267]=len(findHBonds(mol,distThresh=1.74))  #Donor_Acceptor(mol) # need to 
check that parameter!
    #Dist=HHdistances(mol)
    C[268]=len(findintramolecularHH(mol,distThresh=1.999)) #Dist[0]
    C[269]=len(findintramolecularHH(mol,distThresh=2.3))-C[268]#Dist[1]
    # angle issue!
    A=Angles(mol)
    C[270]=A[0]
    C[271]=A[1]
    C[272]=A[2]
    if printing:
        print C
    return (C,sum(C*array(Coef)))


def maincomputeOpt(mol,V):
    mol=Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol)
    AllChem.MMFFOptimizeMolecule(mol)
    H = HeatOfCombustion(mol,0)
    #Hf= HeatOfFormation(mol,H[1])
    print H[1]-float(mol.GetProp('deltaH(c)'))

    #print Hf/4.1843 to convert in kcal/mol
    #V.append(H[0])

def maincomputenoOpt(mol,V):
    H = HeatOfCombustion(mol,0)
    #Hf= HeatOfFormation(mol,H[1])
    #h.write(str(H)+","+str(abs(H-float(mol.GetProp('deltaH(c)'))))+'\n')
    #print H[1]-float(mol.GetProp('deltaH(c)'))
    V.append(H[0])
    return H[1]

#h = open('Molecules_Score.txt','w')
suppl = 
Chem.SDMolSupplier('/Users/mbp/Downloads/molecules-20-18279-s001/Compounds List 
for Heat-of-Combustion Calculations.sdf',removeHs=False)
V=[]
j=0
i=0
K=[]
for mol in suppl:
    i=i+1
    if i>3000:
        break
    try:
        H=maincomputenoOpt(mol,V)
        #maincomputeOpt(mol,V)
        K.append(H-float(mol.GetProp('deltaH(c)')))
    except:
        1


L.append("Hdonor")
L.append("DH2")
L.append("DH23")
L.append("A60")
L.append("A90")
L.append("A102")

# look at the global occurence of the smarts & NNI:
tV=[list(i) for i in zip(*V)]
j=0
for tv,lv in zip(tV,L):
    j=j+1
    x=0
    for itv in tv:
        x +=int(itv)
    if x>0:
        1
        print j,lv,x
    if x==0:
        #print j,lv
        1


# distribution of diff Hcalc vs Hofficial
fig,ax = plt.subplots()
plt.ioff()

n,bins,patches = ax.hist(K,bins=100)
ax.set_xlabel('Dihedral Angle')
ax.set_ylabel('Counts');
plt.show()

?





Dr. Guillaume GODIN
Principal Scientist
Chemoinformatic & Datamining
Innovation
CORPORATE R&D DIVISION
DIRECT LINE +41 (0)22 780 3645
MOBILE          +41 (0)79 536 1039
        Firmenich SA
        RUE DES JEUNES 1 | CASE POSTALE 239 | CH-1211 GENEVE 8

________________________________
De : Greg Landrum <greg.land...@gmail.com>
Envoyé : mercredi 14 septembre 2016 14:14
À : Guillaume GODIN
Cc : RDKit Discuss
Objet : Re: [Rdkit-discuss] Angstroms Hydrogen bonding



On Wed, Sep 14, 2016 at 4:16 AM, Guillaume GODIN 
<guillaume.go...@firmenich.com<mailto:guillaume.go...@firmenich.com>> wrote:

Your solution is perfect!


glad it worked

I am currently implementing this article 
"http://www.mdpi.com/1420-3049/20/10/18279"; using RDKit.


Interesting, I hadn't seen that one.


It's now almost done, I need to check the results on Heat of Formation now and 
understand why my code to get Angle in torsions is not working.

def Angles(mol):
    tors = 
mol.GetSubstructMatches(Chem.MolFromSmarts('[C]-[C;O;S]-[C;O;S]-[C]'))
    conf = mol.GetConformer()
    A60=0
    A90=0
    A102=0
    for tor in tors:
        Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2], tor[3]))
        if Angle<=60:
            A60+=1
        elif Angle>60 and Angle<=90:
            A90+=1
        elif Angle>90 and Angle<=102:
            A102+=1
    return (A60,A90,A102)

Comparing to the publication on Heat of Formation page 18294, I have to much 
more occurences (ie. a factor of 50-100 more then expected).

I suspect that the problem is that the SMARTS you specify: 
'[C]-[C;O;S]-[C;O;S]-[C]' won't actually match anything. [C;O;S] means 
'aliphatic C AND aliphatic O AND aliphatic S'. It think you like mean 
'[C,O,S]', which is 'aliphatic C OR aliphatic O OR aliphatic S'.
You might also want to think about whether or not you should replace C, O, S 
(all of which only match aliphatic elements) with #6, #8, #16 (which can match 
aliphatic or aromatic) throughout the SMARTS

When it will be done I will share the code with you guys.


Cool, I look forward to trying that out.

Best,
-greg


BR,


Guillaume

________________________________
De : Greg Landrum <greg.land...@gmail.com<mailto:greg.land...@gmail.com>>
Envoyé : mercredi 14 septembre 2016 12:16
À : Guillaume GODIN
Cc : RDKit Discuss
Objet : Re: [Rdkit-discuss] Angstroms Hydrogen bonding

Hi Guillaume,

On Tue, Sep 13, 2016 at 10:12 PM, Guillaume GODIN 
<guillaume.go...@firmenich.com<mailto:guillaume.go...@firmenich.com>> wrote:

1 Does 3D coordinates of a conformer is in Angstroms ?

If you read the conformer from a file, for example a mol file, then the 3D 
coordinates are in whatever units they were in in that file. This is usually 
Angstrsom.
If you generate the conformation using one of the RDKit embedding functions, 
then they are certainly in Angstroms.

2 How to enumerate all HBonding to determine the bond length ?

Interesting question. Here's a python function that might be a starting point:
def 
findHBonds(m,confId=-1,possiblePartners='[#8,#7]',possibleHs='[#1][#8,#7]',distThresh=2.5):
    conf = m.GetConformer(confId)
    partners =[x[0] for x in  
m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))]
    hs=  [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possibleHs))]
    res = []
    for h in hs:
        ph = conf.GetAtomPosition(h)
        for partner in partners:
            if m.GetBondBetweenAtoms(h,partner) is not None:
                continue
            d = conf.GetAtomPosition(partner).Distance(ph)
            if d<=distThresh:
                res.append((h,partner,d))
    return res

In order to allow flexibility about what an H bond is, I left the definitions 
of the acceptors (partners in the above code) and polar Hs (just Hs in the 
above code) as SMARTS definitions so that they can be customized.



**********************************************************************
DISCLAIMER
This email and any files transmitted with it, including replies and forwarded 
copies (which may contain alterations) subsequently transmitted from Firmenich, 
are confidential and solely for the use of the intended recipient. The contents 
do not represent the opinion of Firmenich except to the extent that it relates 
to their official business.
**********************************************************************

------------------------------------------------------------------------------
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to