Hi Guillaume,

An FYI: I'm at the KNIME Summit in San Francisco this week and won't have
time to go through the code until I'm back in Basel.

-greg


On Wed, Sep 14, 2016 at 7:08 AM, Guillaume GODIN <
guillaume.go...@firmenich.com> wrote:

> Hi Greg,
>
>
> here the first code prototype of the article mappiing in rdkit: for the
> moment I have some issues with angle the rest start to be ok
>
>
> Can you look at it and see if my interpretation is ok ?
>
>
> BR,
>
>
> Guillaume
>
>
> ---------------
> import sys
> import re
> from rdkit import Chem
> from rdkit.Chem import AllChem
> from rdkit.Chem import rdMolTransforms
> from numpy import *
> from rdkit.Chem.rdMolDescriptors import CalcMolFormula
> import matplotlib.pyplot as plt
>
>
> def Occ(smart,mol):
>     func = Chem.MolFromSmarts(smart)
>     maps = mol.GetSubstructMatches(func)
>     return len(maps)
>
>
> def Angles(mol):
>     tors = mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]-[#6,#7,#
> 16]~[#6,#7,#16]-[#6]'))
>     conf = mol.GetConformer()
>     A60=0
>     A90=0
>     A102=0
>     for tor in tors:
>         Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2],
> tor[3]))
>         if Angle<=60:
>             A60+=1
>         elif Angle>60 and Angle<=90:
>             A90+=1
>         elif Angle>90 and Angle<=102:
>             A102+=1
>     return (A60,A90,A102)
>
> # 3 molecules not parse with rdkit ? so can explain some occurence diff
> # Guanidinium chloride
> # Creatine
> # Arginine
>
>
> def SumdHfCombustionProducts(mol):
> #(data extracted from Domalski1 and Skinner2) dHf(kJ/mol:
>     dHfH3BO3=-1094.7226
>     dHfCO2 =-393.7727
>     dHfH2O=-286.0212
>     dHfH2SO4=-888.405
>     dHfH3PO4=-1279.90476
>     dHfSiO2=-847.827
>     dHfHF=-316.6435
>     dHfHCl=-165.5879
>     dHfHBr=-121.4172
>     dHfHI=-142.0
>     NB=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#5]')))
>     NC=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]')) )
>     NS=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#16]')))
>     NP=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#15]')))
>     NSi=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#14]')))
>     NF=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#9]')))
>     NCl=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#17]')))
>     NBr=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#35]')))
>     NI=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#53]')))
>     NH2O=(len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#1]')))-3*NB-
> 2*NS-3*NP-NF-NCl-NBr-NI)/2;
>     return NB*dHfH3BO3+NC*dHfCO2+NH2O*dHfH2O+NS*dHfH2SO4+NP*
> dHfH3PO4+NSi*dHfSiO2+NF*dHfHF+NCl*dHfHCl+NBr*dHfHBr+NI*dHfHI
> # equation to convert Hc to Hf => HeatOfFormation =
> SumdHfCombustionProducts - HeatOfCombustion;
>
>
> L=['[BX3H0]([#6])([#6])[#6]',
> '[CX4H3][#5]',
> '[CX4H3][#6]',
> '[CX4H3][#7;+0]',
> '[CX4H3][#7+]',
> '[CX4H3][#8]',
> '[CX4H3][#16]',
> '[CX4H3][#15]',
> '[CX4H3][#14]',
> '[CX4H2]([#5])[#6]',
> '[CX4H2]([#6])[#6]',
> '[CX4H2]([#6])[#7;+0]',
> '[CX4H2]([#6])[#7+]',
> '[CX4H2]([#6])[#8]',
> '[CX4H2]([#6])[#16]',
> '[CX4H2]([#6])[#15]',
> '[CX4H2]([#6])[#9]',
> '[CX4H2]([#6])[#17]',
> '[CX4H2]([#6])[#35]',
> '[CX4H2]([#6])[#53]',
> '[CX4H2]([#6])[#14]',
> '[CX4H2]([#7])[#7;+0]',
> '[CX4H2]([#7])[#7+]',
> '[CX4H2]([#8])[#7]',
> '[CX4H2]([#8])[#8]',
> '[CX4H2]([#8])[#17]',
> '[CX4H2]([#16])[#16]',
> '[CX4H1]([#6])([#6])[#6]',
> '[CX4H1]([#6])([#6])[#7;+0]',
> '[CX4H1]([#6])([#6])[#7+]',
> '[CX4H1]([#6])([#6])[#8]',
> '[CX4H1]([#6])([#6])[#16]',
> '[CX4H1]([#6])([#6])[#9]',
> '[CX4H1]([#6])([#6])[#17]',
> '[CX4H1]([#6])([#6])[#35]',
> '[CX4H1]([#6])([#6])[#53]',
> '[CX4H1]([#6])([#7])[#7;+0]',
> '[CX4H1]([#6])([#7])[#7+]',
> '[CX4H1]([#6])([#8])[#8]',
> '[CX4H1]([#6])([#9])[#9]',
> '[CX4H1]([#6])([#9])[#17]',
> '[CX4H1]([#6])([#17])[#17]',
> '[CX4H1]([#6])([#17])[#35]',
> '[CX4H1]([#6])([#35])[#35]',
> '[CX4H1]([#7])([#7])[#7+]',
> '[CX4H1]([#8])([#8])[#8]',
> '[CX4H1]([#8])([#9])[#9]',
> '[CX4H0]([#6])([#6])([#6])[#6]',
> '[CX4H0]([#6])([#6])([#6])[#7;+0]',
> '[CX4H0]([#6])([#6])([#6])[#7+]',
> '[CX4H0]([#6])([#6])([#6])[#8]',
> '[CX4H0]([#6])([#6])([#6])[#16]',
> '[CX4H0]([#6])([#6])([#6])[#9]',
> '[CX4H0]([#6])([#6])([#6])[#17]',
> '[CX4H0]([#6])([#6])([#6])[#35]',
> '[CX4H0]([#6])([#6])([#6])[#53]',
> '[CX4H0]([#6])([#6])([#7])[#7+]',
> '[CX4H0]([#6])([#6])([#8])[#8]',
> '[CX4H0]([#6])([#6])([#9])[#9]',
> '[CX4H0]([#6])([#6])([#9])[#17]',
> '[CX4H0]([#6])([#6])([#17])[#17]',
> '[CX4H0]([#6])([#7])([#7])[#7+]',
> '[CX4H0]([#6])([#8])([#8])[#8]',
> '[CX4H0]([#6])([#8])([#9])[#9]',
> '[CX4H0]([#6])([#9])([#9])[#9]',
> '[CX4H0]([#6])([#9])([#9])[#17]',
> '[CX4H0]([#6])([#9])([#9])[#35]',
> '[CX4H0]([#6])([#9])([#17])[#17]',
> '[CX4H0]([#6])([#9])([#17])[#35]',
> '[CX4H0]([#6])([#17])([#17])[#17]',
> '[CX4H0]([#6])([#35])([#35])[#35]',
> '[CX4H0]([#7])([#7])([#7])[#7+]',
> '[CX4H0]([#8])([#8])([#8])[#8]',
> '[CX4H0]([#8])([#9])([#9])[#9]',
> '[CX3H2]=[#6]',
> '[CX3H2]=[#7]',
> '[CX3H1](=[#6])[#6]',
> '[CX3H1]([#6])=[#7]',
> '[CX3H1]([#6])=[#8]',
> '[CX3H1](=[#6])[#7;+0]',
> '[CX3H1](=[#6])[#7+]',
> '[CX3H1](=[#6])[#8]',
> '[CX3H1](=[#6])[#16]',
> '[CX3H1](=[#6])[#9]',
> '[CX3H1](=[#6])[#17]',
> '[CX3H1](=[#6])[#35]',
> '[CX3H1](=[#6])[#14]',
> '[CX3H1]([#7])=[#7]',
> '[CX3H1]([#7])=[#8]',
> '[CX3H1](=[#7])[#8]',
> '[CX3H1](=[#8])[#8]',
> '[CX3H1]([#16])=[#7]',
> '[CX3H0](=[#6])([#6])[#6]',
> '[CX3H0]([#6])([#6])=[#7]',
> '[CX3H0]([#6])([#6])=[#8]',
> '[CX3H0](=[#6])([#6])[#7]',
> '[CX3H0](=[#6])([#6])[#8]',
> '[CX3H0](=[#6])([#6])[#16]',
> '[CX3H0](=[#6])([#6])[#9]',
> '[CX3H0](=[#6])([#6])[#17]',
> '[CX3H0]([#6])([#7])=[#7]',
> '[CX3H0]([#6])([#7])=[#8]',
> '[CX3H0]([#6])([#7])=[#16]',
> '[CX3H0]([#6])(=[#8])[#8]',
> '[CX3H0]([#6])(=[#8])[#8-]',
> '[CX3H0]([#6])(=[#8])[#16]',
> '[CX3H0]([#6])(=[#8])[#9]',
> '[CX3H0]([#6])(=[#8])[#17]',
> '[CX3H0]([#6])(=[#8])[#35]',
> '[CX3H0]([#6])(=[#8])[#53]',
> '[CX3H0](=[#6])([#7])[#7]',
> '[CX3H0](=[#6])([#7+])[#8]',
> '[CX3H0](=[#6])([#8])[#9]',
> '[CX3H0](=[#6])([#9])[#9]',
> '[CX3H0](=[#6])([#9])[#17]',
> '[CX3H0](=[#6])([#17])[#17]',
> '[CX3H0](=[#6])([#53])[#53]',
> '[CX3H0](=[#7])([#7])[#7]',
> '[CX3H0]([#7])([#7])=[#8]',
> '[CX3H0]([#7])([#7])=[#16]',
> '[CX3H0](=[#7])([#7])[#8]',
> '[CX3H0]([#7])(=[#8])[#8]',
> '[CX3H0]([#7])(=[#8])[#16]',
> '[CX3H0]([#7])(=[#16])[#16]',
> '[CX3H0](=[#8])([#8])[#8]',
> '[CX3H0](=[#8])([#8])[#17]',
> '[cX3H1](:[c]):[c]',
> '[cX3H1](:[c]):[n;+0]',
> '[cX3H1](:[c]):[n+]',
> '[cX3H1](:[n]):[n]',
> '[cX3H0]([c])([c])[c]',
> '[cX3H0]([C])([c])[c]',
> '[cX3H0]([C])([c])[n+0]',
> '[cX3H0]([C])([c])[n+]',
> '[cX3H0]([c])([c])[N;+0]',
> '[cX3H0]([c])([c])[N+]',
> '[cX3H0]([c])([c])[n]',
> '[cX3H0]([c])([c])[O]',
> '[cX3H0]([c])([c])[S]',
> '[cX3H0]([c])([c])[F]',
> '[cX3H0]([c])([c])[Cl]',
> '[cX3H0]([c])([c])[Br]',
> '[cX3H0]([c])([c])[I]',
> '[cX3H0]([c])([c])[Si]',
> '[cX3H0]([c])([n])[N]',
> '[cX3H0]([c])([n])[O]',
> '[cX3H0]([N])([n])[n]',
> '[cX3H0]([n])([n])[n]',
> '[cX3H0]([n])([n])[Cl]',
> '[CX2H1]#[#6]',
> '[CX2H0](#[#6])[#6]',
> '[CX2H0]([#6])#[#7]',
> '[CX2H0](#[#6])[#7]',
> '[CX2H0](#[#6])[#17]',
> '[CX2H0](#[#7])[#7]',
> '[CX2H0](#[#7])[#8]',
> '[CX2H0](=[#6])=[#6]',
> '[CX2H0](=[#6])=[#7]',
> '[CX2H0](=[#6])=[#8]',
> '[CX2H0](=[#7])=[#8]',
> '[CX2H0](=[#7])=[#16]',# 161
> '[NX3H2;^3][#6;^3]',
> '[NX3H2;^2][#6;^2]',
> '[NX3H2;^3][#7;^3]',
> '[NX3H2;^2][#7;^2]',
> '[NX3H2][#16]',# 166
> '[NX3H1;^3]([#6;^3])[#6;^3]',
> '[NX3H1;^2]([#6;^2])[#6;^3]',
> '[NX3H1;^2]([#6;^2,^1])[#6;^2,^1]',
> '[NX3H1;^3]([#6;^3])[#7;^3]',
> '[$([NX3H1;^2]([#6;^2])[#7;^3]),$([NX3H1;^2]([#6;^3])[#7;^2])]',
> '[NX3H1;^2]([#6;^2])[#7;^2]',
> '[NX3H1;^2]([#6;^2])[#7+;^2]',
> '[NX3H0;^3]([#6;^3])([#6;^3])[#6;^3]',
> '[NX3H0;^2]([#6;^2])([#6;^3])[#6;^3]',
> '[NX3H0;^2]([#6;^2])([#6;^2])[#6;^3]',
> '[NX3H0;^2]([#6;^2])([#6;^2])[#6;^2]',
> '[NX3H0;^3]([#6;^3])([#6;^3])[#7;^3]',
> '[$([NX3H0;^2]([#6;^2])([#6;^3])[#7;^3]),$([NX3H0;^2]([#6;^
> 3])([#6;^3])[#7;^2])]',
> '[$([NX3H0;^2]([#6;^2])([#6;^3])[#7+;^3]),$([NX3H0;^2]([#6;
> ^3])([#6;^3])[#7+;^2])]',
> '[$([NX3H0;^2]([#6;^2])([#6;^2])[#7;^3]),$([NX3H0;^2]([#6;^
> 2])([#6;^3])[#7;^2])]',
> '[$([NX3H0;^2]([#6;^2])([#6;^2])[#7+;^3]),$([NX3H0;^2]([#6;
> ^2])([#6;^3])[#7+;^2])]',
> '[NX3H0;^2]([#6;^2])([#6;^2])[#7;^2]',
> '[NX3H0]([#6])([#6])[#14]',# 184
> '[NX3H0]([#6])([#6])[#17]',
> '[NX3H0]([#6])([#6])[#35]',
> '[$([NX3H0;^2]([#6;^2])([#7;^2])[#7]),$([NX3H0;^2]([#6;^3])
> ([#7;^2])[#7;^2])]',
> '[NX3H0;^2]([#6;^2])([#7;^2])[#7;^2]',
> '[NX2H1]=[#6]',# 189
> '[NX2H0](=[#6])[#6]',
> '[NX2H0]([#6])=[#7;+0]',
> '[NX2H0]([#6])=[#7;+1]',
> '[NX2H0](=[#6])[#7;+0]',
> '[NX2H0](=[#6])[#7;+1]',
> '[NX2H0]([#6])=[#8]',
> '[NX2H0](=[#6])[#8]',
> '[NX2H0]([#7])=[#7]',
> '[NX2H0]([#7])=[#8]',
> '[NX2H0]([#8])=[#8]',
> '[nX3H2][c+]',# 200
> '[nX3H1]([#6]):[c+]',
> '[nX3H0](:[#6+])([#6])[#6]',
> '[nX2H0](:[#6]):[#6]',
> '[nX2H0](:[#6]):[#7]',
> '[NH3+;^3][#6]',
> '[NH2+;^3]([#6])[#6]',
> '[NH+;^3]([#6])([#6])[#6]',
> '[N+;^2](=[#8])([#6])[O-]',
> '[N+;^2](=[#7])([#6])[O-]',
> '[N+;^2](=[#8])([#7])[O-]',
> '[N+;^2](=[#8])([#8])[O-]',
> '[nH+X3H1]([c])[c]',
> '[N+X2H0;^1]([C])#[C-]',
> '[N+X2H0;^1](=[#7])=[#7-]',
> '[OX2H1;^3][#6;^3]',
> '[OX2H1;^2][#6;^2]',
> '[OX2H1;^2][#7;^2]',
> '[OX2H1][#8]',#218
> '[OX2H1][#16]',# 219
> '[OX2H1][#14]',# 220
> '[OX2H0;^3]([#6;^3])[#6;^3]',
> '[OX2H0;^2]([#6;^2])[#6;^3]',
> '[OX2H0;^2]([#6;^2])[#6;^2]',
> '[$([OX2H0;^2]([#6;^2])[#7;^3]),$([OX2H0;^2]([#6;^3])[#7;^2])]',
> '[$([OX2H0;^2]([#6;^2])[#7+;^3]),$([OX2H0;^2]([#6;^3])[#7+;^2])]',
> '[OX2H0;^2]([#6;^2])[#7;^2]',
> '[OX2H0;^3]([#6;^3])[#8]',
> '[OX2H0;^2]([#6;^2])[#8]',
> '[OX2H0]([#6])[#16]',# 229
> '[OX2H0;^3]([#6;^3])[#15]',
> '[OX2H0;^2]([#6;^2])[#15]',
> '[OX2H0]([#6])[#14]',#232
> '[OX2H0]([#14])[#14]',# 233
> '[PX3H0]([#6])([#6])[#6]',# 234-237
> '[PX4H0]([#6])([#6])(=[#8])[#8]',
> '[PX4H0]([#6])([#6])([#6])=[#8]',
> '[PX4H0](=[#8])([#8])([#8])[#8]',
> '[#16H1;^3][#6;^3]',
> '[#16H1;^3][#6;^2]',
> '[#16H0;^3]([#6;^3])[#6;^3]',
> '[#16H0;^3]([#6;^2])[#6;^3]',
> '[#16H0;^3]([#6;^2])[#6;^2]',
> '[#16H0;^3]([#6;^3])[#16]',
> '[#16H0;^3]([#6;^2])[#16]',
> '[SX3H0]([#6])([#6])=[#8]',# 245-267
> '[SX4H0]([#6])([#6])(=[#8])=[#8]',
> '[SX4H0]([#6])(=[#8])(=[#8])[#8-]',
> '[SX4H0]([#7])(=[#8])(=[#8])[#8]',
> '[SX3H0](=[#8])([#8])[#8]',
> '[SX4H0](=[#8])(=[#8])([#8])[#8]',
> '[SX4H0](=[#8])(=[#8])([#8])[#9]',
> '[SX4H0](=[#8])(=[#8])([#8])[#17]',
> '[SiX4H3][#6]',
> '[SiX4H2]([#6])[#6]',
> '[SiX4H1]([#6])([#6])[#6]',
> '[SiX4H1]([#6])([#6])[#17]',
> '[SiX4H1]([#6])([#17])[#17]',
> '[SiX4H1]([#8])([#8])[#8]',
> '[SiX4H0]([#6])([#6])([#6])[#6]',
> '[SiX4H0]([#6])([#6])([#6])[#7]',
> '[SiX4H0]([#6])([#6])([#6])[#8]',
> '[SiX4H0]([#6])([#6])([#6])[#17]',
> '[SiX4H0]([#6])([#6])([#6])[#35]',
> '[SiX4H0]([#6])([#6])([#8])[#8]',
> '[SiX4H0]([#6])([#6])([#17])[#17]',
> '[SiX4H0]([#6])([#8])([#8])[#8]',
> '[SiX4H0]([#6])([#17])([#17])[#17]']
>
>
> Coef=[-4309.05, 439.88, -773.83, -1199.1, -817.94, -1112.98, -1396.74,
> -1052.64, -1008.77, 553.89, -652.47, -1074.2, -705.22, -980.99, -1274.78,
> -852.22, -623.15, -617.4, -623.3, -685.52, -932.85, -1480.52, -807.51,
> -1375.72, -1279.46, -951.95, -1932.88, -529.62, -957.93, -575.78, -850.09,
> -1152.31, -504.42, -497.94, -500.7, -558.92, -1363.17, -672.56, -1153.93,
> -433.94, -472.96, -494.62, -518.18, -476.37, -870.19, -1433.08, -729.48,
> -403.8, -813.97, -426.89, -730.08, -1023.12, -179.93, -361.21, -362.53,
> -432.3, -626.56, -1004.06, -320.26, -318.84, -356.73, -746.41, -1284.92,
> -649.83, -243.86, -302.73, -320.46, -323.43, -275.67, -366.35, -339.39,
> -896.07, -1580.14, -531.65, -702.52, -928.8, -566.63, -762.24, -396.09,
> -958.41, -595.08, -747.98, -1161.32, -546.98, -555.33, -573.39, -833.05,
> -1134.46, -762.26, -916.53, -545.94, -1372.72, -433.99, -630.4, -248.77,
> -825.51, -602.48, -1031.71, -439.03, -397.75, -991.99, -621.43, -1460.28,
> -389.6, -534.91, -844.48, -174.28, -205.8, -204.22, -281.7, -1249.51,
> -678.42, -430.57, -415.97, -359.75, -407.94, -544.25, -1416.91, -1022.83,
> -1839.83, -1202.52, -772.08, -1488.48, -2092.99, -546.67, -338.25,
> -543.64, -776.86, -497.1, -1022.16, -407.72, -413.58, -630.62, -361.54,
> -844.05, -494.97, -644.82, -619.12, -1044.73, -401.83, -393.22,
> -399.84, -468.14, -686.6, -1064.47, -835.9, -1260.9, -583.18, -828.48,
> -653.92, -506.41, -508.61, -1006.69, -512.21, -912.2, -801.89,
> -554.47, -741.19, -323.55, -433.06, -1250.0, 144.43, 191.61, -321.73,
> -263.0, -356.54, 657.84, 707.92, 714.3, 209.21, 254.66, 274.14,
>  382.93, 1170.07, 1214.78, 1214.87, 1229.16, 739.41, 781.06, 919.58,
> 771.9, 879.1, 787.25, 750.91, 747.48, 769.45, 384.22, 424.65,
> -7.7, 550.75, 310.59, 237.35, 119.1, 302.14, 396.97, 192.01, -89.71,
> -43.12, 356.35, -122.03, 814.57, 1314.74, 412.85, 134.45,
>  259.93, 381.05, 531.03, 116.31, 139.32, -143.13, 436.53, 297.18, -520.51,
> -156.85, 389.05, 283.41, -67.43, -30.25, -2.73, 209.41,
>  778.41, 676.08, 540.34, 0.0, 0.0, 242.71, 377.06, 233.2, 309.6, 386.05,
> 225.09, 392.6, 34.75, 0.0, -128.03, -172.02, 8.54, -110.34,
> -117.44, 629.97, 613.04, 652.85, 25.47, 13.65, 764.28, 1000.31, 113.62,
> 2.73, -121.52, 89.44, -120.52, -114.1, -1004.63, -581.65,
> -193.18, -561.69, -414.48, -463.15, 130.9, 0.0, -97.16, 70.09, 57.55,
> 38.94, -0.66, 8.62, -133.9, 1.25, -1.22, -1.09, -38.45, -25.28, -5.65]
>
> def findHBonds(m,confId=-1,possiblePartners='[#8,#7,#9]',
> possibleHs='[#1][#8,#7,#16]',distThresh=2.5):
>     conf = m.GetConformer(confId)
>     partners =[x[0] for x in  m.GetSubstructMatches(Chem.MolFromSmarts(
> possiblePartners))]
>     hs=  [x[0] for x in m.GetSubstructMatches(Chem.
> MolFromSmarts(possibleHs))]
>     res = []
>     for h in hs:
>         ph = conf.GetAtomPosition(h)
>         for partner in partners:
>             if m.GetBondBetweenAtoms(h,partner) is not None:
>                 continue
>             d = conf.GetAtomPosition(partner).Distance(ph)
>             if d<=distThresh:
>                 res.append((h,partner,d))
>     return res
>
> def findintramolecularHH(m,confId=-1,possiblePartners='[#1][*]',
> possibleHs='[#1]',distThresh=2.5):
>     conf = m.GetConformer(confId)
>     partners =[x[0] for x in  m.GetSubstructMatches(Chem.MolFromSmarts(
> possiblePartners))]
>     linkers =[x[1] for x in  m.GetSubstructMatches(Chem.MolFromSmarts(
> possiblePartners))]
>     hs=  [x[0] for x in m.GetSubstructMatches(Chem.
> MolFromSmarts(possibleHs))]
>     res = []
>     for h in hs:
>         ph = conf.GetAtomPosition(h)
>         for partner,linker in zip(partners,linkers):
>             if m.GetBondBetweenAtoms(h,linker) is not None:
>                 continue
>             d = conf.GetAtomPosition(partner).Distance(ph)
>             if d<=distThresh:
>                 res.append((h,partner,d))
>     return res
>
>
> def HeatOfFormation(mol,Heat_of_combustion):
>     return SumdHfCombustionProducts(mol)-Heat_of_combustion
>
> def HeatOfCombustion(mol,printing):
>     C = zeros(len(Coef))
>     for j in range(0,len(L)):
>         c= Occ(L[j],mol)
>         C[j]=c
>     C[267]=len(findHBonds(mol,distThresh=1.74))  #Donor_Acceptor(mol) #
> need to check that parameter!
>     #Dist=HHdistances(mol)
>     C[268]=len(findintramolecularHH(mol,distThresh=1.999)) #Dist[0]
>     C[269]=len(findintramolecularHH(mol,distThresh=2.3))-C[268]#Dist[1]
>     # angle issue!
>     A=Angles(mol)
>     C[270]=A[0]
>     C[271]=A[1]
>     C[272]=A[2]
>     if printing:
>         print C
>     return (C,sum(C*array(Coef)))
>
>
> def maincomputeOpt(mol,V):
>     mol=Chem.AddHs(mol)
>     AllChem.EmbedMolecule(mol)
>     AllChem.MMFFOptimizeMolecule(mol)
>     H = HeatOfCombustion(mol,0)
>     #Hf= HeatOfFormation(mol,H[1])
>     print H[1]-float(mol.GetProp('deltaH(c)'))
>
>     #print Hf/4.1843 to convert in kcal/mol
>     #V.append(H[0])
>
> def maincomputenoOpt(mol,V):
>     H = HeatOfCombustion(mol,0)
>     #Hf= HeatOfFormation(mol,H[1])
>     #h.write(str(H)+","+str(abs(H-float(mol.GetProp('deltaH(c)'))))+'\n')
>     #print H[1]-float(mol.GetProp('deltaH(c)'))
>     V.append(H[0])
>     return H[1]
>
> #h = open('Molecules_Score.txt','w')
> suppl = 
> Chem.SDMolSupplier('/Users/mbp/Downloads/molecules-20-18279-s001/Compounds
> List for Heat-of-Combustion Calculations.sdf',removeHs=False)
> V=[]
> j=0
> i=0
> K=[]
> for mol in suppl:
>     i=i+1
>     if i>3000:
>         break
>     try:
>         H=maincomputenoOpt(mol,V)
>         #maincomputeOpt(mol,V)
>         K.append(H-float(mol.GetProp('deltaH(c)')))
>     except:
>         1
>
>
> L.append("Hdonor")
> L.append("DH2")
> L.append("DH23")
> L.append("A60")
> L.append("A90")
> L.append("A102")
>
> # look at the global occurence of the smarts & NNI:
> tV=[list(i) for i in zip(*V)]
> j=0
> for tv,lv in zip(tV,L):
>     j=j+1
>     x=0
>     for itv in tv:
>         x +=int(itv)
>     if x>0:
>         1
>         print j,lv,x
>     if x==0:
>         #print j,lv
>         1
>
>
> # distribution of diff Hcalc vs Hofficial
> fig,ax = plt.subplots()
> plt.ioff()
>
> n,bins,patches = ax.hist(K,bins=100)
> ax.set_xlabel('Dihedral Angle')
> ax.set_ylabel('Counts');
> plt.show()
>
> ​
>
>
>
>
>
>
> *Dr. Guillaume GODIN*
> Principal Scientist
> Chemoinformatic & Datamining
> Innovation
> CORPORATE R&D DIVISION
> DIRECT LINE +41 (0)22 780 3645
> MOBILE          +41 (0)79 536 1039
>         Firmenich SA
>         RUE DES JEUNES 1 | CASE POSTALE 239 | CH-1211 GENEVE 8
>
> ------------------------------
> *De :* Greg Landrum <greg.land...@gmail.com>
> *Envoyé :* mercredi 14 septembre 2016 14:14
>
> *À :* Guillaume GODIN
> *Cc :* RDKit Discuss
> *Objet :* Re: [Rdkit-discuss] Angstroms Hydrogen bonding
>
>
>
> On Wed, Sep 14, 2016 at 4:16 AM, Guillaume GODIN <
> guillaume.go...@firmenich.com> wrote:
>
>> Your solution is perfect!
>>
>>
>> glad it worked
>
>
>> I am currently implementing this article "http://www.mdpi.com/1420-3049
>> /20/10/18279" using RDKit.
>>
>>
> Interesting, I hadn't seen that one.
>
>
>
>> It's now almost done, I need to check the results on Heat of Formation
>> now and understand why my code to get Angle in torsions is not working.
>>
>> def Angles(mol):
>>     tors = mol.GetSubstructMatches(Chem.MolFromSmarts('[C]-[C;O;S]-[C;O
>> ;S]-[C]'))
>>     conf = mol.GetConformer()
>>     A60=0
>>     A90=0
>>     A102=0
>>     for tor in tors:
>>         Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2],
>> tor[3]))
>>         if Angle<=60:
>>             A60+=1
>>         elif Angle>60 and Angle<=90:
>>             A90+=1
>>         elif Angle>90 and Angle<=102:
>>             A102+=1
>>     return (A60,A90,A102)
>>
>> Comparing to the publication on Heat of Formation page 18294, I have to
>> much more occurences (ie. a factor of 50-100 more then expected).
>>
>
> I suspect that the problem is that the SMARTS you
> specify: '[C]-[C;O;S]-[C;O;S]-[C]' won't actually match anything. [C;O;S]
> means 'aliphatic C AND aliphatic O AND aliphatic S'. It think you like mean
> '[C,O,S]', which is 'aliphatic C OR aliphatic O OR aliphatic S'.
> You might also want to think about whether or not you should replace C, O,
> S (all of which only match aliphatic elements) with #6, #8, #16 (which can
> match aliphatic or aromatic) throughout the SMARTS
>
>
>> When it will be done I will share the code with you guys.
>>
>>
> Cool, I look forward to trying that out.
>
> Best,
> -greg
>
>
>
>> BR,
>>
>> Guillaume
>> ------------------------------
>> *De :* Greg Landrum <greg.land...@gmail.com>
>> *Envoyé :* mercredi 14 septembre 2016 12:16
>> *À :* Guillaume GODIN
>> *Cc :* RDKit Discuss
>> *Objet :* Re: [Rdkit-discuss] Angstroms Hydrogen bonding
>>
>> Hi Guillaume,
>>
>> On Tue, Sep 13, 2016 at 10:12 PM, Guillaume GODIN <
>> guillaume.go...@firmenich.com> wrote:
>>
>>> 1 Does 3D coordinates of a conformer is in Angstroms ?
>>>
>>> If you read the conformer from a file, for example a mol file, then the
>> 3D coordinates are in whatever units they were in in that file. This is
>> usually Angstrsom.
>> If you generate the conformation using one of the RDKit embedding
>> functions, then they are certainly in Angstroms.
>>
>>> 2 How to enumerate all HBonding to determine the bond length ?
>>>
>>
>> Interesting question. Here's a python function that might be a starting
>> point:
>>
>> def findHBonds(m,confId=-1,possiblePartners='[#8,#7]',possibleHs
>> ='[#1][#8,#7]',distThresh=2.5):
>>     conf = m.GetConformer(confId)
>>     partners =[x[0] for x in  m.GetSubstructMatches(Chem.Mo
>> lFromSmarts(possiblePartners))]
>>     hs=  [x[0] for x in m.GetSubstructMatches(Chem.Mol
>> FromSmarts(possibleHs))]
>>     res = []
>>     for h in hs:
>>         ph = conf.GetAtomPosition(h)
>>         for partner in partners:
>>             if m.GetBondBetweenAtoms(h,partner) is not None:
>>                 continue
>>             d = conf.GetAtomPosition(partner).Distance(ph)
>>             if d<=distThresh:
>>                 res.append((h,partner,d))
>>     return res
>>
>>
>> In order to allow flexibility about what an H bond is, I left the
>> definitions of the acceptors (partners in the above code) and polar Hs
>> (just Hs in the above code) as SMARTS definitions so that they can be
>> customized.
>>
>>
>>
>> **********************************************************************
>> DISCLAIMER
>> This email and any files transmitted with it, including replies and
>> forwarded copies (which may contain alterations) subsequently transmitted
>> from Firmenich, are confidential and solely for the use of the intended
>> recipient. The contents do not represent the opinion of Firmenich except to
>> the extent that it relates to their official business.
>> **********************************************************************
>>
>
>
------------------------------------------------------------------------------
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to