Hi Guillaume, An FYI: I'm at the KNIME Summit in San Francisco this week and won't have time to go through the code until I'm back in Basel.
-greg On Wed, Sep 14, 2016 at 7:08 AM, Guillaume GODIN < guillaume.go...@firmenich.com> wrote: > Hi Greg, > > > here the first code prototype of the article mappiing in rdkit: for the > moment I have some issues with angle the rest start to be ok > > > Can you look at it and see if my interpretation is ok ? > > > BR, > > > Guillaume > > > --------------- > import sys > import re > from rdkit import Chem > from rdkit.Chem import AllChem > from rdkit.Chem import rdMolTransforms > from numpy import * > from rdkit.Chem.rdMolDescriptors import CalcMolFormula > import matplotlib.pyplot as plt > > > def Occ(smart,mol): > func = Chem.MolFromSmarts(smart) > maps = mol.GetSubstructMatches(func) > return len(maps) > > > def Angles(mol): > tors = mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]-[#6,#7,# > 16]~[#6,#7,#16]-[#6]')) > conf = mol.GetConformer() > A60=0 > A90=0 > A102=0 > for tor in tors: > Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2], > tor[3])) > if Angle<=60: > A60+=1 > elif Angle>60 and Angle<=90: > A90+=1 > elif Angle>90 and Angle<=102: > A102+=1 > return (A60,A90,A102) > > # 3 molecules not parse with rdkit ? so can explain some occurence diff > # Guanidinium chloride > # Creatine > # Arginine > > > def SumdHfCombustionProducts(mol): > #(data extracted from Domalski1 and Skinner2) dHf(kJ/mol: > dHfH3BO3=-1094.7226 > dHfCO2 =-393.7727 > dHfH2O=-286.0212 > dHfH2SO4=-888.405 > dHfH3PO4=-1279.90476 > dHfSiO2=-847.827 > dHfHF=-316.6435 > dHfHCl=-165.5879 > dHfHBr=-121.4172 > dHfHI=-142.0 > NB=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#5]'))) > NC=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]')) ) > NS=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#16]'))) > NP=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#15]'))) > NSi=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#14]'))) > NF=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#9]'))) > NCl=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#17]'))) > NBr=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#35]'))) > NI=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#53]'))) > NH2O=(len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#1]')))-3*NB- > 2*NS-3*NP-NF-NCl-NBr-NI)/2; > return NB*dHfH3BO3+NC*dHfCO2+NH2O*dHfH2O+NS*dHfH2SO4+NP* > dHfH3PO4+NSi*dHfSiO2+NF*dHfHF+NCl*dHfHCl+NBr*dHfHBr+NI*dHfHI > # equation to convert Hc to Hf => HeatOfFormation = > SumdHfCombustionProducts - HeatOfCombustion; > > > L=['[BX3H0]([#6])([#6])[#6]', > '[CX4H3][#5]', > '[CX4H3][#6]', > '[CX4H3][#7;+0]', > '[CX4H3][#7+]', > '[CX4H3][#8]', > '[CX4H3][#16]', > '[CX4H3][#15]', > '[CX4H3][#14]', > '[CX4H2]([#5])[#6]', > '[CX4H2]([#6])[#6]', > '[CX4H2]([#6])[#7;+0]', > '[CX4H2]([#6])[#7+]', > '[CX4H2]([#6])[#8]', > '[CX4H2]([#6])[#16]', > '[CX4H2]([#6])[#15]', > '[CX4H2]([#6])[#9]', > '[CX4H2]([#6])[#17]', > '[CX4H2]([#6])[#35]', > '[CX4H2]([#6])[#53]', > '[CX4H2]([#6])[#14]', > '[CX4H2]([#7])[#7;+0]', > '[CX4H2]([#7])[#7+]', > '[CX4H2]([#8])[#7]', > '[CX4H2]([#8])[#8]', > '[CX4H2]([#8])[#17]', > '[CX4H2]([#16])[#16]', > '[CX4H1]([#6])([#6])[#6]', > '[CX4H1]([#6])([#6])[#7;+0]', > '[CX4H1]([#6])([#6])[#7+]', > '[CX4H1]([#6])([#6])[#8]', > '[CX4H1]([#6])([#6])[#16]', > '[CX4H1]([#6])([#6])[#9]', > '[CX4H1]([#6])([#6])[#17]', > '[CX4H1]([#6])([#6])[#35]', > '[CX4H1]([#6])([#6])[#53]', > '[CX4H1]([#6])([#7])[#7;+0]', > '[CX4H1]([#6])([#7])[#7+]', > '[CX4H1]([#6])([#8])[#8]', > '[CX4H1]([#6])([#9])[#9]', > '[CX4H1]([#6])([#9])[#17]', > '[CX4H1]([#6])([#17])[#17]', > '[CX4H1]([#6])([#17])[#35]', > '[CX4H1]([#6])([#35])[#35]', > '[CX4H1]([#7])([#7])[#7+]', > '[CX4H1]([#8])([#8])[#8]', > '[CX4H1]([#8])([#9])[#9]', > '[CX4H0]([#6])([#6])([#6])[#6]', > '[CX4H0]([#6])([#6])([#6])[#7;+0]', > '[CX4H0]([#6])([#6])([#6])[#7+]', > '[CX4H0]([#6])([#6])([#6])[#8]', > '[CX4H0]([#6])([#6])([#6])[#16]', > '[CX4H0]([#6])([#6])([#6])[#9]', > '[CX4H0]([#6])([#6])([#6])[#17]', > '[CX4H0]([#6])([#6])([#6])[#35]', > '[CX4H0]([#6])([#6])([#6])[#53]', > '[CX4H0]([#6])([#6])([#7])[#7+]', > '[CX4H0]([#6])([#6])([#8])[#8]', > '[CX4H0]([#6])([#6])([#9])[#9]', > '[CX4H0]([#6])([#6])([#9])[#17]', > '[CX4H0]([#6])([#6])([#17])[#17]', > '[CX4H0]([#6])([#7])([#7])[#7+]', > '[CX4H0]([#6])([#8])([#8])[#8]', > '[CX4H0]([#6])([#8])([#9])[#9]', > '[CX4H0]([#6])([#9])([#9])[#9]', > '[CX4H0]([#6])([#9])([#9])[#17]', > '[CX4H0]([#6])([#9])([#9])[#35]', > '[CX4H0]([#6])([#9])([#17])[#17]', > '[CX4H0]([#6])([#9])([#17])[#35]', > '[CX4H0]([#6])([#17])([#17])[#17]', > '[CX4H0]([#6])([#35])([#35])[#35]', > '[CX4H0]([#7])([#7])([#7])[#7+]', > '[CX4H0]([#8])([#8])([#8])[#8]', > '[CX4H0]([#8])([#9])([#9])[#9]', > '[CX3H2]=[#6]', > '[CX3H2]=[#7]', > '[CX3H1](=[#6])[#6]', > '[CX3H1]([#6])=[#7]', > '[CX3H1]([#6])=[#8]', > '[CX3H1](=[#6])[#7;+0]', > '[CX3H1](=[#6])[#7+]', > '[CX3H1](=[#6])[#8]', > '[CX3H1](=[#6])[#16]', > '[CX3H1](=[#6])[#9]', > '[CX3H1](=[#6])[#17]', > '[CX3H1](=[#6])[#35]', > '[CX3H1](=[#6])[#14]', > '[CX3H1]([#7])=[#7]', > '[CX3H1]([#7])=[#8]', > '[CX3H1](=[#7])[#8]', > '[CX3H1](=[#8])[#8]', > '[CX3H1]([#16])=[#7]', > '[CX3H0](=[#6])([#6])[#6]', > '[CX3H0]([#6])([#6])=[#7]', > '[CX3H0]([#6])([#6])=[#8]', > '[CX3H0](=[#6])([#6])[#7]', > '[CX3H0](=[#6])([#6])[#8]', > '[CX3H0](=[#6])([#6])[#16]', > '[CX3H0](=[#6])([#6])[#9]', > '[CX3H0](=[#6])([#6])[#17]', > '[CX3H0]([#6])([#7])=[#7]', > '[CX3H0]([#6])([#7])=[#8]', > '[CX3H0]([#6])([#7])=[#16]', > '[CX3H0]([#6])(=[#8])[#8]', > '[CX3H0]([#6])(=[#8])[#8-]', > '[CX3H0]([#6])(=[#8])[#16]', > '[CX3H0]([#6])(=[#8])[#9]', > '[CX3H0]([#6])(=[#8])[#17]', > '[CX3H0]([#6])(=[#8])[#35]', > '[CX3H0]([#6])(=[#8])[#53]', > '[CX3H0](=[#6])([#7])[#7]', > '[CX3H0](=[#6])([#7+])[#8]', > '[CX3H0](=[#6])([#8])[#9]', > '[CX3H0](=[#6])([#9])[#9]', > '[CX3H0](=[#6])([#9])[#17]', > '[CX3H0](=[#6])([#17])[#17]', > '[CX3H0](=[#6])([#53])[#53]', > '[CX3H0](=[#7])([#7])[#7]', > '[CX3H0]([#7])([#7])=[#8]', > '[CX3H0]([#7])([#7])=[#16]', > '[CX3H0](=[#7])([#7])[#8]', > '[CX3H0]([#7])(=[#8])[#8]', > '[CX3H0]([#7])(=[#8])[#16]', > '[CX3H0]([#7])(=[#16])[#16]', > '[CX3H0](=[#8])([#8])[#8]', > '[CX3H0](=[#8])([#8])[#17]', > '[cX3H1](:[c]):[c]', > '[cX3H1](:[c]):[n;+0]', > '[cX3H1](:[c]):[n+]', > '[cX3H1](:[n]):[n]', > '[cX3H0]([c])([c])[c]', > '[cX3H0]([C])([c])[c]', > '[cX3H0]([C])([c])[n+0]', > '[cX3H0]([C])([c])[n+]', > '[cX3H0]([c])([c])[N;+0]', > '[cX3H0]([c])([c])[N+]', > '[cX3H0]([c])([c])[n]', > '[cX3H0]([c])([c])[O]', > '[cX3H0]([c])([c])[S]', > '[cX3H0]([c])([c])[F]', > '[cX3H0]([c])([c])[Cl]', > '[cX3H0]([c])([c])[Br]', > '[cX3H0]([c])([c])[I]', > '[cX3H0]([c])([c])[Si]', > '[cX3H0]([c])([n])[N]', > '[cX3H0]([c])([n])[O]', > '[cX3H0]([N])([n])[n]', > '[cX3H0]([n])([n])[n]', > '[cX3H0]([n])([n])[Cl]', > '[CX2H1]#[#6]', > '[CX2H0](#[#6])[#6]', > '[CX2H0]([#6])#[#7]', > '[CX2H0](#[#6])[#7]', > '[CX2H0](#[#6])[#17]', > '[CX2H0](#[#7])[#7]', > '[CX2H0](#[#7])[#8]', > '[CX2H0](=[#6])=[#6]', > '[CX2H0](=[#6])=[#7]', > '[CX2H0](=[#6])=[#8]', > '[CX2H0](=[#7])=[#8]', > '[CX2H0](=[#7])=[#16]',# 161 > '[NX3H2;^3][#6;^3]', > '[NX3H2;^2][#6;^2]', > '[NX3H2;^3][#7;^3]', > '[NX3H2;^2][#7;^2]', > '[NX3H2][#16]',# 166 > '[NX3H1;^3]([#6;^3])[#6;^3]', > '[NX3H1;^2]([#6;^2])[#6;^3]', > '[NX3H1;^2]([#6;^2,^1])[#6;^2,^1]', > '[NX3H1;^3]([#6;^3])[#7;^3]', > '[$([NX3H1;^2]([#6;^2])[#7;^3]),$([NX3H1;^2]([#6;^3])[#7;^2])]', > '[NX3H1;^2]([#6;^2])[#7;^2]', > '[NX3H1;^2]([#6;^2])[#7+;^2]', > '[NX3H0;^3]([#6;^3])([#6;^3])[#6;^3]', > '[NX3H0;^2]([#6;^2])([#6;^3])[#6;^3]', > '[NX3H0;^2]([#6;^2])([#6;^2])[#6;^3]', > '[NX3H0;^2]([#6;^2])([#6;^2])[#6;^2]', > '[NX3H0;^3]([#6;^3])([#6;^3])[#7;^3]', > '[$([NX3H0;^2]([#6;^2])([#6;^3])[#7;^3]),$([NX3H0;^2]([#6;^ > 3])([#6;^3])[#7;^2])]', > '[$([NX3H0;^2]([#6;^2])([#6;^3])[#7+;^3]),$([NX3H0;^2]([#6; > ^3])([#6;^3])[#7+;^2])]', > '[$([NX3H0;^2]([#6;^2])([#6;^2])[#7;^3]),$([NX3H0;^2]([#6;^ > 2])([#6;^3])[#7;^2])]', > '[$([NX3H0;^2]([#6;^2])([#6;^2])[#7+;^3]),$([NX3H0;^2]([#6; > ^2])([#6;^3])[#7+;^2])]', > '[NX3H0;^2]([#6;^2])([#6;^2])[#7;^2]', > '[NX3H0]([#6])([#6])[#14]',# 184 > '[NX3H0]([#6])([#6])[#17]', > '[NX3H0]([#6])([#6])[#35]', > '[$([NX3H0;^2]([#6;^2])([#7;^2])[#7]),$([NX3H0;^2]([#6;^3]) > ([#7;^2])[#7;^2])]', > '[NX3H0;^2]([#6;^2])([#7;^2])[#7;^2]', > '[NX2H1]=[#6]',# 189 > '[NX2H0](=[#6])[#6]', > '[NX2H0]([#6])=[#7;+0]', > '[NX2H0]([#6])=[#7;+1]', > '[NX2H0](=[#6])[#7;+0]', > '[NX2H0](=[#6])[#7;+1]', > '[NX2H0]([#6])=[#8]', > '[NX2H0](=[#6])[#8]', > '[NX2H0]([#7])=[#7]', > '[NX2H0]([#7])=[#8]', > '[NX2H0]([#8])=[#8]', > '[nX3H2][c+]',# 200 > '[nX3H1]([#6]):[c+]', > '[nX3H0](:[#6+])([#6])[#6]', > '[nX2H0](:[#6]):[#6]', > '[nX2H0](:[#6]):[#7]', > '[NH3+;^3][#6]', > '[NH2+;^3]([#6])[#6]', > '[NH+;^3]([#6])([#6])[#6]', > '[N+;^2](=[#8])([#6])[O-]', > '[N+;^2](=[#7])([#6])[O-]', > '[N+;^2](=[#8])([#7])[O-]', > '[N+;^2](=[#8])([#8])[O-]', > '[nH+X3H1]([c])[c]', > '[N+X2H0;^1]([C])#[C-]', > '[N+X2H0;^1](=[#7])=[#7-]', > '[OX2H1;^3][#6;^3]', > '[OX2H1;^2][#6;^2]', > '[OX2H1;^2][#7;^2]', > '[OX2H1][#8]',#218 > '[OX2H1][#16]',# 219 > '[OX2H1][#14]',# 220 > '[OX2H0;^3]([#6;^3])[#6;^3]', > '[OX2H0;^2]([#6;^2])[#6;^3]', > '[OX2H0;^2]([#6;^2])[#6;^2]', > '[$([OX2H0;^2]([#6;^2])[#7;^3]),$([OX2H0;^2]([#6;^3])[#7;^2])]', > '[$([OX2H0;^2]([#6;^2])[#7+;^3]),$([OX2H0;^2]([#6;^3])[#7+;^2])]', > '[OX2H0;^2]([#6;^2])[#7;^2]', > '[OX2H0;^3]([#6;^3])[#8]', > '[OX2H0;^2]([#6;^2])[#8]', > '[OX2H0]([#6])[#16]',# 229 > '[OX2H0;^3]([#6;^3])[#15]', > '[OX2H0;^2]([#6;^2])[#15]', > '[OX2H0]([#6])[#14]',#232 > '[OX2H0]([#14])[#14]',# 233 > '[PX3H0]([#6])([#6])[#6]',# 234-237 > '[PX4H0]([#6])([#6])(=[#8])[#8]', > '[PX4H0]([#6])([#6])([#6])=[#8]', > '[PX4H0](=[#8])([#8])([#8])[#8]', > '[#16H1;^3][#6;^3]', > '[#16H1;^3][#6;^2]', > '[#16H0;^3]([#6;^3])[#6;^3]', > '[#16H0;^3]([#6;^2])[#6;^3]', > '[#16H0;^3]([#6;^2])[#6;^2]', > '[#16H0;^3]([#6;^3])[#16]', > '[#16H0;^3]([#6;^2])[#16]', > '[SX3H0]([#6])([#6])=[#8]',# 245-267 > '[SX4H0]([#6])([#6])(=[#8])=[#8]', > '[SX4H0]([#6])(=[#8])(=[#8])[#8-]', > '[SX4H0]([#7])(=[#8])(=[#8])[#8]', > '[SX3H0](=[#8])([#8])[#8]', > '[SX4H0](=[#8])(=[#8])([#8])[#8]', > '[SX4H0](=[#8])(=[#8])([#8])[#9]', > '[SX4H0](=[#8])(=[#8])([#8])[#17]', > '[SiX4H3][#6]', > '[SiX4H2]([#6])[#6]', > '[SiX4H1]([#6])([#6])[#6]', > '[SiX4H1]([#6])([#6])[#17]', > '[SiX4H1]([#6])([#17])[#17]', > '[SiX4H1]([#8])([#8])[#8]', > '[SiX4H0]([#6])([#6])([#6])[#6]', > '[SiX4H0]([#6])([#6])([#6])[#7]', > '[SiX4H0]([#6])([#6])([#6])[#8]', > '[SiX4H0]([#6])([#6])([#6])[#17]', > '[SiX4H0]([#6])([#6])([#6])[#35]', > '[SiX4H0]([#6])([#6])([#8])[#8]', > '[SiX4H0]([#6])([#6])([#17])[#17]', > '[SiX4H0]([#6])([#8])([#8])[#8]', > '[SiX4H0]([#6])([#17])([#17])[#17]'] > > > Coef=[-4309.05, 439.88, -773.83, -1199.1, -817.94, -1112.98, -1396.74, > -1052.64, -1008.77, 553.89, -652.47, -1074.2, -705.22, -980.99, -1274.78, > -852.22, -623.15, -617.4, -623.3, -685.52, -932.85, -1480.52, -807.51, > -1375.72, -1279.46, -951.95, -1932.88, -529.62, -957.93, -575.78, -850.09, > -1152.31, -504.42, -497.94, -500.7, -558.92, -1363.17, -672.56, -1153.93, > -433.94, -472.96, -494.62, -518.18, -476.37, -870.19, -1433.08, -729.48, > -403.8, -813.97, -426.89, -730.08, -1023.12, -179.93, -361.21, -362.53, > -432.3, -626.56, -1004.06, -320.26, -318.84, -356.73, -746.41, -1284.92, > -649.83, -243.86, -302.73, -320.46, -323.43, -275.67, -366.35, -339.39, > -896.07, -1580.14, -531.65, -702.52, -928.8, -566.63, -762.24, -396.09, > -958.41, -595.08, -747.98, -1161.32, -546.98, -555.33, -573.39, -833.05, > -1134.46, -762.26, -916.53, -545.94, -1372.72, -433.99, -630.4, -248.77, > -825.51, -602.48, -1031.71, -439.03, -397.75, -991.99, -621.43, -1460.28, > -389.6, -534.91, -844.48, -174.28, -205.8, -204.22, -281.7, -1249.51, > -678.42, -430.57, -415.97, -359.75, -407.94, -544.25, -1416.91, -1022.83, > -1839.83, -1202.52, -772.08, -1488.48, -2092.99, -546.67, -338.25, > -543.64, -776.86, -497.1, -1022.16, -407.72, -413.58, -630.62, -361.54, > -844.05, -494.97, -644.82, -619.12, -1044.73, -401.83, -393.22, > -399.84, -468.14, -686.6, -1064.47, -835.9, -1260.9, -583.18, -828.48, > -653.92, -506.41, -508.61, -1006.69, -512.21, -912.2, -801.89, > -554.47, -741.19, -323.55, -433.06, -1250.0, 144.43, 191.61, -321.73, > -263.0, -356.54, 657.84, 707.92, 714.3, 209.21, 254.66, 274.14, > 382.93, 1170.07, 1214.78, 1214.87, 1229.16, 739.41, 781.06, 919.58, > 771.9, 879.1, 787.25, 750.91, 747.48, 769.45, 384.22, 424.65, > -7.7, 550.75, 310.59, 237.35, 119.1, 302.14, 396.97, 192.01, -89.71, > -43.12, 356.35, -122.03, 814.57, 1314.74, 412.85, 134.45, > 259.93, 381.05, 531.03, 116.31, 139.32, -143.13, 436.53, 297.18, -520.51, > -156.85, 389.05, 283.41, -67.43, -30.25, -2.73, 209.41, > 778.41, 676.08, 540.34, 0.0, 0.0, 242.71, 377.06, 233.2, 309.6, 386.05, > 225.09, 392.6, 34.75, 0.0, -128.03, -172.02, 8.54, -110.34, > -117.44, 629.97, 613.04, 652.85, 25.47, 13.65, 764.28, 1000.31, 113.62, > 2.73, -121.52, 89.44, -120.52, -114.1, -1004.63, -581.65, > -193.18, -561.69, -414.48, -463.15, 130.9, 0.0, -97.16, 70.09, 57.55, > 38.94, -0.66, 8.62, -133.9, 1.25, -1.22, -1.09, -38.45, -25.28, -5.65] > > def findHBonds(m,confId=-1,possiblePartners='[#8,#7,#9]', > possibleHs='[#1][#8,#7,#16]',distThresh=2.5): > conf = m.GetConformer(confId) > partners =[x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts( > possiblePartners))] > hs= [x[0] for x in m.GetSubstructMatches(Chem. > MolFromSmarts(possibleHs))] > res = [] > for h in hs: > ph = conf.GetAtomPosition(h) > for partner in partners: > if m.GetBondBetweenAtoms(h,partner) is not None: > continue > d = conf.GetAtomPosition(partner).Distance(ph) > if d<=distThresh: > res.append((h,partner,d)) > return res > > def findintramolecularHH(m,confId=-1,possiblePartners='[#1][*]', > possibleHs='[#1]',distThresh=2.5): > conf = m.GetConformer(confId) > partners =[x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts( > possiblePartners))] > linkers =[x[1] for x in m.GetSubstructMatches(Chem.MolFromSmarts( > possiblePartners))] > hs= [x[0] for x in m.GetSubstructMatches(Chem. > MolFromSmarts(possibleHs))] > res = [] > for h in hs: > ph = conf.GetAtomPosition(h) > for partner,linker in zip(partners,linkers): > if m.GetBondBetweenAtoms(h,linker) is not None: > continue > d = conf.GetAtomPosition(partner).Distance(ph) > if d<=distThresh: > res.append((h,partner,d)) > return res > > > def HeatOfFormation(mol,Heat_of_combustion): > return SumdHfCombustionProducts(mol)-Heat_of_combustion > > def HeatOfCombustion(mol,printing): > C = zeros(len(Coef)) > for j in range(0,len(L)): > c= Occ(L[j],mol) > C[j]=c > C[267]=len(findHBonds(mol,distThresh=1.74)) #Donor_Acceptor(mol) # > need to check that parameter! > #Dist=HHdistances(mol) > C[268]=len(findintramolecularHH(mol,distThresh=1.999)) #Dist[0] > C[269]=len(findintramolecularHH(mol,distThresh=2.3))-C[268]#Dist[1] > # angle issue! > A=Angles(mol) > C[270]=A[0] > C[271]=A[1] > C[272]=A[2] > if printing: > print C > return (C,sum(C*array(Coef))) > > > def maincomputeOpt(mol,V): > mol=Chem.AddHs(mol) > AllChem.EmbedMolecule(mol) > AllChem.MMFFOptimizeMolecule(mol) > H = HeatOfCombustion(mol,0) > #Hf= HeatOfFormation(mol,H[1]) > print H[1]-float(mol.GetProp('deltaH(c)')) > > #print Hf/4.1843 to convert in kcal/mol > #V.append(H[0]) > > def maincomputenoOpt(mol,V): > H = HeatOfCombustion(mol,0) > #Hf= HeatOfFormation(mol,H[1]) > #h.write(str(H)+","+str(abs(H-float(mol.GetProp('deltaH(c)'))))+'\n') > #print H[1]-float(mol.GetProp('deltaH(c)')) > V.append(H[0]) > return H[1] > > #h = open('Molecules_Score.txt','w') > suppl = > Chem.SDMolSupplier('/Users/mbp/Downloads/molecules-20-18279-s001/Compounds > List for Heat-of-Combustion Calculations.sdf',removeHs=False) > V=[] > j=0 > i=0 > K=[] > for mol in suppl: > i=i+1 > if i>3000: > break > try: > H=maincomputenoOpt(mol,V) > #maincomputeOpt(mol,V) > K.append(H-float(mol.GetProp('deltaH(c)'))) > except: > 1 > > > L.append("Hdonor") > L.append("DH2") > L.append("DH23") > L.append("A60") > L.append("A90") > L.append("A102") > > # look at the global occurence of the smarts & NNI: > tV=[list(i) for i in zip(*V)] > j=0 > for tv,lv in zip(tV,L): > j=j+1 > x=0 > for itv in tv: > x +=int(itv) > if x>0: > 1 > print j,lv,x > if x==0: > #print j,lv > 1 > > > # distribution of diff Hcalc vs Hofficial > fig,ax = plt.subplots() > plt.ioff() > > n,bins,patches = ax.hist(K,bins=100) > ax.set_xlabel('Dihedral Angle') > ax.set_ylabel('Counts'); > plt.show() > > > > > > > > > *Dr. Guillaume GODIN* > Principal Scientist > Chemoinformatic & Datamining > Innovation > CORPORATE R&D DIVISION > DIRECT LINE +41 (0)22 780 3645 > MOBILE +41 (0)79 536 1039 > Firmenich SA > RUE DES JEUNES 1 | CASE POSTALE 239 | CH-1211 GENEVE 8 > > ------------------------------ > *De :* Greg Landrum <greg.land...@gmail.com> > *Envoyé :* mercredi 14 septembre 2016 14:14 > > *À :* Guillaume GODIN > *Cc :* RDKit Discuss > *Objet :* Re: [Rdkit-discuss] Angstroms Hydrogen bonding > > > > On Wed, Sep 14, 2016 at 4:16 AM, Guillaume GODIN < > guillaume.go...@firmenich.com> wrote: > >> Your solution is perfect! >> >> >> glad it worked > > >> I am currently implementing this article "http://www.mdpi.com/1420-3049 >> /20/10/18279" using RDKit. >> >> > Interesting, I hadn't seen that one. > > > >> It's now almost done, I need to check the results on Heat of Formation >> now and understand why my code to get Angle in torsions is not working. >> >> def Angles(mol): >> tors = mol.GetSubstructMatches(Chem.MolFromSmarts('[C]-[C;O;S]-[C;O >> ;S]-[C]')) >> conf = mol.GetConformer() >> A60=0 >> A90=0 >> A102=0 >> for tor in tors: >> Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2], >> tor[3])) >> if Angle<=60: >> A60+=1 >> elif Angle>60 and Angle<=90: >> A90+=1 >> elif Angle>90 and Angle<=102: >> A102+=1 >> return (A60,A90,A102) >> >> Comparing to the publication on Heat of Formation page 18294, I have to >> much more occurences (ie. a factor of 50-100 more then expected). >> > > I suspect that the problem is that the SMARTS you > specify: '[C]-[C;O;S]-[C;O;S]-[C]' won't actually match anything. [C;O;S] > means 'aliphatic C AND aliphatic O AND aliphatic S'. It think you like mean > '[C,O,S]', which is 'aliphatic C OR aliphatic O OR aliphatic S'. > You might also want to think about whether or not you should replace C, O, > S (all of which only match aliphatic elements) with #6, #8, #16 (which can > match aliphatic or aromatic) throughout the SMARTS > > >> When it will be done I will share the code with you guys. >> >> > Cool, I look forward to trying that out. > > Best, > -greg > > > >> BR, >> >> Guillaume >> ------------------------------ >> *De :* Greg Landrum <greg.land...@gmail.com> >> *Envoyé :* mercredi 14 septembre 2016 12:16 >> *À :* Guillaume GODIN >> *Cc :* RDKit Discuss >> *Objet :* Re: [Rdkit-discuss] Angstroms Hydrogen bonding >> >> Hi Guillaume, >> >> On Tue, Sep 13, 2016 at 10:12 PM, Guillaume GODIN < >> guillaume.go...@firmenich.com> wrote: >> >>> 1 Does 3D coordinates of a conformer is in Angstroms ? >>> >>> If you read the conformer from a file, for example a mol file, then the >> 3D coordinates are in whatever units they were in in that file. This is >> usually Angstrsom. >> If you generate the conformation using one of the RDKit embedding >> functions, then they are certainly in Angstroms. >> >>> 2 How to enumerate all HBonding to determine the bond length ? >>> >> >> Interesting question. Here's a python function that might be a starting >> point: >> >> def findHBonds(m,confId=-1,possiblePartners='[#8,#7]',possibleHs >> ='[#1][#8,#7]',distThresh=2.5): >> conf = m.GetConformer(confId) >> partners =[x[0] for x in m.GetSubstructMatches(Chem.Mo >> lFromSmarts(possiblePartners))] >> hs= [x[0] for x in m.GetSubstructMatches(Chem.Mol >> FromSmarts(possibleHs))] >> res = [] >> for h in hs: >> ph = conf.GetAtomPosition(h) >> for partner in partners: >> if m.GetBondBetweenAtoms(h,partner) is not None: >> continue >> d = conf.GetAtomPosition(partner).Distance(ph) >> if d<=distThresh: >> res.append((h,partner,d)) >> return res >> >> >> In order to allow flexibility about what an H bond is, I left the >> definitions of the acceptors (partners in the above code) and polar Hs >> (just Hs in the above code) as SMARTS definitions so that they can be >> customized. >> >> >> >> ********************************************************************** >> DISCLAIMER >> This email and any files transmitted with it, including replies and >> forwarded copies (which may contain alterations) subsequently transmitted >> from Firmenich, are confidential and solely for the use of the intended >> recipient. The contents do not represent the opinion of Firmenich except to >> the extent that it relates to their official business. >> ********************************************************************** >> > >
------------------------------------------------------------------------------
_______________________________________________ Rdkit-discuss mailing list Rdkit-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/rdkit-discuss