Hi Greg,
here the first code prototype of the article mappiing in rdkit: for the moment I have some issues with angle the rest start to be ok Can you look at it and see if my interpretation is ok ? BR, Guillaume --------------- import sys import re from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem import rdMolTransforms from numpy import * from rdkit.Chem.rdMolDescriptors import CalcMolFormula import matplotlib.pyplot as plt def Occ(smart,mol): func = Chem.MolFromSmarts(smart) maps = mol.GetSubstructMatches(func) return len(maps) def Angles(mol): tors = mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]-[#6,#7,#16]~[#6,#7,#16]-[#6]')) conf = mol.GetConformer() A60=0 A90=0 A102=0 for tor in tors: Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2], tor[3])) if Angle<=60: A60+=1 elif Angle>60 and Angle<=90: A90+=1 elif Angle>90 and Angle<=102: A102+=1 return (A60,A90,A102) # 3 molecules not parse with rdkit ? so can explain some occurence diff # Guanidinium chloride # Creatine # Arginine def SumdHfCombustionProducts(mol): #(data extracted from Domalski1 and Skinner2) dHf(kJ/mol: dHfH3BO3=-1094.7226 dHfCO2 =-393.7727 dHfH2O=-286.0212 dHfH2SO4=-888.405 dHfH3PO4=-1279.90476 dHfSiO2=-847.827 dHfHF=-316.6435 dHfHCl=-165.5879 dHfHBr=-121.4172 dHfHI=-142.0 NB=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#5]'))) NC=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]')) ) NS=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#16]'))) NP=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#15]'))) NSi=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#14]'))) NF=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#9]'))) NCl=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#17]'))) NBr=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#35]'))) NI=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#53]'))) NH2O=(len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#1]')))-3*NB-2*NS-3*NP-NF-NCl-NBr-NI)/2; return NB*dHfH3BO3+NC*dHfCO2+NH2O*dHfH2O+NS*dHfH2SO4+NP*dHfH3PO4+NSi*dHfSiO2+NF*dHfHF+NCl*dHfHCl+NBr*dHfHBr+NI*dHfHI # equation to convert Hc to Hf => HeatOfFormation = SumdHfCombustionProducts - HeatOfCombustion; L=['[BX3H0]([#6])([#6])[#6]', '[CX4H3][#5]', '[CX4H3][#6]', '[CX4H3][#7;+0]', '[CX4H3][#7+]', '[CX4H3][#8]', '[CX4H3][#16]', '[CX4H3][#15]', '[CX4H3][#14]', '[CX4H2]([#5])[#6]', '[CX4H2]([#6])[#6]', '[CX4H2]([#6])[#7;+0]', '[CX4H2]([#6])[#7+]', '[CX4H2]([#6])[#8]', '[CX4H2]([#6])[#16]', '[CX4H2]([#6])[#15]', '[CX4H2]([#6])[#9]', '[CX4H2]([#6])[#17]', '[CX4H2]([#6])[#35]', '[CX4H2]([#6])[#53]', '[CX4H2]([#6])[#14]', '[CX4H2]([#7])[#7;+0]', '[CX4H2]([#7])[#7+]', '[CX4H2]([#8])[#7]', '[CX4H2]([#8])[#8]', '[CX4H2]([#8])[#17]', '[CX4H2]([#16])[#16]', '[CX4H1]([#6])([#6])[#6]', '[CX4H1]([#6])([#6])[#7;+0]', '[CX4H1]([#6])([#6])[#7+]', '[CX4H1]([#6])([#6])[#8]', '[CX4H1]([#6])([#6])[#16]', '[CX4H1]([#6])([#6])[#9]', '[CX4H1]([#6])([#6])[#17]', '[CX4H1]([#6])([#6])[#35]', '[CX4H1]([#6])([#6])[#53]', '[CX4H1]([#6])([#7])[#7;+0]', '[CX4H1]([#6])([#7])[#7+]', '[CX4H1]([#6])([#8])[#8]', '[CX4H1]([#6])([#9])[#9]', '[CX4H1]([#6])([#9])[#17]', '[CX4H1]([#6])([#17])[#17]', '[CX4H1]([#6])([#17])[#35]', '[CX4H1]([#6])([#35])[#35]', '[CX4H1]([#7])([#7])[#7+]', '[CX4H1]([#8])([#8])[#8]', '[CX4H1]([#8])([#9])[#9]', '[CX4H0]([#6])([#6])([#6])[#6]', '[CX4H0]([#6])([#6])([#6])[#7;+0]', '[CX4H0]([#6])([#6])([#6])[#7+]', '[CX4H0]([#6])([#6])([#6])[#8]', '[CX4H0]([#6])([#6])([#6])[#16]', '[CX4H0]([#6])([#6])([#6])[#9]', '[CX4H0]([#6])([#6])([#6])[#17]', '[CX4H0]([#6])([#6])([#6])[#35]', '[CX4H0]([#6])([#6])([#6])[#53]', '[CX4H0]([#6])([#6])([#7])[#7+]', '[CX4H0]([#6])([#6])([#8])[#8]', '[CX4H0]([#6])([#6])([#9])[#9]', '[CX4H0]([#6])([#6])([#9])[#17]', '[CX4H0]([#6])([#6])([#17])[#17]', '[CX4H0]([#6])([#7])([#7])[#7+]', '[CX4H0]([#6])([#8])([#8])[#8]', '[CX4H0]([#6])([#8])([#9])[#9]', '[CX4H0]([#6])([#9])([#9])[#9]', '[CX4H0]([#6])([#9])([#9])[#17]', '[CX4H0]([#6])([#9])([#9])[#35]', '[CX4H0]([#6])([#9])([#17])[#17]', '[CX4H0]([#6])([#9])([#17])[#35]', '[CX4H0]([#6])([#17])([#17])[#17]', '[CX4H0]([#6])([#35])([#35])[#35]', '[CX4H0]([#7])([#7])([#7])[#7+]', '[CX4H0]([#8])([#8])([#8])[#8]', '[CX4H0]([#8])([#9])([#9])[#9]', '[CX3H2]=[#6]', '[CX3H2]=[#7]', '[CX3H1](=[#6])[#6]', '[CX3H1]([#6])=[#7]', '[CX3H1]([#6])=[#8]', '[CX3H1](=[#6])[#7;+0]', '[CX3H1](=[#6])[#7+]', '[CX3H1](=[#6])[#8]', '[CX3H1](=[#6])[#16]', '[CX3H1](=[#6])[#9]', '[CX3H1](=[#6])[#17]', '[CX3H1](=[#6])[#35]', '[CX3H1](=[#6])[#14]', '[CX3H1]([#7])=[#7]', '[CX3H1]([#7])=[#8]', '[CX3H1](=[#7])[#8]', '[CX3H1](=[#8])[#8]', '[CX3H1]([#16])=[#7]', '[CX3H0](=[#6])([#6])[#6]', '[CX3H0]([#6])([#6])=[#7]', '[CX3H0]([#6])([#6])=[#8]', '[CX3H0](=[#6])([#6])[#7]', '[CX3H0](=[#6])([#6])[#8]', '[CX3H0](=[#6])([#6])[#16]', '[CX3H0](=[#6])([#6])[#9]', '[CX3H0](=[#6])([#6])[#17]', '[CX3H0]([#6])([#7])=[#7]', '[CX3H0]([#6])([#7])=[#8]', '[CX3H0]([#6])([#7])=[#16]', '[CX3H0]([#6])(=[#8])[#8]', '[CX3H0]([#6])(=[#8])[#8-]', '[CX3H0]([#6])(=[#8])[#16]', '[CX3H0]([#6])(=[#8])[#9]', '[CX3H0]([#6])(=[#8])[#17]', '[CX3H0]([#6])(=[#8])[#35]', '[CX3H0]([#6])(=[#8])[#53]', '[CX3H0](=[#6])([#7])[#7]', '[CX3H0](=[#6])([#7+])[#8]', '[CX3H0](=[#6])([#8])[#9]', '[CX3H0](=[#6])([#9])[#9]', '[CX3H0](=[#6])([#9])[#17]', '[CX3H0](=[#6])([#17])[#17]', '[CX3H0](=[#6])([#53])[#53]', '[CX3H0](=[#7])([#7])[#7]', '[CX3H0]([#7])([#7])=[#8]', '[CX3H0]([#7])([#7])=[#16]', '[CX3H0](=[#7])([#7])[#8]', '[CX3H0]([#7])(=[#8])[#8]', '[CX3H0]([#7])(=[#8])[#16]', '[CX3H0]([#7])(=[#16])[#16]', '[CX3H0](=[#8])([#8])[#8]', '[CX3H0](=[#8])([#8])[#17]', '[cX3H1](:[c]):[c]', '[cX3H1](:[c]):[n;+0]', '[cX3H1](:[c]):[n+]', '[cX3H1](:[n]):[n]', '[cX3H0]([c])([c])[c]', '[cX3H0]([C])([c])[c]', '[cX3H0]([C])([c])[n+0]', '[cX3H0]([C])([c])[n+]', '[cX3H0]([c])([c])[N;+0]', '[cX3H0]([c])([c])[N+]', '[cX3H0]([c])([c])[n]', '[cX3H0]([c])([c])[O]', '[cX3H0]([c])([c])[S]', '[cX3H0]([c])([c])[F]', '[cX3H0]([c])([c])[Cl]', '[cX3H0]([c])([c])[Br]', '[cX3H0]([c])([c])[I]', '[cX3H0]([c])([c])[Si]', '[cX3H0]([c])([n])[N]', '[cX3H0]([c])([n])[O]', '[cX3H0]([N])([n])[n]', '[cX3H0]([n])([n])[n]', '[cX3H0]([n])([n])[Cl]', '[CX2H1]#[#6]', '[CX2H0](#[#6])[#6]', '[CX2H0]([#6])#[#7]', '[CX2H0](#[#6])[#7]', '[CX2H0](#[#6])[#17]', '[CX2H0](#[#7])[#7]', '[CX2H0](#[#7])[#8]', '[CX2H0](=[#6])=[#6]', '[CX2H0](=[#6])=[#7]', '[CX2H0](=[#6])=[#8]', '[CX2H0](=[#7])=[#8]', '[CX2H0](=[#7])=[#16]',# 161 '[NX3H2;^3][#6;^3]', '[NX3H2;^2][#6;^2]', '[NX3H2;^3][#7;^3]', '[NX3H2;^2][#7;^2]', '[NX3H2][#16]',# 166 '[NX3H1;^3]([#6;^3])[#6;^3]', '[NX3H1;^2]([#6;^2])[#6;^3]', '[NX3H1;^2]([#6;^2,^1])[#6;^2,^1]', '[NX3H1;^3]([#6;^3])[#7;^3]', '[$([NX3H1;^2]([#6;^2])[#7;^3]),$([NX3H1;^2]([#6;^3])[#7;^2])]', '[NX3H1;^2]([#6;^2])[#7;^2]', '[NX3H1;^2]([#6;^2])[#7+;^2]', '[NX3H0;^3]([#6;^3])([#6;^3])[#6;^3]', '[NX3H0;^2]([#6;^2])([#6;^3])[#6;^3]', '[NX3H0;^2]([#6;^2])([#6;^2])[#6;^3]', '[NX3H0;^2]([#6;^2])([#6;^2])[#6;^2]', '[NX3H0;^3]([#6;^3])([#6;^3])[#7;^3]', '[$([NX3H0;^2]([#6;^2])([#6;^3])[#7;^3]),$([NX3H0;^2]([#6;^3])([#6;^3])[#7;^2])]', '[$([NX3H0;^2]([#6;^2])([#6;^3])[#7+;^3]),$([NX3H0;^2]([#6;^3])([#6;^3])[#7+;^2])]', '[$([NX3H0;^2]([#6;^2])([#6;^2])[#7;^3]),$([NX3H0;^2]([#6;^2])([#6;^3])[#7;^2])]', '[$([NX3H0;^2]([#6;^2])([#6;^2])[#7+;^3]),$([NX3H0;^2]([#6;^2])([#6;^3])[#7+;^2])]', '[NX3H0;^2]([#6;^2])([#6;^2])[#7;^2]', '[NX3H0]([#6])([#6])[#14]',# 184 '[NX3H0]([#6])([#6])[#17]', '[NX3H0]([#6])([#6])[#35]', '[$([NX3H0;^2]([#6;^2])([#7;^2])[#7]),$([NX3H0;^2]([#6;^3])([#7;^2])[#7;^2])]', '[NX3H0;^2]([#6;^2])([#7;^2])[#7;^2]', '[NX2H1]=[#6]',# 189 '[NX2H0](=[#6])[#6]', '[NX2H0]([#6])=[#7;+0]', '[NX2H0]([#6])=[#7;+1]', '[NX2H0](=[#6])[#7;+0]', '[NX2H0](=[#6])[#7;+1]', '[NX2H0]([#6])=[#8]', '[NX2H0](=[#6])[#8]', '[NX2H0]([#7])=[#7]', '[NX2H0]([#7])=[#8]', '[NX2H0]([#8])=[#8]', '[nX3H2][c+]',# 200 '[nX3H1]([#6]):[c+]', '[nX3H0](:[#6+])([#6])[#6]', '[nX2H0](:[#6]):[#6]', '[nX2H0](:[#6]):[#7]', '[NH3+;^3][#6]', '[NH2+;^3]([#6])[#6]', '[NH+;^3]([#6])([#6])[#6]', '[N+;^2](=[#8])([#6])[O-]', '[N+;^2](=[#7])([#6])[O-]', '[N+;^2](=[#8])([#7])[O-]', '[N+;^2](=[#8])([#8])[O-]', '[nH+X3H1]([c])[c]', '[N+X2H0;^1]([C])#[C-]', '[N+X2H0;^1](=[#7])=[#7-]', '[OX2H1;^3][#6;^3]', '[OX2H1;^2][#6;^2]', '[OX2H1;^2][#7;^2]', '[OX2H1][#8]',#218 '[OX2H1][#16]',# 219 '[OX2H1][#14]',# 220 '[OX2H0;^3]([#6;^3])[#6;^3]', '[OX2H0;^2]([#6;^2])[#6;^3]', '[OX2H0;^2]([#6;^2])[#6;^2]', '[$([OX2H0;^2]([#6;^2])[#7;^3]),$([OX2H0;^2]([#6;^3])[#7;^2])]', '[$([OX2H0;^2]([#6;^2])[#7+;^3]),$([OX2H0;^2]([#6;^3])[#7+;^2])]', '[OX2H0;^2]([#6;^2])[#7;^2]', '[OX2H0;^3]([#6;^3])[#8]', '[OX2H0;^2]([#6;^2])[#8]', '[OX2H0]([#6])[#16]',# 229 '[OX2H0;^3]([#6;^3])[#15]', '[OX2H0;^2]([#6;^2])[#15]', '[OX2H0]([#6])[#14]',#232 '[OX2H0]([#14])[#14]',# 233 '[PX3H0]([#6])([#6])[#6]',# 234-237 '[PX4H0]([#6])([#6])(=[#8])[#8]', '[PX4H0]([#6])([#6])([#6])=[#8]', '[PX4H0](=[#8])([#8])([#8])[#8]', '[#16H1;^3][#6;^3]', '[#16H1;^3][#6;^2]', '[#16H0;^3]([#6;^3])[#6;^3]', '[#16H0;^3]([#6;^2])[#6;^3]', '[#16H0;^3]([#6;^2])[#6;^2]', '[#16H0;^3]([#6;^3])[#16]', '[#16H0;^3]([#6;^2])[#16]', '[SX3H0]([#6])([#6])=[#8]',# 245-267 '[SX4H0]([#6])([#6])(=[#8])=[#8]', '[SX4H0]([#6])(=[#8])(=[#8])[#8-]', '[SX4H0]([#7])(=[#8])(=[#8])[#8]', '[SX3H0](=[#8])([#8])[#8]', '[SX4H0](=[#8])(=[#8])([#8])[#8]', '[SX4H0](=[#8])(=[#8])([#8])[#9]', '[SX4H0](=[#8])(=[#8])([#8])[#17]', '[SiX4H3][#6]', '[SiX4H2]([#6])[#6]', '[SiX4H1]([#6])([#6])[#6]', '[SiX4H1]([#6])([#6])[#17]', '[SiX4H1]([#6])([#17])[#17]', '[SiX4H1]([#8])([#8])[#8]', '[SiX4H0]([#6])([#6])([#6])[#6]', '[SiX4H0]([#6])([#6])([#6])[#7]', '[SiX4H0]([#6])([#6])([#6])[#8]', '[SiX4H0]([#6])([#6])([#6])[#17]', '[SiX4H0]([#6])([#6])([#6])[#35]', '[SiX4H0]([#6])([#6])([#8])[#8]', '[SiX4H0]([#6])([#6])([#17])[#17]', '[SiX4H0]([#6])([#8])([#8])[#8]', '[SiX4H0]([#6])([#17])([#17])[#17]'] Coef=[-4309.05, 439.88, -773.83, -1199.1, -817.94, -1112.98, -1396.74, -1052.64, -1008.77, 553.89, -652.47, -1074.2, -705.22, -980.99, -1274.78, -852.22, -623.15, -617.4, -623.3, -685.52, -932.85, -1480.52, -807.51, -1375.72, -1279.46, -951.95, -1932.88, -529.62, -957.93, -575.78, -850.09, -1152.31, -504.42, -497.94, -500.7, -558.92, -1363.17, -672.56, -1153.93, -433.94, -472.96, -494.62, -518.18, -476.37, -870.19, -1433.08, -729.48, -403.8, -813.97, -426.89, -730.08, -1023.12, -179.93, -361.21, -362.53, -432.3, -626.56, -1004.06, -320.26, -318.84, -356.73, -746.41, -1284.92, -649.83, -243.86, -302.73, -320.46, -323.43, -275.67, -366.35, -339.39, -896.07, -1580.14, -531.65, -702.52, -928.8, -566.63, -762.24, -396.09, -958.41, -595.08, -747.98, -1161.32, -546.98, -555.33, -573.39, -833.05, -1134.46, -762.26, -916.53, -545.94, -1372.72, -433.99, -630.4, -248.77, -825.51, -602.48, -1031.71, -439.03, -397.75, -991.99, -621.43, -1460.28, -389.6, -534.91, -844.48, -174.28, -205.8, -204.22, -281.7, -1249.51, -678.42, -430.57, -415.97, -359.75, -407.94, -544.25, -1416.91, -1022.83, -1839.83, -1202.52, -772.08, -1488.48, -2092.99, -546.67, -338.25, -543.64, -776.86, -497.1, -1022.16, -407.72, -413.58, -630.62, -361.54, -844.05, -494.97, -644.82, -619.12, -1044.73, -401.83, -393.22, -399.84, -468.14, -686.6, -1064.47, -835.9, -1260.9, -583.18, -828.48, -653.92, -506.41, -508.61, -1006.69, -512.21, -912.2, -801.89, -554.47, -741.19, -323.55, -433.06, -1250.0, 144.43, 191.61, -321.73, -263.0, -356.54, 657.84, 707.92, 714.3, 209.21, 254.66, 274.14, 382.93, 1170.07, 1214.78, 1214.87, 1229.16, 739.41, 781.06, 919.58, 771.9, 879.1, 787.25, 750.91, 747.48, 769.45, 384.22, 424.65, -7.7, 550.75, 310.59, 237.35, 119.1, 302.14, 396.97, 192.01, -89.71, -43.12, 356.35, -122.03, 814.57, 1314.74, 412.85, 134.45, 259.93, 381.05, 531.03, 116.31, 139.32, -143.13, 436.53, 297.18, -520.51, -156.85, 389.05, 283.41, -67.43, -30.25, -2.73, 209.41, 778.41, 676.08, 540.34, 0.0, 0.0, 242.71, 377.06, 233.2, 309.6, 386.05, 225.09, 392.6, 34.75, 0.0, -128.03, -172.02, 8.54, -110.34, -117.44, 629.97, 613.04, 652.85, 25.47, 13.65, 764.28, 1000.31, 113.62, 2.73, -121.52, 89.44, -120.52, -114.1, -1004.63, -581.65, -193.18, -561.69, -414.48, -463.15, 130.9, 0.0, -97.16, 70.09, 57.55, 38.94, -0.66, 8.62, -133.9, 1.25, -1.22, -1.09, -38.45, -25.28, -5.65] def findHBonds(m,confId=-1,possiblePartners='[#8,#7,#9]',possibleHs='[#1][#8,#7,#16]',distThresh=2.5): conf = m.GetConformer(confId) partners =[x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))] hs= [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possibleHs))] res = [] for h in hs: ph = conf.GetAtomPosition(h) for partner in partners: if m.GetBondBetweenAtoms(h,partner) is not None: continue d = conf.GetAtomPosition(partner).Distance(ph) if d<=distThresh: res.append((h,partner,d)) return res def findintramolecularHH(m,confId=-1,possiblePartners='[#1][*]',possibleHs='[#1]',distThresh=2.5): conf = m.GetConformer(confId) partners =[x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))] linkers =[x[1] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))] hs= [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possibleHs))] res = [] for h in hs: ph = conf.GetAtomPosition(h) for partner,linker in zip(partners,linkers): if m.GetBondBetweenAtoms(h,linker) is not None: continue d = conf.GetAtomPosition(partner).Distance(ph) if d<=distThresh: res.append((h,partner,d)) return res def HeatOfFormation(mol,Heat_of_combustion): return SumdHfCombustionProducts(mol)-Heat_of_combustion def HeatOfCombustion(mol,printing): C = zeros(len(Coef)) for j in range(0,len(L)): c= Occ(L[j],mol) C[j]=c C[267]=len(findHBonds(mol,distThresh=1.74)) #Donor_Acceptor(mol) # need to check that parameter! #Dist=HHdistances(mol) C[268]=len(findintramolecularHH(mol,distThresh=1.999)) #Dist[0] C[269]=len(findintramolecularHH(mol,distThresh=2.3))-C[268]#Dist[1] # angle issue! A=Angles(mol) C[270]=A[0] C[271]=A[1] C[272]=A[2] if printing: print C return (C,sum(C*array(Coef))) def maincomputeOpt(mol,V): mol=Chem.AddHs(mol) AllChem.EmbedMolecule(mol) AllChem.MMFFOptimizeMolecule(mol) H = HeatOfCombustion(mol,0) #Hf= HeatOfFormation(mol,H[1]) print H[1]-float(mol.GetProp('deltaH(c)')) #print Hf/4.1843 to convert in kcal/mol #V.append(H[0]) def maincomputenoOpt(mol,V): H = HeatOfCombustion(mol,0) #Hf= HeatOfFormation(mol,H[1]) #h.write(str(H)+","+str(abs(H-float(mol.GetProp('deltaH(c)'))))+'\n') #print H[1]-float(mol.GetProp('deltaH(c)')) V.append(H[0]) return H[1] #h = open('Molecules_Score.txt','w') suppl = Chem.SDMolSupplier('/Users/mbp/Downloads/molecules-20-18279-s001/Compounds List for Heat-of-Combustion Calculations.sdf',removeHs=False) V=[] j=0 i=0 K=[] for mol in suppl: i=i+1 if i>3000: break try: H=maincomputenoOpt(mol,V) #maincomputeOpt(mol,V) K.append(H-float(mol.GetProp('deltaH(c)'))) except: 1 L.append("Hdonor") L.append("DH2") L.append("DH23") L.append("A60") L.append("A90") L.append("A102") # look at the global occurence of the smarts & NNI: tV=[list(i) for i in zip(*V)] j=0 for tv,lv in zip(tV,L): j=j+1 x=0 for itv in tv: x +=int(itv) if x>0: 1 print j,lv,x if x==0: #print j,lv 1 # distribution of diff Hcalc vs Hofficial fig,ax = plt.subplots() plt.ioff() n,bins,patches = ax.hist(K,bins=100) ax.set_xlabel('Dihedral Angle') ax.set_ylabel('Counts'); plt.show() ? Dr. Guillaume GODIN Principal Scientist Chemoinformatic & Datamining Innovation CORPORATE R&D DIVISION DIRECT LINE +41 (0)22 780 3645 MOBILE +41 (0)79 536 1039 Firmenich SA RUE DES JEUNES 1 | CASE POSTALE 239 | CH-1211 GENEVE 8 ________________________________ De : Greg Landrum <greg.land...@gmail.com> Envoyé : mercredi 14 septembre 2016 14:14 À : Guillaume GODIN Cc : RDKit Discuss Objet : Re: [Rdkit-discuss] Angstroms Hydrogen bonding On Wed, Sep 14, 2016 at 4:16 AM, Guillaume GODIN <guillaume.go...@firmenich.com<mailto:guillaume.go...@firmenich.com>> wrote: Your solution is perfect! glad it worked I am currently implementing this article "http://www.mdpi.com/1420-3049/20/10/18279" using RDKit. Interesting, I hadn't seen that one. It's now almost done, I need to check the results on Heat of Formation now and understand why my code to get Angle in torsions is not working. def Angles(mol): tors = mol.GetSubstructMatches(Chem.MolFromSmarts('[C]-[C;O;S]-[C;O;S]-[C]')) conf = mol.GetConformer() A60=0 A90=0 A102=0 for tor in tors: Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2], tor[3])) if Angle<=60: A60+=1 elif Angle>60 and Angle<=90: A90+=1 elif Angle>90 and Angle<=102: A102+=1 return (A60,A90,A102) Comparing to the publication on Heat of Formation page 18294, I have to much more occurences (ie. a factor of 50-100 more then expected). I suspect that the problem is that the SMARTS you specify: '[C]-[C;O;S]-[C;O;S]-[C]' won't actually match anything. [C;O;S] means 'aliphatic C AND aliphatic O AND aliphatic S'. It think you like mean '[C,O,S]', which is 'aliphatic C OR aliphatic O OR aliphatic S'. You might also want to think about whether or not you should replace C, O, S (all of which only match aliphatic elements) with #6, #8, #16 (which can match aliphatic or aromatic) throughout the SMARTS When it will be done I will share the code with you guys. Cool, I look forward to trying that out. Best, -greg BR, Guillaume ________________________________ De : Greg Landrum <greg.land...@gmail.com<mailto:greg.land...@gmail.com>> Envoyé : mercredi 14 septembre 2016 12:16 À : Guillaume GODIN Cc : RDKit Discuss Objet : Re: [Rdkit-discuss] Angstroms Hydrogen bonding Hi Guillaume, On Tue, Sep 13, 2016 at 10:12 PM, Guillaume GODIN <guillaume.go...@firmenich.com<mailto:guillaume.go...@firmenich.com>> wrote: 1 Does 3D coordinates of a conformer is in Angstroms ? If you read the conformer from a file, for example a mol file, then the 3D coordinates are in whatever units they were in in that file. This is usually Angstrsom. If you generate the conformation using one of the RDKit embedding functions, then they are certainly in Angstroms. 2 How to enumerate all HBonding to determine the bond length ? Interesting question. Here's a python function that might be a starting point: def findHBonds(m,confId=-1,possiblePartners='[#8,#7]',possibleHs='[#1][#8,#7]',distThresh=2.5): conf = m.GetConformer(confId) partners =[x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possiblePartners))] hs= [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts(possibleHs))] res = [] for h in hs: ph = conf.GetAtomPosition(h) for partner in partners: if m.GetBondBetweenAtoms(h,partner) is not None: continue d = conf.GetAtomPosition(partner).Distance(ph) if d<=distThresh: res.append((h,partner,d)) return res In order to allow flexibility about what an H bond is, I left the definitions of the acceptors (partners in the above code) and polar Hs (just Hs in the above code) as SMARTS definitions so that they can be customized. ********************************************************************** DISCLAIMER This email and any files transmitted with it, including replies and forwarded copies (which may contain alterations) subsequently transmitted from Firmenich, are confidential and solely for the use of the intended recipient. The contents do not represent the opinion of Firmenich except to the extent that it relates to their official business. **********************************************************************
------------------------------------------------------------------------------
_______________________________________________ Rdkit-discuss mailing list Rdkit-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/rdkit-discuss