Hi Guillaume,
An FYI: I'm at the KNIME Summit in San Francisco this week and won't have
time to go through the code until I'm back in Basel.
-greg
On Wed, Sep 14, 2016 at 7:08 AM, Guillaume GODIN <
guillaume.go...@firmenich.com> wrote:
> Hi Greg,
>
>
> Here is the first code prototype of the article mapping in RDKit. For the
> moment I have some issues with the angles; the rest is starting to be OK.
>
>
> Can you look at it and see if my interpretation is OK?
>
>
> BR,
>
>
> Guillaume
>
>
> ---
> import sys
> import re
> from rdkit import Chem
> from rdkit.Chem import AllChem
> from rdkit.Chem import rdMolTransforms
> from numpy import *
> from rdkit.Chem.rdMolDescriptors import CalcMolFormula
> import matplotlib.pyplot as plt
>
>
> def Occ(smart,mol):
> func = Chem.MolFromSmarts(smart)
> maps = mol.GetSubstructMatches(func)
> return len(maps)
>
>
> def Angles(mol):
> tors = mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]-[#6,#7,#
> 16]~[#6,#7,#16]-[#6]'))
> conf = mol.GetConformer()
> A60=0
> A90=0
> A102=0
> for tor in tors:
> Angle = abs(AllChem.GetDihedralDeg(conf,tor[0], tor[1], tor[2],
> tor[3]))
> if Angle<=60:
> A60+=1
> elif Angle>60 and Angle<=90:
> A90+=1
> elif Angle>90 and Angle<=102:
> A102+=1
> return (A60,A90,A102)
>
> # 3 molecules are not parsed by RDKit(?), which may explain some of the
> # differences in occurrence counts:
> # Guanidinium chloride
> # Creatine
> # Arginine
>
>
> def SumdHfCombustionProducts(mol):
> #(data extracted from Domalski1 and Skinner2) dHf(kJ/mol:
> dHfH3BO3=-1094.7226
> dHfCO2 =-393.7727
> dHfH2O=-286.0212
> dHfH2SO4=-888.405
> dHfH3PO4=-1279.90476
> dHfSiO2=-847.827
> dHfHF=-316.6435
> dHfHCl=-165.5879
> dHfHBr=-121.4172
> dHfHI=-142.0
> NB=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#5]')))
> NC=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]')) )
> NS=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#16]')))
> NP=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#15]')))
> NSi=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#14]')))
> NF=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#9]')))
> NCl=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#17]')))
> NBr=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#35]')))
> NI=len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#53]')))
> NH2O=(len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#1]')))-3*NB-
> 2*NS-3*NP-NF-NCl-NBr-NI)/2;
> return NB*dHfH3BO3+NC*dHfCO2+NH2O*dHfH2O+NS*dHfH2SO4+NP*
> dHfH3PO4+NSi*dHfSiO2+NF*dHfHF+NCl*dHfHCl+NBr*dHfHBr+NI*dHfHI
> # Equation to convert Hc to Hf:
> #   HeatOfFormation = SumdHfCombustionProducts - HeatOfCombustion
>
>
> L=['[BX3H0]([#6])([#6])[#6]',
> '[CX4H3][#5]',
> '[CX4H3][#6]',
> '[CX4H3][#7;+0]',
> '[CX4H3][#7+]',
> '[CX4H3][#8]',
> '[CX4H3][#16]',
> '[CX4H3][#15]',
> '[CX4H3][#14]',
> '[CX4H2]([#5])[#6]',
> '[CX4H2]([#6])[#6]',
> '[CX4H2]([#6])[#7;+0]',
> '[CX4H2]([#6])[#7+]',
> '[CX4H2]([#6])[#8]',
> '[CX4H2]([#6])[#16]',
> '[CX4H2]([#6])[#15]',
> '[CX4H2]([#6])[#9]',
> '[CX4H2]([#6])[#17]',
> '[CX4H2]([#6])[#35]',
> '[CX4H2]([#6])[#53]',
> '[CX4H2]([#6])[#14]',
> '[CX4H2]([#7])[#7;+0]',
> '[CX4H2]([#7])[#7+]',
> '[CX4H2]([#8])[#7]',
> '[CX4H2]([#8])[#8]',
> '[CX4H2]([#8])[#17]',
> '[CX4H2]([#16])[#16]',
> '[CX4H1]([#6])([#6])[#6]',
> '[CX4H1]([#6])([#6])[#7;+0]',
> '[CX4H1]([#6])([#6])[#7+]',
> '[CX4H1]([#6])([#6])[#8]',
> '[CX4H1]([#6])([#6])[#16]',
> '[CX4H1]([#6])([#6])[#9]',
> '[CX4H1]([#6])([#6])[#17]',
> '[CX4H1]([#6])([#6])[#35]',
> '[CX4H1]([#6])([#6])[#53]',
> '[CX4H1]([#6])([#7])[#7;+0]',
> '[CX4H1]([#6])([#7])[#7+]',
> '[CX4H1]([#6])([#8])[#8]',
> '[CX4H1]([#6])([#9])[#9]',
> '[CX4H1]([#6])([#9])[#17]',
> '[CX4H1]([#6])([#17])[#17]',
> '[CX4H1]([#6])([#17])[#35]',
> '[CX4H1]([#6])([#35])[#35]',
> '[CX4H1]([#7])([#7])[#7+]',
> '[CX4H1]([#8])([#8])[#8]',
> '[CX4H1]([#8])([#9])[#9]',
> '[CX4H0]([#6])([#6])([#6])[#6]',
> '[CX4H0]([#6])([#6])([#6])[#7;+0]',
> '[CX4H0]([#6])([#6])([#6])[#7+]',
> '[CX4H0]([#6])([#6])([#6])[#8]',
> '[CX4H0]([#6])([#6])([#6])[#16]',
> '[CX4H0]([#6])([#6])([#6])[#9]',
> '[CX4H0]([#6])([#6])([#6])[#17]',
> '[CX4H0]([#6])([#6])([#6])[#35]',
> '[CX4H0]([#6])([#6])([#6])[#53]',
> '[CX4H0]([#6])([#6])([#7])[#7+]',
> '[CX4H0]([#6])([#6])([#8])[#8]',
> '[CX4H0]([#6])([#6])([#9])[#9]',
> '[CX4H0]([#6])([#6])([#9])[#17]',
> '[CX4H0]([#6])([#6])([#17])[#17]',
> '[CX4H0]([#6])([#7])([#7])[#7+]',
> '[CX4H0]([#6])([#8])([#8])[#8]',
> '[CX4H0]([#6])([#8])([#9])[#9]',
> '[CX4H0]([#6])([#9])([#9])[#9]',
> '[CX4H0]([#6])([#9])([#9])[#17]',
> '[CX4H0]([#6])([#9])([#9])[#35]',
> '[CX4H0]([#6])([#9])([#17])[#17]',
> '[CX4H0]([#6])([#9])([#17])[#35]',
> '[CX4H0]([#6])([#17])([#17])[#17]',
> '[CX4H0]([#6])([#35])([#35])[#35]',
> '[CX4H0]([#7])([#7])([#7])[#7+]',
> '[CX4H0]([#8])([#8])([#8])[#8]',
> '[CX4H0]([#8])([#9])([#9])[#9]',
> '[CX3H2]=[#6]',
> '[CX3H2]=[#7]',
>