Dear RDKit Developers,

First and foremost thank you for your excellent offering of a singularly
useful set of molecular descriptors!

As one of the authors cited by Dr Labute
(http://www.chemcomp.com/journal/vsadesc.htm),
I've appreciated RDKit's implementation of the "Labute descriptor" set.
However, I've noticed that two particular descriptors in that set,
specifically SlogP_VSA9 and SMR_VSA8, appear to always return a value of 0.0
no matter which of a substantial set of small molecules is tested via:

from rdkit.Chem import Descriptors
Descriptors.SlogP_VSA9(molecule)
Descriptors.SMR_VSA8(molecule)

All other descriptors I've tried seem to produce sensible results (with
those same molecules).

Of course it's always possible that it's something I've mis-coded.  I'm
attaching a simple Python program which returns the anomalous descriptor
values for me.

Thank you very much for all your efforts!
-- 
jeff godden
#!/usr/bin/python
# output some Labute descriptors via RDKit
from rdkit import Chem
from rdkit.Chem import Descriptors
import sys

# The script needs exactly one thing from the command line: the path of the
# SDF file to read. The usage error goes to stderr so that it never ends up
# inside the descriptor table, which is written to stdout.
if len(sys.argv) < 2:
    sys.stderr.write("need an SDF filename on the commandline.\n")
    sys.exit(-1)  # exit status kept as-is for existing callers

# Labute descriptors function pointers.
# Maps each descriptor name to the callable of the same name on
# rdkit.Chem.Descriptors. Names are generated per family in ascending index
# order: SlogP_VSA1..12, then SMR_VSA1..10, then PEOE_VSA1..14.
_labute_names = (
    ['SlogP_VSA%d' % idx for idx in range(1, 13)]
    + ['SMR_VSA%d' % idx for idx in range(1, 11)]
    + ['PEOE_VSA%d' % idx for idx in range(1, 15)]
)
dl = {label: getattr(Descriptors, label) for label in _labute_names}

# Header row: a 13-character 'name' column and a 7-character 'mw' column,
# followed by one 12-character column per descriptor in the iteration order
# of `dl`. The row is derived from `dl` itself so that adding or removing a
# descriptor cannot leave the header out of step with the table.
column_names = list(dl)
print('{0:13s} {1:7s} '.format('name', 'mw')
      + ' '.join('{0:12s}'.format(label) for label in column_names))

# Stream the molecules from the SDF file named on the command line and print
# one fixed-width row per molecule: name, molecular weight, then every
# descriptor in `dl` (same iteration order as the dictionary's keys).
with open(sys.argv[1], 'rb') as fin:  # opened in binary mode for the supplier
    for record_no, m in enumerate(Chem.ForwardSDMolSupplier(fin), 1):
        if m is None:
            # RDKit suppliers hand back None for a record they cannot parse;
            # skip it (with a note on stderr) instead of crashing on GetProp.
            sys.stderr.write(
                'skipping record {0}: could not be parsed\n'.format(record_no))
            continue
        name = m.GetProp('_Name')  # formatted as a string
        mw = Descriptors.MolWt(m)  # formatted as a float
        values = [func(m) for func in dl.values()]
        print('{0:13s} {1:7.3f} '.format(name, mw)
              + ' '.join('{0:12.6f}'.format(value) for value in values))

# import pandas as pd
# d = pd.read_fwf('data')
# for k in d.keys():
#     if 'name' not in k:
#         print(k, d[k].mean())

# output from 3133 'random' ZINC database molecules
## descriptor	mean
# mw		319.9400134014041
# SlogP_VSA1	9.619846179961822
# SlogP_VSA2	34.997968996171075
# SlogP_VSA3	9.793027915762599
# SlogP_VSA4	6.049321886407102
# SlogP_VSA5	25.947096066368807
# SlogP_VSA6	32.45248890906195
# SlogP_VSA7	0.7908396506062566
# SlogP_VSA8	4.296596145181884
# SlogP_VSA9	0.0
# SlogP_VSA10	4.070114919272454
# SlogP_VSA11	2.3151243225909495
# SlogP_VSA12	4.265721061263566
# SMR_VSA1	12.63901541544354
# SMR_VSA2	0.25907870899808505
# SMR_VSA3	10.056625507019914
# SMR_VSA4	2.714953877153796
# SMR_VSA5	25.574662986917733
# SMR_VSA6	19.44462612635612
# SMR_VSA7	44.14966515762617
# SMR_VSA8	0.0
# SMR_VSA9	4.360574342054924
# SMR_VSA10	15.398944225590286
# PEOE_VSA1	12.060297876515577
# PEOE_VSA2	6.086653529674543
# PEOE_VSA3	5.443402438098357
# PEOE_VSA4	2.052723992022967
# PEOE_VSA5	1.953408211550721
# PEOE_VSA6	18.40846272878091
# PEOE_VSA7	29.38461704626691
# PEOE_VSA8	21.670243503829038
# PEOE_VSA9	16.54696112635614
# PEOE_VSA10	9.817199155711478
# PEOE_VSA11	3.8629171110402245
# PEOE_VSA12	2.973432065092553
# PEOE_VSA13	2.845943177728157
# PEOE_VSA14	1.4918843372686643
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Rdkit-devel mailing list
Rdkit-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-devel

Reply via email to