Hello,
I am trying to generate a set of pharmacophore fingerprints in python using 
RDKIT from a list of SMILES (20k molecules)
No matter what I do the script keeps throwing an error saying my code doesn't 
match the C++ signature.

Here  is a copy of my code as it stands right now

from __future__ import print_function
import os
import csv
import numpy as np
import pandas as pd
from rdkit import RDConfig, Chem, DataStructs, rdBase
from rdkit.Chem import rdFingerprintGenerator, rdMolDescriptors, AllChem, 
rdFMCS, MACCSkeys, Draw, PandasTools, ChemicalFeatures, rdDepictor
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem.Draw import IPythonConsole, MolDraw2D
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from IPython.display import SVG
from tabulate import tabulate

os.chdir(r'C:\Users\adumas\Desktop\')
input1 = r'C:\Users\adumas\Desktop\FILE.csv'
output = r'C:\Users\adumas\Desktop\FILE.csv'

df = pd.read_csv(input1, delimiter = ',', header = 0, index_col = [''], names = 
['','row ID','MOL_ID', 'SMILES',])
PandasTools.AddMoleculeColumnToFrame(df, smilesCol = 'SMILES')
SMILES = []
SMILES = df.iloc[0:,4]
ID = df.iloc[0:,1]

molecules = [Chem.MolFromSmiles(x) for x in SMILES]
fingerprints = [FingerprintMols.FingerprintMol(x) for x in molecules]

morganfps = rdFingerprintGenerator.GetFPs(list(df['ROMol']))
df['Morgan Fingerprint'] = morganfps

mcassfps = [MACCSkeys.GenMACCSKeys(x) for x in list(df['ROMol'])]
df['MCASS Fingerprint'] = mcassfps

fdefName = 'BaseFeatures.fdef'
featFactory = ChemicalFeatures.BuildFeatureFactory(fdefName)
sigFactory = SigFactory(featFactory, minPointCount=2, maxPointCount=9)
sigFactory.SetBins([(0,2),(2,5),(5,8)])
sigFactory.Init()
sigFactory.GetSigSize()
pharmacophorefps = Generate.Gen2DFingerprint(list(df['ROMol']), sigFactory)     
 ************* Line throwing error constantly no matter whether I specify 
(SMILES, sigFactory) or (molecules, sigFactory) or (df.iloc[0:,4], sigFactory)
df['Pharmacophore Fingerprints'] = pharmacophorefps

And the error it throws me every time no matter how I try to define the list of 
smiles
ArgumentError: Python argument types in
    rdkit.Chem.rdmolops.GetDistanceMatrix(Series, bool)
did not match C++ signature:
    GetDistanceMatrix(class RDKit::ROMol {lvalue} mol, bool useBO=False, bool 
useAtomWts=False, bool force=False, char const * __ptr64 prefix='')



_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to