Hello, I am trying to generate a set of pharmacophore fingerprints in Python using RDKit from a list of SMILES (20k molecules). No matter what I do, the script keeps throwing an error saying my code doesn't match the C++ signature.
Here is a copy of my code as it stands right now:

from __future__ import print_function
import os
import csv
import numpy as np
import pandas as pd
from rdkit import RDConfig, Chem, DataStructs, rdBase
from rdkit.Chem import rdFingerprintGenerator, rdMolDescriptors, AllChem, rdFMCS, MACCSkeys, Draw, PandasTools, ChemicalFeatures, rdDepictor
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem.Draw import IPythonConsole, MolDraw2D
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from IPython.display import SVG
from tabulate import tabulate
os.chdir(r'C:\Users\adumas\Desktop\')
input1 = r'C:\Users\adumas\Desktop\FILE.csv'
output = r'C:\Users\adumas\Desktop\FILE.csv'
df = pd.read_csv(input1, delimiter = ',', header = 0, index_col = [''], names = ['','row ID','MOL_ID', 'SMILES',])
PandasTools.AddMoleculeColumnToFrame(df, smilesCol = 'SMILES')
SMILES = []
SMILES = df.iloc[0:,4]
ID = df.iloc[0:,1]
molecules = [Chem.MolFromSmiles(x) for x in SMILES]
fingerprints = [FingerprintMols.FingerprintMol(x) for x in molecules]
morganfps = rdFingerprintGenerator.GetFPs(list(df['ROMol']))
df['Morgan Fingerprint'] = morganfps
mcassfps = [MACCSkeys.GenMACCSKeys(x) for x in list(df['ROMol'])]
df['MCASS Fingerprint'] = mcassfps
fdefName = 'BaseFeatures.fdef'
featFactory = ChemicalFeatures.BuildFeatureFactory(fdefName)
sigFactory = SigFactory(featFactory, minPointCount=2, maxPointCount=9)
sigFactory.SetBins([(0,2),(2,5),(5,8)])
sigFactory.Init()
sigFactory.GetSigSize()
pharmacophorefps = Generate.Gen2DFingerprint(list(df['ROMol']), sigFactory) ************* Line throwing error constantly no matter whether I specify (SMILES, sigFactory) or (molecules, sigFactory) or (df.iloc[0:,4], sigFactory)
df['Pharmacophore Fingerprints'] = pharmacophorefps

And this is the error it throws every time, no matter how I try to define the list of SMILES:

ArgumentError: Python argument types in
    rdkit.Chem.rdmolops.GetDistanceMatrix(Series, bool)
did not match C++ signature:
    GetDistanceMatrix(class RDKit::ROMol {lvalue} mol, bool useBO=False, bool useAtomWts=False, bool force=False, char const * __ptr64 prefix='')
_______________________________________________ Rdkit-discuss mailing list Rdkit-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/rdkit-discuss