Hi All, I've been trying to calculate BCUT2D descriptors in parallel with Dask and get this error with the code below. TypeError: cannot pickle 'Boost.Python.function' object
Everything works if I call mw_df, which calculates molecular weight, but I get the error above if I call bcut_df. Does anyone have a workaround? Thanks, Pat #!/usr/bin/env python import sys import dask.dataframe as dd import pandas as pd from rdkit import Chem from rdkit.Chem.Descriptors import MolWt from rdkit.Chem.rdMolDescriptors import BCUT2D import time # -- molecular weight functions def calc_mw(smi): mol = Chem.MolFromSmiles(smi) return MolWt(mol) def mw_df(df): return df.SMILES.apply(calc_mw) # -- bcut functions def bcut_df(df): return df.apply(calc_bcut) def calc_bcut(smi): mol = Chem.MolFromSmiles(smi) return BCUT2D(mol) def main(): start = time.time() df = pd.read_csv(sys.argv[1],sep=" ",names=["SMILES","Name"]) ddf = dd.from_pandas(df,npartitions=16) ddf['MW'] = ddf.map_partitions(mw_df,meta='float').compute(scheduler='processes') ddf['BCUT'] = ddf.map_partitions(bcut_df,meta='float').compute(scheduler='processes') print(time.time()-start) print(ddf.head()) if __name__ == "__main__": main()
_______________________________________________ Rdkit-discuss mailing list Rdkit-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/rdkit-discuss