Hi All,

I've been trying to calculate BCUT2D descriptors in parallel with Dask, and
I get the following error when I run the code below:
TypeError: cannot pickle 'Boost.Python.function' object

Everything works if I call mw_df, which calculates molecular weight, but I
get the error above if I call bcut_df.  Does anyone have a workaround?

Thanks,

Pat

#!/usr/bin/env python

import sys
import dask.dataframe as dd
import pandas as pd
from rdkit import Chem
from rdkit.Chem.Descriptors import MolWt
from rdkit.Chem.rdMolDescriptors import BCUT2D
import time

# --  molecular weight functions
def calc_mw(smi):
    """Return the molecular weight for the SMILES string *smi*.

    The string is parsed with ``Chem.MolFromSmiles`` and the parse result is
    passed directly to ``MolWt``; no check is made on the parse result.
    """
    return MolWt(Chem.MolFromSmiles(smi))

def mw_df(df):
    """Apply ``calc_mw`` to each entry of the ``SMILES`` column of *df*.

    Returns whatever ``Series.apply`` produces for that column, i.e. one
    result per row, indexed like *df*.
    """
    smiles_column = df.SMILES
    return smiles_column.apply(calc_mw)

# -- bcut functions
def bcut_df(df):
    """Apply ``calc_bcut`` to each entry of the ``SMILES`` column of *df*.

    Returns one ``calc_bcut`` result per row, indexed like *df*.
    """
    # DataFrame.apply would call calc_bcut once per *column*, handing it a
    # whole Series instead of a SMILES string.  Select the SMILES column so
    # each call receives a single string, the same way mw_df does.
    return df.SMILES.apply(calc_bcut)

def calc_bcut(smi):
    """Return the BCUT2D descriptor values for the SMILES string *smi*.

    The string is parsed with ``Chem.MolFromSmiles`` and the parse result is
    passed directly to ``BCUT2D``; no check is made on the parse result.
    """
    # Look BCUT2D up through its module at call time instead of through a
    # module-level ``from ... import BCUT2D`` global.  With the 'processes'
    # scheduler this function is serialized by value (it lives in __main__)
    # together with the globals it references, and a Boost.Python function
    # object bound as such a global cannot be pickled.  A function-local
    # import is not captured, so only this function's code is shipped.
    from rdkit.Chem import rdMolDescriptors

    mol = Chem.MolFromSmiles(smi)
    return rdMolDescriptors.BCUT2D(mol)

def main():
    """Time the MW and BCUT calculations for the SMILES file in ``sys.argv[1]``.

    The input is read as a space-separated file without a header row; its two
    columns are named ``SMILES`` and ``Name``.  The frame is split into 16
    Dask partitions, ``mw_df`` and ``bcut_df`` are run over the partitions
    with the 'processes' scheduler, and the results are stored in the ``MW``
    and ``BCUT`` columns.  The elapsed wall-clock time in seconds and the
    first rows of the resulting frame are printed.
    """
    start = time.time()
    df = pd.read_csv(sys.argv[1], sep=" ", names=["SMILES", "Name"])
    ddf = dd.from_pandas(df, npartitions=16)
    # Each assignment has to be a single logical line: a line break directly
    # after ``=`` is a SyntaxError, so the continuation is kept inside the
    # call parentheses.
    ddf['MW'] = ddf.map_partitions(mw_df, meta='float').compute(
        scheduler='processes'
    )
    # NOTE(review): BCUT2D presumably yields several values per molecule, so
    # meta='float' may not describe this result accurately -- confirm.
    ddf['BCUT'] = ddf.map_partitions(bcut_df, meta='float').compute(
        scheduler='processes'
    )
    print(time.time() - start)
    print(ddf.head())


# Run the timing script only when this file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to