Dear all lovely people, first of all, I'm rather new to programming/Python/RDKit, and my issue is probably quite easy to solve if you're more experienced. I wrote a Python script simulating a reaction workflow for a multi-step synthesis with a lot of different building blocks. The program works as intended, but it takes a lot of time (a few days) because it uses only one CPU thread. Therefore I thought about using more threads via multiprocessing/multithreading, but I couldn't get it to run. I tried a lot, but couldn't even figure out exactly where to start. Maybe you can give me a hint in the right direction?
# First of all the "Synthesis-Class":
from rdkit import Chem
from rdkit.Chem import AllChem


class Synthesis:
    """Combination of the single synthesis steps.

    Every ``reactN`` method reads SMILES strings (one per line) from text
    files, runs one reaction on them and writes the product SMILES (one per
    line) to an output text file.  Lines that are blank or that RDKit cannot
    parse are skipped instead of aborting the whole run.
    """

    def __init__(
        self,
        sdf_a,
        sdf_b,
        sdf_f,
        sdf_k,
        s1_smarts,
        s2_smarts,
        s3_smarts,
        s4_smarts,
        s5_smarts,
    ):
        """Store the building-block file paths and the reaction SMARTS.

        Args:
            sdf_a: Path of the first building-block file used by step 1.
            sdf_b: Path of the second building-block file used by step 1.
            sdf_f: Path of the building-block file used by step 4.
            sdf_k: Path of the building-block file used by step 5.
            s1_smarts: Reaction SMARTS of step 1.
            s2_smarts: Reaction SMARTS of step 2.
            s3_smarts: Reaction SMARTS of step 3.
            s4_smarts: Reaction SMARTS of step 4.
            s5_smarts: Reaction SMARTS of step 5.
        """
        self.sdf_a = sdf_a
        self.sdf_b = sdf_b
        self.sdf_f = sdf_f
        self.sdf_k = sdf_k
        self.s1_smarts = s1_smarts
        self.s2_smarts = s2_smarts
        self.s3_smarts = s3_smarts
        self.s4_smarts = s4_smarts
        self.s5_smarts = s5_smarts

    @staticmethod
    def _iter_mols(path):
        """Yield one molecule for every parseable SMILES line of *path*."""
        with open(path, "r") as handle:
            for line in handle:
                smiles = line.strip()
                if not smiles:
                    continue
                mol = Chem.MolFromSmiles(smiles)
                # MolFromSmiles signals a parse failure by returning None;
                # handing None to RunReactants would abort the whole step.
                if mol is not None:
                    yield mol

    @staticmethod
    def _write_products(rxn, reactants, out):
        """Run *rxn* on *reactants* and write each product SMILES to *out*."""
        for product_set in rxn.RunReactants(reactants):
            for product in product_set:
                out.write(Chem.MolToSmiles(product))
                out.write("\n")

    def _run_single(self, smarts, in_path, out_path):
        """Apply a one-reactant reaction to every molecule of *in_path*."""
        rxn = AllChem.ReactionFromSmarts(smarts)
        with open(out_path, "w") as out:
            for mol in self._iter_mols(in_path):
                self._write_products(rxn, (mol,), out)

    def _run_pairwise(self, smarts, outer_path, inner_path, out_path, inner_first):
        """Apply a two-reactant reaction to every outer/inner combination.

        The inner file is deliberately re-read for each outer molecule: this
        keeps memory use bounded and the output line order stable.  If the
        inner file fits in RAM, materialising it once with
        ``list(self._iter_mols(inner_path))`` avoids re-parsing it.

        Args:
            smarts: Reaction SMARTS to run.
            outer_path: File iterated in the outer loop.
            inner_path: File iterated in the inner loop.
            out_path: File the product SMILES are written to.
            inner_first: If true the reactants are passed as (inner, outer),
                otherwise as (outer, inner).
        """
        rxn = AllChem.ReactionFromSmarts(smarts)
        with open(out_path, "w") as out:
            for outer in self._iter_mols(outer_path):
                for inner in self._iter_mols(inner_path):
                    reactants = (inner, outer) if inner_first else (outer, inner)
                    self._write_products(rxn, reactants, out)

    def react1(self, t1):
        """First step: react every molecule of sdf_a with every one of sdf_b.

        Args:
            t1: Path of the output file for the step-1 products.
        """
        self._run_pairwise(
            self.s1_smarts, self.sdf_a, self.sdf_b, t1, inner_first=False
        )

    def react2(self, t1, t2):
        """Second step: one-reactant reaction on the molecules listed in t1.

        Args:
            t1: Path of the input file (products of step 1).
            t2: Path of the output file for the step-2 products.
        """
        self._run_single(self.s2_smarts, t1, t2)

    def react3(self, t2, t3):
        """Third step: one-reactant reaction on the molecules listed in t2.

        Args:
            t2: Path of the input file (products of step 2).
            t3: Path of the output file for the step-3 products.
        """
        self._run_single(self.s3_smarts, t2, t3)

    def react4(self, t3, t4):
        """Fourth step: react every molecule of t3 with every one of sdf_f.

        The reactants are passed as (molecule from t3, molecule from sdf_f).

        Args:
            t3: Path of the input file (products of step 3).
            t4: Path of the output file for the step-4 products.
        """
        self._run_pairwise(self.s4_smarts, self.sdf_f, t3, t4, inner_first=True)

    def react5(self, t4, t5):
        """Last step: react every molecule of t4 with every one of sdf_k.

        The reactants are passed as (molecule from t4, molecule from sdf_k).

        Args:
            t4: Path of the input file (products of step 4).
            t5: Path of the output file for the step-5 products.
        """
        self._run_pairwise(self.s5_smarts, self.sdf_k, t4, t5, inner_first=True)


# and then the actual execution program:
from rdkit import Chem
from rdkit.Chem import AllChem
from synthesis_class2_1 import Synthesis
from duplicate_class import DuplicateRemoval  # simple class to remove duplicates from .txt files.
# Input files with the building blocks (one SMILES string per line).
sdf_a = ".txt file full of SMILE-Strings"
sdf_b = " .txt file full of SMILE-Strings "
sdf_f = " .txt file full of SMILE-Strings "
sdf_k = " .txt file full of SMILE-Strings "

# Reaction SMARTS, one per synthesis step.
s1_smarts = "SMARTS for the reaction"
s2_smarts = " SMARTS for the reaction "
s3_smarts = " SMARTS for the reaction "
s4_smarts = " SMARTS for the reaction "
s5_smarts = " SMARTS for the reaction "

# Output files: tN holds the raw products of step N, tNd the same products
# after duplicate removal.
t1 = ".txt file with all the products of reaction 1"
t1d = " .txt file without duplicates for the products of reaction 1 "
t2 = " .txt file with all the products of reaction 2 "
t2d = " .txt file without duplicates for the products of reaction 2 "
t3 = " .txt file with all the products of reaction 3"
t3d = " .txt file without duplicates for the products of reaction 3 "
t4 = " .txt file with all the products of reaction 4 "
t4d = " .txt file without duplicates for the products of reaction 4 "
t5 = " .txt file with all the products of reaction 5 "
t5d = " .txt file without duplicates for the products of reaction 5 "


def main():
    """Run the five synthesis steps, de-duplicating after each one."""
    rxnrun = Synthesis(
        sdf_a,
        sdf_b,
        sdf_f,
        sdf_k,
        s1_smarts,
        s2_smarts,
        s3_smarts,
        s4_smarts,
        s5_smarts,
    )

    rxnrun.react1(t1)
    DuplicateRemoval.check_duplicates(t1, t1d)

    rxnrun.react2(t1d, t2)
    DuplicateRemoval.check_duplicates(t2, t2d)

    rxnrun.react3(t2d, t3)
    DuplicateRemoval.check_duplicates(t3, t3d)

    # Step 4 now reads the de-duplicated step-3 file like every other step;
    # the original passed the raw file t3, which left t3d unused.
    rxnrun.react4(t3d, t4)
    DuplicateRemoval.check_duplicates(t4, t4d)

    rxnrun.react5(t4d, t5)
    DuplicateRemoval.check_duplicates(t5, t5d)


# The guard keeps the pipeline from re-running when the module is imported,
# e.g. by multiprocessing worker processes started with the "spawn" method.
if __name__ == "__main__":
    main()

# That's it. I tried to implement different "types" of multiprocessing/threading
# for each "RunReactants()" but haven't figured out how. Especially because
# everyone says that implementing multiprocessing in Python/RDKit is rather easy.
# Any tips appreciated. Many, many thanks for your support.
# Kind regards
# Philipp
_______________________________________________ Rdkit-discuss mailing list Rdkit-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/rdkit-discuss