Dear all lovely people,
first of all, I'm rather new to programming/Python/RDKit, and my issue is
probably quite easy to solve if you're more experienced.
So I wrote a Python script simulating a reaction workflow for a multi-step
synthesis with a lot of different building blocks. The program works as
intended, but it takes a lot of time (few days) because it uses only one
CPU thread. Therefore I thought about using more via
multiprocessing/multithreading, but I couldn't get it to run. I tried a
lot, but didn't even figure out exactly where to start. Maybe you guys can
give me a hint in the right direction?
First of all the "Synthesis-Class":
from rdkit import Chem
from rdkit.Chem import AllChem
class Synthesis:
    """Run a five-step virtual synthesis over text files of SMILES strings.

    Every input and output file is plain text with one SMILES string per
    line.  Each ``reactN`` method compiles its reaction SMARTS, applies the
    reaction to the molecules read from its input file(s) and writes every
    product as one SMILES line to its output file.  Products are written as
    they are produced; no duplicate removal happens in this class.
    """

    def __init__(self,
                 sdf_a, sdf_b, sdf_f, sdf_k,
                 s1_smarts, s2_smarts, s3_smarts, s4_smarts, s5_smarts,
                 ):
        """Store the building-block file paths and the reaction SMARTS.

        Args:
            sdf_a: Path of the first building-block file used by ``react1``.
            sdf_b: Path of the second building-block file used by ``react1``.
            sdf_f: Path of the building-block file used by ``react4``.
            sdf_k: Path of the building-block file used by ``react5``.
            s1_smarts: Reaction SMARTS for step 1.
            s2_smarts: Reaction SMARTS for step 2.
            s3_smarts: Reaction SMARTS for step 3.
            s4_smarts: Reaction SMARTS for step 4.
            s5_smarts: Reaction SMARTS for step 5.
        """
        self.sdf_a = sdf_a
        self.sdf_b = sdf_b
        self.sdf_f = sdf_f
        self.sdf_k = sdf_k
        self.s1_smarts = s1_smarts
        self.s2_smarts = s2_smarts
        self.s3_smarts = s3_smarts
        self.s4_smarts = s4_smarts
        self.s5_smarts = s5_smarts

    @staticmethod
    def _iter_smiles(path):
        """Yield the stripped, non-blank lines of the text file at *path*."""
        with open(path, "r") as handle:
            for line in handle:
                smiles = line.strip()
                if smiles:
                    yield smiles

    @classmethod
    def _iter_mols(cls, path):
        """Yield one parsed molecule per usable SMILES line in *path*.

        Lines for which ``Chem.MolFromSmiles`` returns ``None`` are skipped,
        so a single bad line cannot abort a long run.  This is the check
        ``react2`` always had, now applied to every step.
        """
        for smiles in cls._iter_smiles(path):
            mol = Chem.MolFromSmiles(smiles)
            if mol is not None:
                yield mol

    @staticmethod
    def _write_products(rxn, reactants, out):
        """Run *rxn* on *reactants* and write each product SMILES to *out*."""
        for product_set in rxn.RunReactants(reactants):
            for product in product_set:
                out.write(Chem.MolToSmiles(product))
                out.write("\n")

    def _run_single(self, smarts, in_path, out_path):
        """Apply a one-reactant reaction to every molecule in *in_path*."""
        rxn = AllChem.ReactionFromSmarts(smarts)
        with open(out_path, "w") as out:
            for mol in self._iter_mols(in_path):
                self._write_products(rxn, (mol,), out)

    def _run_pairs(self, smarts, outer_path, inner_path, out_path,
                   inner_first):
        """Apply a two-reactant reaction to every outer/inner combination.

        Args:
            smarts: Reaction SMARTS to compile.
            outer_path: File iterated once, in the outer loop.
            inner_path: File iterated once per outer molecule.
            out_path: File that receives the product SMILES.
            inner_first: If true the reactant tuple is ``(inner, outer)``,
                otherwise ``(outer, inner)``.
        """
        rxn = AllChem.ReactionFromSmarts(smarts)
        with open(out_path, "w") as out:
            for outer in self._iter_mols(outer_path):
                # The inner file is streamed again for every outer molecule
                # instead of being cached, so memory use stays independent
                # of the size of the inner file.
                for inner in self._iter_mols(inner_path):
                    if inner_first:
                        reactants = (inner, outer)
                    else:
                        reactants = (outer, inner)
                    self._write_products(rxn, reactants, out)

    def react1(self, t1):
        """First step: react every molecule of ``sdf_a`` with every one of ``sdf_b``.

        Args:
            t1: Path of the output file for the step-1 products.
        """
        self._run_pairs(self.s1_smarts, self.sdf_a, self.sdf_b, t1,
                        inner_first=False)

    def react2(self, t1, t2):
        """Second step: apply reaction 2 to every molecule in *t1*.

        Args:
            t1: Path of the input file (products of step 1).
            t2: Path of the output file for the step-2 products.
        """
        self._run_single(self.s2_smarts, t1, t2)

    def react3(self, t2, t3):
        """Third step: apply reaction 3 to every molecule in *t2*.

        Args:
            t2: Path of the input file (products of step 2).
            t3: Path of the output file for the step-3 products.
        """
        self._run_single(self.s3_smarts, t2, t3)

    def react4(self, t3, t4):
        """Fourth step: react every molecule of *t3* with every one of ``sdf_f``.

        The building-block file ``sdf_f`` drives the outer loop and the
        reactant tuple is ``(molecule from t3, molecule from sdf_f)``.

        Args:
            t3: Path of the input file (products of step 3).
            t4: Path of the output file for the step-4 products.
        """
        self._run_pairs(self.s4_smarts, self.sdf_f, t3, t4,
                        inner_first=True)

    def react5(self, t4, t5):
        """Last step: react every molecule of *t4* with every one of ``sdf_k``.

        The building-block file ``sdf_k`` drives the outer loop and the
        reactant tuple is ``(molecule from t4, molecule from sdf_k)``.

        Args:
            t4: Path of the input file (products of step 4).
            t5: Path of the output file for the step-5 products.
        """
        self._run_pairs(self.s5_smarts, self.sdf_k, t4, t5,
                        inner_first=True)
and then the actual execution program:
from rdkit import Chem
from rdkit.Chem import AllChem
from synthesis_class2_1 import Synthesis
# DuplicateRemoval is a simple class to remove duplicates from .txt files.
from duplicate_class import DuplicateRemoval

# Building-block input files (placeholders: replace with real paths).
sdf_a = ".txt file full of SMILE-Strings"
sdf_b = " .txt file full of SMILE-Strings "
sdf_f = " .txt file full of SMILE-Strings "
sdf_k = " .txt file full of SMILE-Strings "

# Reaction SMARTS, one per synthesis step (placeholders).
s1_smarts = "SMARTS for the reaction"
s2_smarts = " SMARTS for the reaction "
s3_smarts = " SMARTS for the reaction "
s4_smarts = " SMARTS for the reaction "
s5_smarts = " SMARTS for the reaction "

# Per-step output files: tN holds the raw products of step N, tNd the same
# products after duplicate removal (placeholders).
t1 = ".txt file with all the products of reaction 1"
t1d = " .txt file without duplicates for the products of reaction 1 "
t2 = " .txt file with all the products of reaction 2 "
t2d = " .txt file without duplicates for the products of reaction 2 "
t3 = " .txt file with all the products of reaction 3"
t3d = " .txt file without duplicates for the products of reaction 3 "
t4 = " .txt file with all the products of reaction 4 "
t4d = " .txt file without duplicates for the products of reaction 4 "
t5 = " .txt file with all the products of reaction 5 "
t5d = " .txt file without duplicates for the products of reaction 5 "

rxnrun = Synthesis(sdf_a, sdf_b, sdf_f, sdf_k, s1_smarts, s2_smarts,
                   s3_smarts, s4_smarts, s5_smarts)

# Each step reads the de-duplicated output of the previous step, so that
# duplicates are not multiplied through the later combinatorial steps.
rxnrun.react1(t1)
DuplicateRemoval.check_duplicates(t1, t1d)
rxnrun.react2(t1d, t2)
DuplicateRemoval.check_duplicates(t2, t2d)
rxnrun.react3(t2d, t3)
DuplicateRemoval.check_duplicates(t3, t3d)
# Step 4 previously read the raw t3 file, ignoring the duplicate removal
# done on the line above; it now reads t3d like every other step.
rxnrun.react4(t3d, t4)
DuplicateRemoval.check_duplicates(t4, t4d)
rxnrun.react5(t4d, t5)
DuplicateRemoval.check_duplicates(t5, t5d)
That's it. I tried to implement different "types" of
multiprocessing/threading for each "RunReactants()" call but haven't figured
out how, which is frustrating because everyone says that implementing
multiprocessing in Python/RDKit is rather easy.
Any tips are appreciated.
Many, many thanks for your support.
Kind regards
Philipp
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss