Hi Philipp,

This is an embarrassingly parallel problem (that's the actual technical
term, so no need to feel embarrassed. :-), meaning there's no need for
communication between threads or processes, which makes it really easy:
just split the search space, run a separate job for each fraction, and
combine the outputs at the end.

For example, if your building block file "A" has 100 structures and you
want to use 4 CPUs, just split it into four files of 25 structures, run a
job using each file, and concatenate the outputs when the four jobs are
done. IMHO there's no need for any fancy multiprocessing modules; running a
completely independent process gives you more flexibility (you can run each
subjob on a different machine, submit it to a queue, email it to your
cousin, or whatever...)

Ivan

On Fri, Jun 11, 2021 at 6:47 AM Philipp Otten <philipp.ott...@gmail.com>
wrote:

> Dear all lovely people,
> first of all, I'm rather new to programming/Python/RDKit, and my issue is
> probably quite easy to solve if you're more experienced.
> So I wrote a Python script simulating a reaction workflow for a multi-step
> synthesis with a lot of different building blocks. The program works
> as intended, but it takes a lot of time (a few days) because it uses only one
> CPU thread. Therefore I thought about using more via
> multiprocessing/multithreading, but I couldn't get it to run. I tried a
> lot, but didn't even figure out exactly where to start. Maybe you guys can
> give me a hint in the right direction?
>
> First of all the "Synthesis-Class":
>
> from rdkit import Chem
> from rdkit.Chem import AllChem
>
>
> class Synthesis:
>     """Combination of the single synthesis steps."""
>
>     def __init__(self,
>                  sdf_a, sdf_b, sdf_f, sdf_k,
>                  s1_smarts, s2_smarts, s3_smarts, s4_smarts, s5_smarts,
>                  ):
>         """Initialize building block molecules"""
>         self.sdf_a = sdf_a
>         self.sdf_b = sdf_b
>         self.sdf_f = sdf_f
>         self.sdf_k = sdf_k
>         self.s1_smarts = s1_smarts
>         self.s2_smarts = s2_smarts
>         self.s3_smarts = s3_smarts
>         self.s4_smarts = s4_smarts
>         self.s5_smarts = s5_smarts
>
>     def react1(self, t1):
>         """First step of the reaction"""
>         rxn1 = AllChem.ReactionFromSmarts(self.s1_smarts)
>         with open(t1, "w") as t1:
>             with open(self.sdf_a, "r") as f_a:
>                 while True:
>                     line_a = f_a.readline()
>
>                     if not line_a:
>                         break
>
>                     line_a = Chem.MolFromSmiles(line_a)
>
>                     with open(self.sdf_b) as f_b:
>                         while True:
>                             line_b = f_b.readline()
>
>                             if not line_b:
>                                 break
>
>                             line_b = Chem.MolFromSmiles(line_b)
>
>                             p1 = rxn1.RunReactants((line_a, line_b))
>                             p1 = [x for t in p1 for x in t]
>                             for x in p1:
>                                 x = Chem.MolToSmiles(x)
>                                 t1.write(x)
>                                 t1.write("\n")
>                 f_a.close()
>                 f_b.close()
>                 t1.close()
>
>     def react2(self, t1, t2):
>         """Second step of the synthesis"""
>         rxn2 = AllChem.ReactionFromSmarts(self.s2_smarts)
>         with open(t2, "w") as t2:
>             with open(t1, "r") as f_c:
>                 while True:
>                     line_c = f_c.readline()
>                     if not line_c:
>                         break
>
>                     line_c = Chem.MolFromSmiles(line_c)
>
>                     if line_c == None:
>                         pass
>                     else:
>                         p2 = rxn2.RunReactants((line_c,))
>                         p2 = [x for t in p2 for x in t]
>                         for x in p2:
>                             x = Chem.MolToSmiles(x)
>                             t2.write(x)
>                             t2.write("\n")
>             f_c.close()
>             t2.close()
>
>     def react3 (self, t2, t3):
>         """Third step of the synthesis"""
>         rxn3 = AllChem.ReactionFromSmarts(self.s3_smarts)
>         with open(t3, "w") as t3:
>             with open(t2, "r") as f_d:
>                     while True:
>                         line_d = f_d.readline()
>
>                         if not line_d:
>                             break
>
>                         line_d = Chem.MolFromSmiles(line_d)
>
>                         p3 = rxn3.RunReactants((line_d,))
>                         p3 = [x for t in p3 for x in t]
>                         for x in p3:
>                             x = Chem.MolToSmiles(x)
>                             t3.write(x)
>                             t3.write("\n")
>             f_d.close()
>             t3.close()
>
>
>     def react4 (self, t3,  t4):
>         """Fourth step of the synthesis"""
>         rxn4 = AllChem.ReactionFromSmarts(self.s4_smarts)
>
>         with open(t4, "w") as t4:
>             with open(self.sdf_f, "r") as f_f:
>                 while True:
>                     line_f = f_f.readline()
>
>                     if not line_f:
>                         break
>
>                     line_f = Chem.MolFromSmiles(line_f)
>
>                     with open(t3) as f_g:
>                         while True:
>                             line_g = f_g.readline()
>
>                             if not line_g:
>                                 break
>
>                             line_g = Chem.MolFromSmiles(line_g)
>
>                             p4 = rxn4.RunReactants((line_g, line_f))
>                             p4 = [x for t in p4 for x in t]
>                             for x in p4:
>                                 x = Chem.MolToSmiles(x)
>                                 t4.write(x)
>                                 t4.write("\n")
>             f_f.close()
>             f_g.close()
>             t4.close()
>
>     def react5(self, t4, t5):
>         """Last step of the synthesis"""
>         rxn5 = AllChem.ReactionFromSmarts(self.s5_smarts)
>         with open(t5, "w") as t5:
>             with open(self.sdf_k, "r") as f_k:
>                 while True:
>                     line_k = f_k.readline()
>
>                     if not line_k:
>                         break
>                     line_k = Chem.MolFromSmiles(line_k)
>
>                     with open(t4) as f_l:
>                         while True:
>                             line_l = f_l.readline()
>
>                             if not line_l:
>                                 break
>
>                             line_l = Chem.MolFromSmiles(line_l)
>
>                             p5 = rxn5.RunReactants((line_l, line_k))
>                             p5 = [x for t in p5 for x in t]
>                             for x in p5:
>                                 x = Chem.MolToSmiles(x)
>                                 t5.write(x)
>                                 t5.write("\n")
>
> and then the actual execution program:
>
> from rdkit import Chem
> from rdkit.Chem import AllChem
> from synthesis_class2_1 import Synthesis
> from duplicate_class import DuplicateRemoval #simple class to remove
> # duplicates from .txt files.
>
>
> sdf_a = ".txt file full of SMILE-Strings"
> sdf_b = " .txt file full of SMILE-Strings "
> sdf_f = " .txt file full of SMILE-Strings "
> sdf_k = " .txt file full of SMILE-Strings "
>
> s1_smarts = "SMARTS for the reaction"
> s2_smarts = " SMARTS for the reaction "
> s3_smarts = " SMARTS for the reaction "
> s4_smarts = " SMARTS for the reaction "
> s5_smarts = " SMARTS for the reaction "
>
> t1 = ".txt file with all the products of reaction 1"
> t1d = " .txt file without duplicates for the products of reaction 1 "
> t2 = " .txt file with all the products of reaction 2 "
> t2d = " .txt file without duplicates for the products of reaction 2 "
> t3 = " .txt file with all the products of reaction 3"
> t3d = " .txt file without duplicates for the products of reaction 3 "
> t4 = " .txt file with all the products of reaction 4 "
> t4d = " .txt file without duplicates for the products of reaction 4 "
> t5 = " .txt file with all the products of reaction 5 "
> t5d = " .txt file without duplicates for the products of reaction 5 "
>
> rxnrun = Synthesis(sdf_a, sdf_b, sdf_f, sdf_k, s1_smarts, s2_smarts,
> s3_smarts, s4_smarts, s5_smarts)
> rxnrun.react1(t1)
> DuplicateRemoval.check_duplicates(t1, t1d)
> rxnrun.react2(t1d, t2)
> DuplicateRemoval.check_duplicates(t2, t2d)
> rxnrun.react3(t2d, t3)
> DuplicateRemoval.check_duplicates(t3, t3d)
> rxnrun.react4(t3, t4)
> DuplicateRemoval.check_duplicates(t4, t4d)
> rxnrun.react5(t4d, t5)
> DuplicateRemoval.check_duplicates(t5, t5d)
>
>
> That's it. I tried to implement different "types" of
> multiprocessing/threading for each "RunReactants()" but haven't figured out
> how, which is especially frustrating because everyone says that implementing
> multiprocessing in Python/RDKit is rather easy.
> Any tips appreciated.
> Many, many thanks for your support.
> Kind regards
> Philipp
> _______________________________________________
> Rdkit-discuss mailing list
> Rdkit-discuss@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/rdkit-discuss
>
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to