Dear all lovely people,
first of all, I'm rather new to programming/Python/RDKit, and my issue is
probably quite easy to solve if you're more experienced.
So I wrote a Python script simulating a reaction workflow for a multi-step
synthesis with a lot of different building blocks. The program works as
intended, but it takes a lot of time (few days) because it uses only one
CPU thread. Therefore I thought about using more via
multiprocessing/multithreading, but I couldn't get it to run. I tried a
lot, but didn't even figure out exactly where to start. Maybe you guys can
give me a hint in the right direction?

First of all the "Synthesis-Class":

from rdkit import Chem
from rdkit.Chem import AllChem


class Synthesis:
    """Combination of the single synthesis steps.

    Every ``reactN`` method reads SMILES strings (one per line) from text
    files, applies one reaction SMARTS to them and writes the SMILES of all
    generated products, one per line, to an output text file.
    """

    def __init__(self,
                 sdf_a, sdf_b, sdf_f, sdf_k,
                 s1_smarts, s2_smarts, s3_smarts, s4_smarts, s5_smarts,
                 ):
        """Store the building-block file paths and the reaction SMARTS.

        Args:
            sdf_a: Path of the SMILES file used as first reactant in step 1.
            sdf_b: Path of the SMILES file used as second reactant in step 1.
            sdf_f: Path of the SMILES file used as second reactant in step 4.
            sdf_k: Path of the SMILES file used as second reactant in step 5.
            s1_smarts: Reaction SMARTS for step 1.
            s2_smarts: Reaction SMARTS for step 2.
            s3_smarts: Reaction SMARTS for step 3.
            s4_smarts: Reaction SMARTS for step 4.
            s5_smarts: Reaction SMARTS for step 5.
        """
        self.sdf_a = sdf_a
        self.sdf_b = sdf_b
        self.sdf_f = sdf_f
        self.sdf_k = sdf_k
        self.s1_smarts = s1_smarts
        self.s2_smarts = s2_smarts
        self.s3_smarts = s3_smarts
        self.s4_smarts = s4_smarts
        self.s5_smarts = s5_smarts

    @staticmethod
    def _iter_mols(path):
        """Yield one parsed molecule per usable SMILES line of *path*."""
        with open(path, "r") as handle:
            for line in handle:
                smiles = line.strip()
                if not smiles:
                    # Blank lines carry no structure and cannot react.
                    continue
                mol = Chem.MolFromSmiles(smiles)
                # MolFromSmiles returns None for unparsable input; handing
                # None to RunReactants would abort the whole run.
                if mol is not None:
                    yield mol

    @staticmethod
    def _write_products(rxn, reactants, out):
        """Run *rxn* on *reactants* and write each product SMILES to *out*."""
        for product_set in rxn.RunReactants(reactants):
            for product in product_set:
                out.write(Chem.MolToSmiles(product))
                out.write("\n")

    def _run_single(self, smarts, in_path, out_path):
        """Apply a one-reactant reaction to every molecule of *in_path*."""
        rxn = AllChem.ReactionFromSmarts(smarts)
        with open(out_path, "w") as out:
            for mol in self._iter_mols(in_path):
                self._write_products(rxn, (mol,), out)

    def _run_pairs(self, smarts, outer_path, inner_path, out_path,
                   outer_first):
        """Apply a two-reactant reaction to every outer/inner combination.

        The outer file drives the outer loop, so the product order in
        *out_path* is grouped by outer molecule. *outer_first* selects which
        of the two molecules fills the first reactant slot of the reaction.
        """
        rxn = AllChem.ReactionFromSmarts(smarts)
        with open(out_path, "w") as out:
            for outer in self._iter_mols(outer_path):
                # NOTE: the inner file is re-read for every outer molecule so
                # that memory use stays independent of the inner file's size.
                for inner in self._iter_mols(inner_path):
                    if outer_first:
                        reactants = (outer, inner)
                    else:
                        reactants = (inner, outer)
                    self._write_products(rxn, reactants, out)

    def react1(self, t1):
        """First step: react every molecule of sdf_a with every one of sdf_b.

        Args:
            t1: Path of the text file that receives the product SMILES.
        """
        self._run_pairs(self.s1_smarts, self.sdf_a, self.sdf_b, t1,
                        outer_first=True)

    def react2(self, t1, t2):
        """Second step: transform every molecule read from *t1*.

        Args:
            t1: Path of the SMILES file holding the reactants.
            t2: Path of the text file that receives the product SMILES.
        """
        self._run_single(self.s2_smarts, t1, t2)

    def react3(self, t2, t3):
        """Third step: transform every molecule read from *t2*.

        Args:
            t2: Path of the SMILES file holding the reactants.
            t3: Path of the text file that receives the product SMILES.
        """
        self._run_single(self.s3_smarts, t2, t3)

    def react4(self, t3, t4):
        """Fourth step: react every molecule of *t3* with every one of sdf_f.

        The molecule from *t3* is the first reactant, the one from sdf_f the
        second; products are grouped by sdf_f molecule in the output.

        Args:
            t3: Path of the SMILES file holding the first reactants.
            t4: Path of the text file that receives the product SMILES.
        """
        self._run_pairs(self.s4_smarts, self.sdf_f, t3, t4,
                        outer_first=False)

    def react5(self, t4, t5):
        """Last step: react every molecule of *t4* with every one of sdf_k.

        The molecule from *t4* is the first reactant, the one from sdf_k the
        second; products are grouped by sdf_k molecule in the output.

        Args:
            t4: Path of the SMILES file holding the first reactants.
            t5: Path of the text file that receives the product SMILES.
        """
        self._run_pairs(self.s5_smarts, self.sdf_k, t4, t5,
                        outer_first=False)

and then the actual execution program:

from rdkit import Chem
from rdkit.Chem import AllChem
from synthesis_class2_1 import Synthesis
from duplicate_class import DuplicateRemoval  # simple class to remove
# duplicates from .txt files.


# Building-block input files (one SMILES string per line).
sdf_a = ".txt file full of SMILE-Strings"
sdf_b = " .txt file full of SMILE-Strings "
sdf_f = " .txt file full of SMILE-Strings "
sdf_k = " .txt file full of SMILE-Strings "

# Reaction SMARTS, one per synthesis step.
s1_smarts = "SMARTS for the reaction"
s2_smarts = " SMARTS for the reaction "
s3_smarts = " SMARTS for the reaction "
s4_smarts = " SMARTS for the reaction "
s5_smarts = " SMARTS for the reaction "

# Output files: tN holds the raw products of step N, tNd the same products
# after duplicate removal.
t1 = ".txt file with all the products of reaction 1"
t1d = " .txt file without duplicates for the products of reaction 1 "
t2 = " .txt file with all the products of reaction 2 "
t2d = " .txt file without duplicates for the products of reaction 2 "
t3 = " .txt file with all the products of reaction 3"
t3d = " .txt file without duplicates for the products of reaction 3 "
t4 = " .txt file with all the products of reaction 4 "
t4d = " .txt file without duplicates for the products of reaction 4 "
t5 = " .txt file with all the products of reaction 5 "
t5d = " .txt file without duplicates for the products of reaction 5 "

rxnrun = Synthesis(sdf_a, sdf_b, sdf_f, sdf_k, s1_smarts, s2_smarts,
                   s3_smarts, s4_smarts, s5_smarts)

# Each step consumes the de-duplicated output of the previous one.
rxnrun.react1(t1)
DuplicateRemoval.check_duplicates(t1, t1d)
rxnrun.react2(t1d, t2)
DuplicateRemoval.check_duplicates(t2, t2d)
rxnrun.react3(t2d, t3)
DuplicateRemoval.check_duplicates(t3, t3d)
# Step 4 previously read the raw t3 file, so the duplicates just removed
# into t3d were reacted again; use t3d like every other step.
rxnrun.react4(t3d, t4)
DuplicateRemoval.check_duplicates(t4, t4d)
rxnrun.react5(t4d, t5)
DuplicateRemoval.check_duplicates(t5, t5d)


That's it. I tried to implement different "types" of
multiprocessing/threading for each "RunReactants()" but haven't figured out
how, especially since everyone says that implementing multiprocessing in
Python/RDKit is rather easy.
Any tips are appreciated.
Many, many thanks for your support.
Kind regards
Philipp
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to