Re: [Rdkit-discuss] Multiprocessing/Threading in Python/Rdkit

2021-06-12 Thread Ivan Tubert-Brohman
Hi Philipp,

This is an embarrassingly parallel problem (that's the actual technical
term, so no need to feel embarrassed. :-), meaning there's no need for
communication between threads or processes, which makes it really easy:
just split the search space, run a separate job for each fraction, and
combine the outputs at the end.

For example, if your building block file "A" has 100 structures and you
want to use 4 CPUs, just split it into four files of 25 structures, run a
job using each file, and concatenate the outputs when the four jobs are
done. IMHO there's no need for any fancy multiprocessing modules; running a
completely independent process gives you more flexibility (you can run each
subjob on a different machine, submit it to a queue, email it to your
cousin, or whatever...)

Ivan

On Fri, Jun 11, 2021 at 6:47 AM Philipp Otten 
wrote:

> Dear all lovely people,
> first of all, I'm rather new to programming/Python/RDKit and probably
> my issue is quite easy to solve if you're more experienced.
> So I wrote a Python script simulating a reaction workflow for a multi
> step synthesis with a lot of different building blocks. The program works
> as intended, but it takes a lot of time (few days) because it uses only one
> CPU thread. Therefore I thought about using more via
> multiprocessing/multithreading, but I couldn't get it to run. I tried a
> lot, but didn't even figure out exactly where to start. Maybe you guys can
> give me a hint in the right direction?
>
> First of all the "Synthesis-Class":
>
> from rdkit import Chem
> from rdkit.Chem import AllChem
>
>
> class Synthesis:
> """Combination of the single synthesis steps."""
>
> def __init__(self,
>  sdf_a, sdf_b, sdf_f, sdf_k,
>  s1_smarts, s2_smarts, s3_smarts, s4_smarts, s5_smarts,
>  ):
> """Initialize building block molecules"""
> self.sdf_a = sdf_a
> self.sdf_b = sdf_b
> self.sdf_f = sdf_f
> self.sdf_k = sdf_k
> self.s1_smarts = s1_smarts
> self.s2_smarts = s2_smarts
> self.s3_smarts = s3_smarts
> self.s4_smarts = s4_smarts
> self.s5_smarts = s5_smarts
>
> def react1(self, t1):
> """First step of the reaction"""
> rxn1 = AllChem.ReactionFromSmarts(self.s1_smarts)
> with open(t1, "w") as t1:
> with open(self.sdf_a, "r") as f_a:
> while True:
> line_a = f_a.readline()
>
> if not line_a:
> break
>
> line_a = Chem.MolFromSmiles(line_a)
>
> with open(self.sdf_b) as f_b:
> while True:
> line_b = f_b.readline()
>
> if not line_b:
> break
>
> line_b = Chem.MolFromSmiles(line_b)
>
> p1 = rxn1.RunReactants((line_a, line_b))
> p1 = [x for t in p1 for x in t]
> for x in p1:
> x = Chem.MolToSmiles(x)
> t1.write(x)
> t1.write("\n")
> f_a.close()
> f_b.close()
> t1.close()
>
> def react2(self, t1, t2):
> """Second step of the synthesis"""
> rxn2 = AllChem.ReactionFromSmarts(self.s2_smarts)
> with open(t2, "w") as t2:
> with open(t1, "r") as f_c:
> while True:
> line_c = f_c.readline()
> if not line_c:
> break
>
> line_c = Chem.MolFromSmiles(line_c)
>
> if line_c == None:
> pass
> else:
> p2 = rxn2.RunReactants((line_c,))
> p2 = [x for t in p2 for x in t]
> for x in p2:
> x = Chem.MolToSmiles(x)
> t2.write(x)
> t2.write("\n")
> f_c.close()
> t2.close()
>
> def react3 (self, t2, t3):
> """Third step of the synthesis"""
> rxn3 = AllChem.ReactionFromSmarts(self.s3_smarts)
> with open(t3, "w") as t3:
> with open(t2, "r") as f_d:
> while True:
> line_d = f_d.readline()
>
> if not line_d:
> break
>
> line_d = Chem.MolFromSmiles(line_d)
>
> p3 = rxn3.RunReactants((line_d,))
> p3 = [x for t in p3 for x in t]
> for x in p3:
> x = Chem.MolToSmiles(x)
> t3.write(x)
> t3.write("\n")
> f_d.close()
> 

Re: [Rdkit-discuss] Multiprocessing/Threading in Python/Rdkit

2021-06-11 Thread Jin Pan
Hey Philipp,

Python3 introduces the standard concurrent.futures package which allows for
really straightforward multiprocessing -
https://docs.python.org/3/library/concurrent.futures.html#processpoolexecutor-example.
Because the futures run on a separate process, you'll have to send smiles
(or some other serialized format) as inputs/outputs of the concurrent
code.  I would also recommend batching your inputs/outputs because the IPC
overhead can be a significant fraction of the overall runtime.

If you're looking to automatically design multi-step syntheses, you may
also want to consider PostEra Manifold 
search, which executes 4+step searches in a few seconds and is free for
academic usage.

Best,
Jin

On Fri, 11 Jun 2021 at 03:49, Philipp Otten 
wrote:

> Dear all lovely people,
> first of all, I'm rather new to programming/Python/RDKit and probably
> my issue is quite easy to solve if you're more experienced.
> So I wrote a Python script simulating a reaction workflow for a multi
> step synthesis with a lot of different building blocks. The program works
> as intended, but it takes a lot of time (few days) because it uses only one
> CPU thread. Therefore I thought about using more via
> multiprocessing/multithreading, but I couldn't get it to run. I tried a
> lot, but didn't even figure out exactly where to start. Maybe you guys can
> give me a hint in the right direction?
>
> First of all the "Synthesis-Class":
>
> from rdkit import Chem
> from rdkit.Chem import AllChem
>
>
> class Synthesis:
> """Combination of the single synthesis steps."""
>
> def __init__(self,
>  sdf_a, sdf_b, sdf_f, sdf_k,
>  s1_smarts, s2_smarts, s3_smarts, s4_smarts, s5_smarts,
>  ):
> """Initialize building block molecules"""
> self.sdf_a = sdf_a
> self.sdf_b = sdf_b
> self.sdf_f = sdf_f
> self.sdf_k = sdf_k
> self.s1_smarts = s1_smarts
> self.s2_smarts = s2_smarts
> self.s3_smarts = s3_smarts
> self.s4_smarts = s4_smarts
> self.s5_smarts = s5_smarts
>
> def react1(self, t1):
> """First step of the reaction"""
> rxn1 = AllChem.ReactionFromSmarts(self.s1_smarts)
> with open(t1, "w") as t1:
> with open(self.sdf_a, "r") as f_a:
> while True:
> line_a = f_a.readline()
>
> if not line_a:
> break
>
> line_a = Chem.MolFromSmiles(line_a)
>
> with open(self.sdf_b) as f_b:
> while True:
> line_b = f_b.readline()
>
> if not line_b:
> break
>
> line_b = Chem.MolFromSmiles(line_b)
>
> p1 = rxn1.RunReactants((line_a, line_b))
> p1 = [x for t in p1 for x in t]
> for x in p1:
> x = Chem.MolToSmiles(x)
> t1.write(x)
> t1.write("\n")
> f_a.close()
> f_b.close()
> t1.close()
>
> def react2(self, t1, t2):
> """Second step of the synthesis"""
> rxn2 = AllChem.ReactionFromSmarts(self.s2_smarts)
> with open(t2, "w") as t2:
> with open(t1, "r") as f_c:
> while True:
> line_c = f_c.readline()
> if not line_c:
> break
>
> line_c = Chem.MolFromSmiles(line_c)
>
> if line_c == None:
> pass
> else:
> p2 = rxn2.RunReactants((line_c,))
> p2 = [x for t in p2 for x in t]
> for x in p2:
> x = Chem.MolToSmiles(x)
> t2.write(x)
> t2.write("\n")
> f_c.close()
> t2.close()
>
> def react3 (self, t2, t3):
> """Third step of the synthesis"""
> rxn3 = AllChem.ReactionFromSmarts(self.s3_smarts)
> with open(t3, "w") as t3:
> with open(t2, "r") as f_d:
> while True:
> line_d = f_d.readline()
>
> if not line_d:
> break
>
> line_d = Chem.MolFromSmiles(line_d)
>
> p3 = rxn3.RunReactants((line_d,))
> p3 = [x for t in p3 for x in t]
> for x in p3:
> x = Chem.MolToSmiles(x)
> t3.write(x)
> t3.write("\n")
> f_d.close()
> t3.close()
>
>
> def react4 (self, t3,  t4):
> 

[Rdkit-discuss] Multiprocessing/Threading in Python/Rdkit

2021-06-11 Thread Philipp Otten
Dear all lovely people,
first of all, I'm rather new to programming/Python/RDKit and probably my
issue is quite easy to solve if you're more experienced.
So I wrote a Python script simulating a reaction workflow for a multi-step
synthesis with a lot of different building blocks. The program works as
intended, but it takes a lot of time (few days) because it uses only one
CPU thread. Therefore I thought about using more via
multiprocessing/multithreading, but I couldn't get it to run. I tried a
lot, but didn't even figure out exactly where to start. Maybe you guys can
give me a hint in the right direction?

First of all the "Synthesis-Class":

from rdkit import Chem
from rdkit.Chem import AllChem


class Synthesis:
"""Combination of the single synthesis steps."""

def __init__(self,
 sdf_a, sdf_b, sdf_f, sdf_k,
 s1_smarts, s2_smarts, s3_smarts, s4_smarts, s5_smarts,
 ):
"""Initialize building block molecules"""
self.sdf_a = sdf_a
self.sdf_b = sdf_b
self.sdf_f = sdf_f
self.sdf_k = sdf_k
self.s1_smarts = s1_smarts
self.s2_smarts = s2_smarts
self.s3_smarts = s3_smarts
self.s4_smarts = s4_smarts
self.s5_smarts = s5_smarts

def react1(self, t1):
    """Run the first reaction step on every A x B building-block pair.

    Despite the ``sdf_`` prefix, ``self.sdf_a`` and ``self.sdf_b`` are read
    as text files with one SMILES per line. Every (A, B) pair is passed to
    the reaction built from ``self.s1_smarts`` and the SMILES of each
    product is written to ``t1``, one per line. Products are written in the
    same order as before: all B partners for the first A, then the second
    A, and so on.

    Lines that RDKit cannot parse (``MolFromSmiles`` returns ``None``) are
    skipped, matching what ``react2`` does, instead of aborting the run.

    Args:
        t1: Path of the output file; it is created or truncated.
    """
    rxn1 = AllChem.ReactionFromSmarts(self.s1_smarts)

    # A separate name for the handle keeps the ``t1`` path argument intact.
    with open(t1, "w") as out:
        # Parse the B building blocks once. Previously the B file was
        # re-opened and every line re-parsed for each molecule in A.
        with open(self.sdf_b) as f_b:
            mols_b = [
                mol
                for mol in (Chem.MolFromSmiles(line) for line in f_b)
                if mol is not None
            ]

        with open(self.sdf_a, "r") as f_a:
            for line_a in f_a:
                mol_a = Chem.MolFromSmiles(line_a)
                if mol_a is None:
                    continue

                for mol_b in mols_b:
                    # RunReactants returns a tuple of product tuples;
                    # flatten it while writing.
                    for product_set in rxn1.RunReactants((mol_a, mol_b)):
                        for product in product_set:
                            out.write(Chem.MolToSmiles(product))
                            out.write("\n")
    # No explicit close() calls: the ``with`` blocks already closed every
    # handle, and the old trailing ``f_b.close()`` could hit an unbound
    # name when the A file was empty.

def react2(self, t1, t2):
    """Run the second (single-reactant) reaction step.

    Reads one SMILES per line from ``t1``, applies the reaction built from
    ``self.s2_smarts`` to each molecule and writes the SMILES of every
    product to ``t2``, one per line. Lines that RDKit cannot parse
    (``MolFromSmiles`` returns ``None``) are skipped.

    Args:
        t1: Path of the input file (the output of the first step).
        t2: Path of the output file; it is created or truncated.
    """
    rxn2 = AllChem.ReactionFromSmarts(self.s2_smarts)

    # Separate handle names keep the path arguments intact; the ``with``
    # statement closes both files, so no explicit close() is needed.
    with open(t2, "w") as out, open(t1, "r") as f_c:
        for line_c in f_c:
            mol_c = Chem.MolFromSmiles(line_c)
            if mol_c is None:
                # Unparsable line: skip it.
                continue

            # RunReactants returns a tuple of product tuples; flatten it
            # while writing.
            for product_set in rxn2.RunReactants((mol_c,)):
                for product in product_set:
                    out.write(Chem.MolToSmiles(product))
                    out.write("\n")

def react3(self, t2, t3):
    """Run the third (single-reactant) reaction step.

    Reads one SMILES per line from ``t2``, applies the reaction built from
    ``self.s3_smarts`` to each molecule and writes the SMILES of every
    product to ``t3``, one per line.

    Lines that RDKit cannot parse (``MolFromSmiles`` returns ``None``) are
    skipped, as in ``react2``. The input is itself written from reaction
    products, so unparsable lines are plausible, and one of them would
    otherwise abort the whole step.

    Args:
        t2: Path of the input file (the output of the second step).
        t3: Path of the output file; it is created or truncated.
    """
    rxn3 = AllChem.ReactionFromSmarts(self.s3_smarts)

    # Separate handle names keep the path arguments intact; the ``with``
    # statement closes both files, so no explicit close() is needed.
    with open(t3, "w") as out, open(t2, "r") as f_d:
        for line_d in f_d:
            mol_d = Chem.MolFromSmiles(line_d)
            if mol_d is None:
                continue

            # RunReactants returns a tuple of product tuples; flatten it
            # while writing.
            for product_set in rxn3.RunReactants((mol_d,)):
                for product in product_set:
                    out.write(Chem.MolToSmiles(product))
                    out.write("\n")


def react4 (self, t3,  t4):
"""Fourth step of the synthesis"""
rxn4 = AllChem.ReactionFromSmarts(self.s4_smarts)

with open(t4, "w") as t4:
with open(self.sdf_f, "r") as f_f:
while True:
line_f = f_f.readline()

if not line_f:
break

line_f = Chem.MolFromSmiles(line_f)

with open(t3) as f_g:
while True:
line_g = f_g.readline()

if not line_g:
break

line_g = Chem.MolFromSmiles(line_g)

p4 = rxn4.RunReactants((line_g, line_f))
p4 = [x for t in p4 for x in t]
for x in p4:
x = Chem.MolToSmiles(x)
t4.write(x)
t4.write("\n")
f_f.close()