Adam Reichold <[email protected]> writes: > Hello, > > The Python-based regtest framework currently faces a common problem of > multi-threaded Python programs: An non-timed join will block signals > like SIGINT, so that it becomes impossible to stop the program by > pressing Control + C or sending SIGINT.
Right, thanks for the patch, this has been in my TODO forever :-) > However, since the threading in regtest is simple enough, there seems to > be a useful trick, i.e. using a separate terminator thread on which a > non-timed join is forwarded using a timed join. This way, sending SIGINT > will kill all daemon threads (including the terminator thread) and > interrupt the main thread. Since all our worker threads are daemon > threads and we need not particularly care about clean-up, the attached > patch should suffice to fix SIGINT for us. Sounds hacky. If the only problem is that Queue.join() is using a non-timed wait, I think it would be simpler to make Queue.join() use a timeout value for wait(). I know Queue doesn't allow to do that, but the implementation is very simple, so we can just add our own simplified version of Queue and use always a timed wait. That way we don't need another threads just to make the program interruptible. I tried it out and worked, so I'll push a patch doing it that way. Some comments about the patch inline anyway. > Best regards, Adam. > From fb56e9c2e7fefc3dfecceb3f5c3b1e098e0531a0 Mon Sep 17 00:00:00 2001 > From: Adam Reichold <[email protected]> > Date: Wed, 30 Dec 2015 11:31:46 +0100 > Subject: [PATCH] Fix handling of SIGINT for multithreaded regression tests > using a separate terminator thread. > > --- > regtest/TestReferences.py | 27 +++++++++++++++------------ > regtest/TestRun.py | 30 ++++++++++++------------------ > regtest/Utils.py | 12 ++++++++++++ > 3 files changed, 39 insertions(+), 30 deletions(-) > > diff --git a/regtest/TestReferences.py b/regtest/TestReferences.py > index 05b08e2..c2877c5 100644 > --- a/regtest/TestReferences.py > +++ b/regtest/TestReferences.py > @@ -21,10 +21,10 @@ import errno > from backends import get_backend, get_all_backends > from Config import Config > from Printer import get_printer > -from Utils import get_document_paths_from_dir, get_skipped_tests, > get_passwords > +from Utils import get_document_paths_from_dir, get_skipped_tests, > get_passwords, start_daemon, interruptible_join > > from Queue import Queue > -from threading import Thread, RLock > +from threading import RLock > > class TestReferences: > > @@ -87,7 +87,7 @@ class TestReferences: > backend.create_checksums(refs_path, > self.config.checksums_only) > with self._lock: > self._n_tests += 1 > - self.printer.printout_ln("[%d/%d] %s (%s): done" % > (self._n_tests, self._total_tests, doc_path, backend.get_name())) > + self.printer.printout_ln("[%d/%d] %s (%s): done" % > (self._n_tests, self._total_tests, doc_path, backend.get_name())) This change is unrelated, I'll push it as a separate patch. > def _worker_thread(self): > while True: > @@ -102,17 +102,20 @@ class TestReferences: > > self.printer.printout_ln('Found %d documents' % (total_docs)) > self.printer.printout_ln('Backends: %s' % ', > '.join([backend.get_name() for backend in backends])) > - self.printer.printout_ln('Process %d using %d worker threads' % > (os.getpid(), self.config.threads)) Why removing the log? > self.printer.printout_ln() > > - self.printer.printout('Spawning %d workers...' % > (self.config.threads)) > + n_workers = min(self.config.threads, total_docs) > + if n_workers <= 1: > > - for n_thread in range(self.config.threads): > - thread = Thread(target=self._worker_thread) > - thread.daemon = True > - thread.start() > + for doc in docs: > + self.create_refs_for_file(doc) Not using threads when there's only one task like we do in RunTests is also unrelated to this patch. We should do this in a separate commit as well. > - for doc in docs: > - self._queue.put(doc) > + else: > > - self._queue.join() > + for doc in docs: > + self._queue.put(doc) > + > + for _ in range(n_workers): > + start_daemon(self._worker_thread) > + > + interruptible_join(self._queue.join) > diff --git a/regtest/TestRun.py b/regtest/TestRun.py > index fc3f6a7..904010a 100644 > --- a/regtest/TestRun.py > +++ b/regtest/TestRun.py > @@ -18,14 +18,14 @@ > > from backends import get_backend, get_all_backends > from Config import Config > -from Utils import get_document_paths_from_dir, get_skipped_tests, > get_passwords > +from Utils import get_document_paths_from_dir, get_skipped_tests, > get_passwords, start_daemon, interruptible_join > from Printer import get_printer > import sys > import os > import errno > > from Queue import Queue > -from threading import Thread, RLock > +from threading import RLock > > class TestRun: > > @@ -204,31 +204,25 @@ class TestRun: > backends = self._get_backends() > self._total_tests = total_docs * len(backends) > > - if total_docs == 1: > - n_workers = 0 > - else: > - n_workers = min(self.config.threads, total_docs) > - > self.printer.printout_ln('Found %d documents' % (total_docs)) > self.printer.printout_ln('Backends: %s' % ', > '.join([backend.get_name() for backend in backends])) > - self.printer.printout_ln('Process %d using %d worker threads' % > (os.getpid(), n_workers)) Why are you removing the log messages? > self.printer.printout_ln() > > - if n_workers > 0: > - self.printer.printout('Spawning %d workers...' % > (self.config.threads)) Ditto. > - for n_thread in range(n_workers): > - thread = Thread(target=self._worker_thread) > - thread.daemon = True > - thread.start() > + n_workers = min(self.config.threads, total_docs) > + if n_workers <= 1: > > for doc in docs: > - self._queue.put(doc) > + self.run_test(doc) > > - self._queue.join() > else: > + > for doc in docs: > - self.run_test(doc) > + self._queue.put(doc) > + > + for _ in range(n_workers): > + start_daemon(self._worker_thread) > + > + interruptible_join(self._queue.join) > > return int(self._n_passed != self._n_run) > > diff --git a/regtest/Utils.py b/regtest/Utils.py > index cd1a572..6e4fab5 100644 > --- a/regtest/Utils.py > +++ b/regtest/Utils.py > @@ -18,6 +18,8 @@ > > import os > > +from threading import Thread > + > def get_document_paths_from_dir(docsdir, basedir = None): > if basedir is None: > basedir = docsdir > @@ -69,3 +71,13 @@ def get_passwords(docsdir): > execfile(passwords_file, passwords) > return passwords['passwords'] > > +def start_daemon(target): > + thread = Thread(target = target) > + thread.daemon = True > + thread.start() > + return thread > + > +def interruptible_join(target): > + thread = start_daemon(target) > + while thread.isAlive(): > + thread.join(9223372036.0) Where does this magic number come from? Could we use something like sys.float_info.max instead? Since the actual number doesn't really matter. > -- > 2.6.4 > > _______________________________________________ > poppler mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/poppler -- Carlos Garcia Campos PGP key: http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x523E6462
signature.asc
Description: PGP signature
_______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
