-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Hello everyone,
I currently try to get myself acquainted with Poppler's regression testing framework. Because my system has a rather low single-threaded performance, I tried to implement parallel testing using Python's Queue class. Even though poppler-regtest currently uses two processes per test file, rendering even and odd pages respectively, the test files themselves are still handled sequentially and both process are joined for each test file. This will yield suboptimal system utilization even for a small three-core system like mine. Using the "-t/--threads N" option in the patched poppler-regtest will spawn N worker threads that handle all tests they can get from a single queue for all known tests, allowing to heavily utilize also large system if using a large set of test cases. But even for my three-core system, this brought down the time to create references for the complete test suite using the Splash backend from 4,5 hours to 2,75 hours. IMHO, the necessary changes seem quite small especially since a lot of them are connected to indentation handling. What are your thoughts on the utility and implementation of this? Best regards, Adam. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.19 (GNU/Linux) iQEcBAEBAgAGBQJQvZvsAAoJEPSSjE3STU34o3IIALy0fNljhHM/6VDQ+sZkhsO1 qIZuNZYnL+JrDphSk0L8fwahjU3bHMBs0WTeP98b5NNiUPgtKixzBdFq0S5Ch03W C/ysZiZXQgEK1yZgfWT+3mvTHYlPKTD7KgkubISW9qvyy5OHlwuaRcnQpDCO3ju0 ratVRjJ5x/TJIXNa5W6F335UTvu9eJu2v3f0PHh7fQRnX6e5e4lHUOQphaB9wfbJ UvsI0PE9OGjhyEUc4Z92KeLw5Zp5NqQthbQLsGsxnZg6V/YmrzOFjuQJEsFZxEK7 JFKACLjj/41F3aTL8HaczOJ5oHWmbnq9VnetGSGJ5ClZZwXuhgeJxfW2TnGzpcs= =ng+7 -----END PGP SIGNATURE-----
>From e34c246f3c11d7abf0cea629100ca33d483c180f Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Tue, 4 Dec 2012 07:28:01 +0100 Subject: [PATCH 1/2] implemented parallel testing using a queue and worker threads --- regtest/Printer.py | 8 ++++++++ regtest/TestReferences.py | 23 ++++++++++++++++++++++- regtest/TestRun.py | 36 +++++++++++++++++++++++++++++++++++- regtest/main.py | 3 +++ 4 files changed, 68 insertions(+), 2 deletions(-) diff --git a/regtest/Printer.py b/regtest/Printer.py index 008f46b..453e03c 100644 --- a/regtest/Printer.py +++ b/regtest/Printer.py @@ -19,6 +19,8 @@ import sys from Config import Config +from threading import RLock + class Printer: __single = None @@ -32,6 +34,8 @@ class Printer: self._rewrite = self._stream.isatty() and not self._verbose self._current_line = None + self._lock = RLock() + Printer.__single = self def _erase_current_line(self): @@ -52,11 +56,13 @@ class Printer: self._stream.flush() def printout(self, msg): + with self._lock: self._erase_current_line() self._print(msg) self._current_line = msg[msg.rfind('\n') + 1:] def printout_update(self, msg): + with self._lock: if self._rewrite and self._current_line is not None: msg = self._current_line + msg elif not self._rewrite: @@ -64,6 +70,7 @@ class Printer: self.printout(msg) def printout_ln(self, msg): + with self._lock: if self._current_line is not None: self._current_line = None msg = '\n' + msg @@ -71,6 +78,7 @@ class Printer: self._print(self._ensure_new_line(msg)) def printerr(self, msg): + with self._lock: self.stderr.write(self._ensure_new_line(msg)) self.stderr.flush() diff --git a/regtest/TestReferences.py b/regtest/TestReferences.py index 9a9e923..cccd8af 100644 --- a/regtest/TestReferences.py +++ b/regtest/TestReferences.py @@ -23,6 +23,9 @@ from Config import Config from Printer import get_printer from Utils import get_document_paths_from_dir, get_skipped_tests +from Queue import Queue +from threading import Thread + class TestReferences: def __init__(self, docsdir, refsdir): @@ -32,6 +35,8 @@ class TestReferences: self.config = Config() self.printer = get_printer() + self._queue = Queue() + try: os.makedirs(self._refsdir) except OSError as e: @@ -68,9 +73,25 @@ class TestReferences: if backend.create_refs(doc_path, refs_path): backend.create_checksums(refs_path, self.config.checksums_only) + def worker_thread(self): + while True: + doc, n_doc, total_docs = self._queue.get() + self.create_refs_for_file(doc, n_doc, total_docs) + self._queue.task_done() + def create_refs(self): docs, total_docs = get_document_paths_from_dir(self._docsdir) + + self.printer.printout_ln('Process %d is spawning %d worker threads...' % (os.getpid(), self.config.threads)) + + for n_thread in range(self.config.threads): + thread = Thread(target=self.worker_thread) + thread.daemon = True + thread.start() + n_doc = 0 for doc in docs: n_doc += 1 - self.create_refs_for_file(doc, n_doc, total_docs) + self._queue.put( (doc, n_doc, total_docs) ) + + self._queue.join() diff --git a/regtest/TestRun.py b/regtest/TestRun.py index f4e5051..3894e93 100644 --- a/regtest/TestRun.py +++ b/regtest/TestRun.py @@ -24,6 +24,9 @@ import sys import os import errno +from Queue import Queue +from threading import Thread, RLock + class TestRun: def __init__(self, docsdir, refsdir, outdir): @@ -43,6 +46,9 @@ class TestRun: self._stderr = [] self._skipped = [] + self._queue = Queue() + self._lock = RLock() + try: os.makedirs(self._outdir); except OSError as e: @@ -57,24 +63,30 @@ class TestRun: ref_is_crashed = backend.is_crashed(refs_path) ref_is_failed = backend.is_failed(refs_path) if not ref_has_md5 and not ref_is_crashed and not ref_is_failed: + with self._lock: self._skipped.append("%s (%s)" % (doc_path, backend.get_name())) self.printer.print_default("Reference files not found, skipping '%s' for %s backend" % (doc_path, backend.get_name())) return + with self._lock: self._n_tests += 1 + self.printer.print_test_start("Testing '%s' using %s backend (%d/%d): " % (doc_path, backend.get_name(), n_doc, total_docs)) test_has_md5 = backend.create_refs(doc_path, test_path) if backend.has_stderr(test_path): + with self._lock: self._stderr.append("%s (%s)" % (doc_path, backend.get_name())) if ref_has_md5 and test_has_md5: if backend.compare_checksums(refs_path, test_path, not self.config.keep_results, self.config.create_diffs, self.config.update_refs): # FIXME: remove dir if it's empty? self.printer.print_test_result("PASS") + with self._lock: self._n_passed += 1 else: self.printer.print_test_result_ln("FAIL") + with self._lock: self._failed.append("%s (%s)" % (doc_path, backend.get_name())) return elif test_has_md5: @@ -87,6 +99,7 @@ class TestRun: test_is_crashed = backend.is_crashed(test_path) if ref_is_crashed and test_is_crashed: self.printer.print_test_result("PASS (Expected crash)") + with self._lock: self._n_passed += 1 return @@ -94,22 +107,26 @@ class TestRun: if ref_is_failed and test_is_failed: # FIXME: compare status errors self.printer.print_test_result("PASS (Expected fail with status error %d)" % (test_is_failed)) + with self._lock: self._n_passed += 1 return if test_is_crashed: self.printer.print_test_result_ln("CRASH") + with self._lock: self._crashed.append("%s (%s)" % (doc_path, backend.get_name())) return if test_is_failed: self.printer.print_test_result_ln("FAIL (status error %d)" % (test_is_failed)) + with self._lock: self._failed_status_error("%s (%s)" % (doc_path, backend.get_name())) return def run_test(self, filename, n_doc = 1, total_docs = 1): if filename in self._skip: doc_path = os.path.join(self._docsdir, filename) + with self._lock: self._skipped.append("%s" % (doc_path)) self.printer.print_default("Skipping test '%s' (%d/%d)" % (doc_path, n_doc, total_docs)) return @@ -126,6 +143,7 @@ class TestRun: refs_path = os.path.join(self._refsdir, filename) if not os.path.isdir(refs_path): + with self._lock: self._skipped.append("%s" % (doc_path)) self.printer.print_default("Reference dir not found for %s, skipping (%d/%d)" % (doc_path, n_doc, total_docs)) return @@ -138,12 +156,28 @@ class TestRun: for backend in backends: self.test(refs_path, doc_path, out_path, backend, n_doc, total_docs) + def worker_thread(self): + while True: + doc, n_doc, total_docs = self._queue.get() + self.run_test(doc, n_doc, total_docs) + self._queue.task_done() + def run_tests(self): docs, total_docs = get_document_paths_from_dir(self._docsdir) + + self.printer.printout_ln('Process %d is spawning %d worker threads...' % (os.getpid(), self.config.threads)) + + for n_thread in range(self.config.threads): + thread = Thread(target=self.worker_thread) + thread.daemon = True + thread.start() + n_doc = 0 for doc in docs: n_doc += 1 - self.run_test(doc, n_doc, total_docs) + self._queue.put( (doc, n_doc, total_docs) ) + + self._queue.join() def summary(self): if not self._n_tests: diff --git a/regtest/main.py b/regtest/main.py index 290c8bc..f7c3ac7 100644 --- a/regtest/main.py +++ b/regtest/main.py @@ -61,6 +61,9 @@ def main(args): parser.add_argument('--skip', metavar = 'FILE', action = 'store', dest = 'skipped_file', help = 'File containing tests to skip') + parser.add_argument('-t', '--threads', + action = 'store', dest = 'threads', type = int, default = 1, + help = 'Number of worker threads') ns, args = parser.parse_known_args(args) if not args: -- 1.8.0.1 >From 939a57e14726cfb9859497249b20e64409def9d2 Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Tue, 4 Dec 2012 07:32:44 +0100 Subject: [PATCH 2/2] removed two process optimization because it reduces parallel test performance --- regtest/backends/cairo.py | 5 ++--- regtest/backends/splash.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/regtest/backends/cairo.py b/regtest/backends/cairo.py index 304783e..3593342 100644 --- a/regtest/backends/cairo.py +++ b/regtest/backends/cairo.py @@ -28,9 +28,8 @@ class Cairo(Backend): def create_refs(self, doc_path, refs_path): out_path = os.path.join(refs_path, 'cairo') - p1 = subprocess.Popen([self._pdftocairo, '-cropbox', '-r', '72', '-e', '-png', doc_path, out_path], stderr = subprocess.PIPE) - p2 = subprocess.Popen([self._pdftocairo, '-cropbox', '-r', '72', '-o', '-png', doc_path, out_path], stderr = subprocess.PIPE) - return self._check_exit_status2(p1, p2, out_path) + p = subprocess.Popen([self._pdftocairo, '-cropbox', '-r', '72', '-png', doc_path, out_path], stderr = subprocess.PIPE) + return self._check_exit_status(p, out_path) def _create_diff(self, ref_path, result_path): self._diff_png(ref_path, result_path) diff --git a/regtest/backends/splash.py b/regtest/backends/splash.py index 3144bc7..aadbdec 100644 --- a/regtest/backends/splash.py +++ b/regtest/backends/splash.py @@ -28,9 +28,8 @@ class Splash(Backend): def create_refs(self, doc_path, refs_path): out_path = os.path.join(refs_path, 'splash') - p1 = subprocess.Popen([self._pdftoppm, '-cropbox', '-r', '72', '-e', '-png', doc_path, out_path], stderr = subprocess.PIPE) - p2 = subprocess.Popen([self._pdftoppm, '-cropbox', '-r', '72', '-o', '-png', doc_path, out_path], stderr = subprocess.PIPE) - return self._check_exit_status2(p1, p2, out_path) + p = subprocess.Popen([self._pdftoppm, '-cropbox', '-r', '72', '-png', doc_path, out_path], stderr = subprocess.PIPE) + return self._check_exit_status(p, out_path) def _create_diff(self, ref_path, result_path): self._diff_png(ref_path, result_path) -- 1.8.0.1
_______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
