Hello again, attached is an extended version of the patch series that better handles documents that fail to produce any output, reduces the time spent in console I/O by discarding stdout and stderr, and reduces boilerplate code by using a common base class for the perf backends. (The patch series is larger, but the final diff w.r.t. master is smaller. Should I squash it into a shorter patch series instead of sending the full commit history?)
Best regards, Adam. Am 30.12.2015 um 17:04 schrieb Adam Reichold: > Hello again, > > as discussed in the code modernization thread, if we are going to make > performance-oriented changes, we need a simple way to track functional and > performance regressions. > > The attached patch tries to extend the existing Python-based regtest > framework to measure run time and memory usage to spot significant > performance changes in the sense of relative deviations w.r.t. these > two parameters. It also collects the sums of both which might be used as > "ballpark" numbers to compare the performance effect of changes over > document collections. > > The patch runs the measured commands repeatedly including warm-up > iterations and collects statistics from these runs. The measurement > results are stored as JSON documents with the actual program output of > e.g. pdftotext or pdftoppm being discarded. > > To implement the check for relative deviations, it abuses the checksum > comparison method and hence checksums are still computed for the JSON > documents even though they are actually unnecessary. It is also limited > to Unix-like operating systems (due to the use of the wait3 syscall to > determine resource usage similar to the time command). > > > > _______________________________________________ > poppler mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/poppler >
From fb56e9c2e7fefc3dfecceb3f5c3b1e098e0531a0 Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Wed, 30 Dec 2015 11:31:46 +0100 Subject: [PATCH 1/5] Fix handling of SIGINT for multithreaded regression tests using a separate terminator thread. --- regtest/TestReferences.py | 27 +++++++++++++++------------ regtest/TestRun.py | 30 ++++++++++++------------------ regtest/Utils.py | 12 ++++++++++++ 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/regtest/TestReferences.py b/regtest/TestReferences.py index 05b08e2..c2877c5 100644 --- a/regtest/TestReferences.py +++ b/regtest/TestReferences.py @@ -21,10 +21,10 @@ import errno from backends import get_backend, get_all_backends from Config import Config from Printer import get_printer -from Utils import get_document_paths_from_dir, get_skipped_tests, get_passwords +from Utils import get_document_paths_from_dir, get_skipped_tests, get_passwords, start_daemon, interruptible_join from Queue import Queue -from threading import Thread, RLock +from threading import RLock class TestReferences: @@ -87,7 +87,7 @@ class TestReferences: backend.create_checksums(refs_path, self.config.checksums_only) with self._lock: self._n_tests += 1 - self.printer.printout_ln("[%d/%d] %s (%s): done" % (self._n_tests, self._total_tests, doc_path, backend.get_name())) + self.printer.printout_ln("[%d/%d] %s (%s): done" % (self._n_tests, self._total_tests, doc_path, backend.get_name())) def _worker_thread(self): while True: @@ -102,17 +102,20 @@ class TestReferences: self.printer.printout_ln('Found %d documents' % (total_docs)) self.printer.printout_ln('Backends: %s' % ', '.join([backend.get_name() for backend in backends])) - self.printer.printout_ln('Process %d using %d worker threads' % (os.getpid(), self.config.threads)) self.printer.printout_ln() - self.printer.printout('Spawning %d workers...' 
% (self.config.threads)) + n_workers = min(self.config.threads, total_docs) + if n_workers <= 1: - for n_thread in range(self.config.threads): - thread = Thread(target=self._worker_thread) - thread.daemon = True - thread.start() + for doc in docs: + self.create_refs_for_file(doc) - for doc in docs: - self._queue.put(doc) + else: - self._queue.join() + for doc in docs: + self._queue.put(doc) + + for _ in range(n_workers): + start_daemon(self._worker_thread) + + interruptible_join(self._queue.join) diff --git a/regtest/TestRun.py b/regtest/TestRun.py index fc3f6a7..904010a 100644 --- a/regtest/TestRun.py +++ b/regtest/TestRun.py @@ -18,14 +18,14 @@ from backends import get_backend, get_all_backends from Config import Config -from Utils import get_document_paths_from_dir, get_skipped_tests, get_passwords +from Utils import get_document_paths_from_dir, get_skipped_tests, get_passwords, start_daemon, interruptible_join from Printer import get_printer import sys import os import errno from Queue import Queue -from threading import Thread, RLock +from threading import RLock class TestRun: @@ -204,31 +204,25 @@ class TestRun: backends = self._get_backends() self._total_tests = total_docs * len(backends) - if total_docs == 1: - n_workers = 0 - else: - n_workers = min(self.config.threads, total_docs) - self.printer.printout_ln('Found %d documents' % (total_docs)) self.printer.printout_ln('Backends: %s' % ', '.join([backend.get_name() for backend in backends])) - self.printer.printout_ln('Process %d using %d worker threads' % (os.getpid(), n_workers)) self.printer.printout_ln() - if n_workers > 0: - self.printer.printout('Spawning %d workers...' 
% (self.config.threads)) - - for n_thread in range(n_workers): - thread = Thread(target=self._worker_thread) - thread.daemon = True - thread.start() + n_workers = min(self.config.threads, total_docs) + if n_workers <= 1: for doc in docs: - self._queue.put(doc) + self.run_test(doc) - self._queue.join() else: + for doc in docs: - self.run_test(doc) + self._queue.put(doc) + + for _ in range(n_workers): + start_daemon(self._worker_thread) + + interruptible_join(self._queue.join) return int(self._n_passed != self._n_run) diff --git a/regtest/Utils.py b/regtest/Utils.py index cd1a572..6e4fab5 100644 --- a/regtest/Utils.py +++ b/regtest/Utils.py @@ -18,6 +18,8 @@ import os +from threading import Thread + def get_document_paths_from_dir(docsdir, basedir = None): if basedir is None: basedir = docsdir @@ -69,3 +71,13 @@ def get_passwords(docsdir): execfile(passwords_file, passwords) return passwords['passwords'] +def start_daemon(target): + thread = Thread(target = target) + thread.daemon = True + thread.start() + return thread + +def interruptible_join(target): + thread = start_daemon(target) + while thread.isAlive(): + thread.join(9223372036.0) -- 2.6.4 From c626921b966b46feefb2c05922a18efe551e5674 Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Wed, 30 Dec 2015 14:39:27 +0100 Subject: [PATCH 2/5] Add two backends for doing performance measurements instead of output comparison to the regtest framework. 
--- regtest/Utils.py | 10 +++++ regtest/backends/__init__.py | 88 +++++++++++++++++++++++++++++++++++++++++- regtest/backends/perfsplash.py | 44 +++++++++++++++++++++ regtest/backends/perftext.py | 44 +++++++++++++++++++++ regtest/main.py | 9 +++++ 5 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 regtest/backends/perfsplash.py create mode 100644 regtest/backends/perftext.py diff --git a/regtest/Utils.py b/regtest/Utils.py index 6e4fab5..ccfcfcc 100644 --- a/regtest/Utils.py +++ b/regtest/Utils.py @@ -17,6 +17,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import os +import math from threading import Thread @@ -81,3 +82,12 @@ def interruptible_join(target): thread = start_daemon(target) while thread.isAlive(): thread.join(9223372036.0) + +def mean(values): + return sum(values) / float(len(values)) + +def stddev(values, mean): + return math.sqrt(sum((value - mean) ** 2 for value in values) / float(len(values) - 1)) + +def reldev(value, ref_value): + return abs(value - ref_value) / ref_value diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py index 1287110..f2ea4cc 100644 --- a/regtest/backends/__init__.py +++ b/regtest/backends/__init__.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # backends # # Copyright (C) 2011 Carlos Garcia Campos <[email protected]> @@ -18,10 +19,13 @@ import hashlib import os +import subprocess import select import shutil import errno +import json from Config import Config +from Utils import mean, stddev, reldev from Printer import get_printer __all__ = [ 'register_backend', @@ -38,7 +42,11 @@ class Backend: def __init__(self, name, diff_ext = None): self._name = name self._diff_ext = diff_ext - self._utilsdir = Config().utils_dir + config = Config() + self._utilsdir = config.utils_dir + self._iterations = config.iterations + self._warm_up_iterations = config.warm_up_iterations + self._allowed_deviation = config.allowed_deviation / 100.0 self.printer = get_printer() @@ 
-99,7 +107,7 @@ class Backend: result_path = os.path.join(out_path, basename) result_md5sum = self.__md5sum(result_path); - matched = md5sum == result_md5sum + matched = self._match_checksums(md5_path, md5sum, result_path, result_md5sum) if update_refs: result_md5.append("%s %s\n" % (result_md5sum, ref_path)) @@ -145,6 +153,9 @@ class Backend: return retval + def _match_checksums(self, ref_path, ref_checksum, res_path, res_checksum): + return ref_checksum == res_checksum + def update_results(self, refs_path, out_path): if not self.has_md5(refs_path): path = os.path.join(refs_path, self._name) @@ -290,6 +301,79 @@ class Backend: def create_refs(self, doc_path, refs_path, password = None): raise NotImplementedError + def _measure_command(self, cmd, out_path): + for _ in range(self._warm_up_iterations): + process = subprocess.Popen(cmd) + + if not os.WIFEXITED(process.wait()): + return False + + run_times = [] + memory_usages = [] + + for _ in range(self._iterations): + process = subprocess.Popen(cmd) + + _, status, resources = os.wait4(process.pid, 0) + + if not os.WIFEXITED(status): + return False + + run_times.append(resources.ru_utime + resources.ru_stime) + memory_usages.append(resources.ru_maxrss) + + run_time_mean = mean(run_times) + run_time_stddev = stddev(run_times, run_time_mean) + + memory_usage_mean = mean(memory_usages) + memory_usage_stddev = stddev(memory_usages, memory_usage_mean) + + with open(out_path, 'w') as out_file: + json.dump({ 'run_time_mean': run_time_mean + , 'run_time_stddev': run_time_stddev + , 'memory_usage_mean': memory_usage_mean + , 'memory_usage_stddev': memory_usage_stddev } + , out_file) + + return True + + def _match_resources(self, ref_path, res_path): + with open(ref_path, 'r') as ref_file: + ref = json.load(ref_file) + with open(res_path, 'r') as res_file: + res = json.load(res_file) + + run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) + memory_usage_reldev = reldev(res['memory_usage_mean'], 
ref['memory_usage_mean']) + + if run_time_reldev > self._allowed_deviation: + return False + + if memory_usage_reldev > self._allowed_deviation: + return False + + return True + + def _diff_resources(self, ref_path, res_path): + with open(ref_path, 'r') as ref_file: + ref = json.load(ref_file) + with open(res_path, 'r') as res_file: + res = json.load(res_file) + + run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) + memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) + + with open(res_path + '.txt', 'w') as out_file: + out_file.write('Run time:\n') + out_file.write('\tReference: %.2f â %.3f s\n' % (ref['run_time_mean'], ref['run_time_stddev'])) + out_file.write('\tResult: %.2f â %.3f s\n' % (res['run_time_mean'], res['run_time_stddev'])) + out_file.write('\tDeviation: %.1f %%\n\n' % (run_time_reldev * 100.0)) + out_file.write('Memory usage:\n') + out_file.write('\tReference: %.1f â %.2f kB\n' % (ref['memory_usage_mean'] / 1024.0, ref['memory_usage_stddev'] / 1024.0)) + out_file.write('\tResult: %.1f â %.2f kB\n' % (res['memory_usage_mean'] / 1024.0, res['memory_usage_stddev'] / 1024.0)) + out_file.write('\tDeviation: %.1f %%\n\n' % (memory_usage_reldev * 100.0)) + + _backends = {} def register_backend(backend_name, backend_class): _backends[backend_name] = backend_class diff --git a/regtest/backends/perfsplash.py b/regtest/backends/perfsplash.py new file mode 100644 index 0000000..c958a70 --- /dev/null +++ b/regtest/backends/perfsplash.py @@ -0,0 +1,44 @@ +# perfsplash.py +# +# Copyright (C) 2015 Adam Reichold <[email protected]> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +from backends import Backend, register_backend +import subprocess +import os + +class PerfSplash(Backend): + + def __init__(self, name): + Backend.__init__(self, name, '.txt') + self._pdftoppm = os.path.join(self._utilsdir, 'pdftoppm'); + + def create_refs(self, doc_path, refs_path, password = None): + out_path = os.path.join(refs_path, 'perfsplash') + cmd = [self._pdftoppm, '-cropbox', '-r', '72', '-png', doc_path, '/dev/null'] + if password is not None: + cmd.extend(['-opw', password, '-upw', password]) + + return self._measure_command(cmd, out_path) + + def _match_checksums(self, ref_path, ref_checksum, res_path, res_checksum): + return self._match_resources(ref_path, res_path) + + def _create_diff(self, ref_path, res_path): + self._diff_resources(ref_path, res_path) + +register_backend('perfsplash', PerfSplash) + diff --git a/regtest/backends/perftext.py b/regtest/backends/perftext.py new file mode 100644 index 0000000..4cb697a --- /dev/null +++ b/regtest/backends/perftext.py @@ -0,0 +1,44 @@ +# perftext.py +# +# Copyright (C) 2015 Adam Reichold <[email protected]> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +from backends import Backend, register_backend +import os + +class PerfText(Backend): + + def __init__(self, name): + Backend.__init__(self, name) + self._pdftotext = os.path.join(self._utilsdir, 'pdftotext'); + + def create_refs(self, doc_path, refs_path, password = None): + out_path = os.path.join(refs_path, 'perftext') + cmd = [self._pdftotext, doc_path, '/dev/null'] + if password is not None: + cmd.extend(['-opw', password, '-upw', password]) + + return self._measure_command(cmd, out_path) + + def _match_checksums(self, ref_path, ref_checksum, res_path, res_checksum): + return self._match_resources(ref_path, res_path) + + def _create_diff(self, ref_path, res_path): + self._diff_resources(ref_path, res_path) + + +register_backend('perftext', PerfText) + diff --git a/regtest/main.py b/regtest/main.py index 0febb79..0e1c1c4 100644 --- a/regtest/main.py +++ b/regtest/main.py @@ -71,6 +71,15 @@ def main(args): parser.add_argument('-t', '--threads', action = 'store', dest = 'threads', type = int, default = n_cpus, help = 'Number of worker threads (Default: %d)' % n_cpus) + parser.add_argument('--iterations', + action = 'store', dest = 'iterations', type = int, default = 10, + help = 'Number of iterations to run commands during measurements.') + parser.add_argument('--warm-up-iterations', + action = 'store', dest = 'warm_up_iterations', type = int, default = 5, + help = 'Number of iterations to run commands before measurements.') + parser.add_argument('--allowed-deviation', + action = 'store', dest = 'allowed_deviation', type = int, default = 5, + help = 'The allowed relative deviation in percent to consider two measurements similar.') ns, args = parser.parse_known_args(args) if not args: -- 2.6.4 From 
401de5de0195f2111a753d29ee104f23fd73cb04 Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Wed, 30 Dec 2015 15:50:38 +0100 Subject: [PATCH 3/5] Extend the regtest performance backends to collect some cummulative statistics. --- regtest/TestReferences.py | 12 ++++++++++- regtest/TestRun.py | 10 +++++++++ regtest/Utils.py | 2 +- regtest/backends/__init__.py | 49 ++++++++++++++++++++++++++++++++++++++---- regtest/backends/perfsplash.py | 10 +++++++-- regtest/backends/perftext.py | 8 ++++++- 6 files changed, 82 insertions(+), 9 deletions(-) diff --git a/regtest/TestReferences.py b/regtest/TestReferences.py index c2877c5..992d8e1 100644 --- a/regtest/TestReferences.py +++ b/regtest/TestReferences.py @@ -37,6 +37,7 @@ class TestReferences: self.printer = get_printer() self._total_tests = 1 self._n_tests = 0 + self._stats = dict([ (backend.get_name(), None) for backend in self._get_backends() ]) self._queue = Queue() self._lock = RLock() @@ -85,9 +86,14 @@ class TestReferences: if backend.create_refs(doc_path, refs_path, password): backend.create_checksums(refs_path, self.config.checksums_only) + + backend_name = backend.get_name() + with self._lock: self._n_tests += 1 - self.printer.printout_ln("[%d/%d] %s (%s): done" % (self._n_tests, self._total_tests, doc_path, backend.get_name())) + self._stats[backend_name] = backend.collect_stats(self._stats[backend_name], refs_path) + + self.printer.printout_ln("[%d/%d] %s (%s): done" % (self._n_tests, self._total_tests, doc_path, backend_name)) def _worker_thread(self): while True: @@ -119,3 +125,7 @@ class TestReferences: start_daemon(self._worker_thread) interruptible_join(self._queue.join) + + for backend in self._get_backends(): + backend_name = backend.get_name() + backend.print_stats(self.printer, self._stats[backend_name]) diff --git a/regtest/TestRun.py b/regtest/TestRun.py index 904010a..86c475a 100644 --- a/regtest/TestRun.py +++ b/regtest/TestRun.py @@ -51,6 +51,8 @@ class TestRun: self._stderr = 
{} self._skipped = [] self._new = [] + self._ref_stats = dict([ (backend.get_name(), None) for backend in self._get_backends() ]) + self._stats = dict([ (backend.get_name(), None) for backend in self._get_backends() ]) self._queue = Queue() self._lock = RLock() @@ -84,6 +86,10 @@ class TestRun: test_has_md5 = backend.create_refs(doc_path, test_path, password) test_passed = False if ref_has_md5 and test_has_md5: + backend_name = backend.get_name() + self._ref_stats[backend_name] = backend.collect_stats(self._ref_stats[backend_name], refs_path) + self._stats[backend_name] = backend.collect_stats(self._stats[backend_name], test_path) + test_passed = backend.compare_checksums(refs_path, test_path, not self.config.keep_results, self.config.create_diffs, self.config.update_refs) elif self.config.update_refs: backend.update_results(refs_path, test_path) @@ -282,3 +288,7 @@ class TestRun: self.printer.printout_ln("\n".join([" %s" % new for new in self._new])) self.printer.printout_ln("Use create-refs command to add reference results for them") self.printer.printout_ln() + + for backend in self._get_backends(): + backend_name = backend.get_name() + backend.print_stats(self.printer, self._stats[backend_name], self._ref_stats[backend_name]) diff --git a/regtest/Utils.py b/regtest/Utils.py index ccfcfcc..1fe332d 100644 --- a/regtest/Utils.py +++ b/regtest/Utils.py @@ -90,4 +90,4 @@ def stddev(values, mean): return math.sqrt(sum((value - mean) ** 2 for value in values) / float(len(values) - 1)) def reldev(value, ref_value): - return abs(value - ref_value) / ref_value + return (value - ref_value) / ref_value diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py index f2ea4cc..f3c865b 100644 --- a/regtest/backends/__init__.py +++ b/regtest/backends/__init__.py @@ -301,6 +301,12 @@ class Backend: def create_refs(self, doc_path, refs_path, password = None): raise NotImplementedError + def collect_stats(self, stats, ref_path): + return None + + def 
print_stats(self, printer, stats, ref_stats = None): + pass + def _measure_command(self, cmd, out_path): for _ in range(self._warm_up_iterations): process = subprocess.Popen(cmd) @@ -346,10 +352,10 @@ class Backend: run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) - if run_time_reldev > self._allowed_deviation: + if abs(run_time_reldev) > self._allowed_deviation: return False - if memory_usage_reldev > self._allowed_deviation: + if abs(memory_usage_reldev) > self._allowed_deviation: return False return True @@ -367,11 +373,46 @@ class Backend: out_file.write('Run time:\n') out_file.write('\tReference: %.2f â %.3f s\n' % (ref['run_time_mean'], ref['run_time_stddev'])) out_file.write('\tResult: %.2f â %.3f s\n' % (res['run_time_mean'], res['run_time_stddev'])) - out_file.write('\tDeviation: %.1f %%\n\n' % (run_time_reldev * 100.0)) + out_file.write('\tDeviation: %+.1f %%\n\n' % (run_time_reldev * 100.0)) out_file.write('Memory usage:\n') out_file.write('\tReference: %.1f â %.2f kB\n' % (ref['memory_usage_mean'] / 1024.0, ref['memory_usage_stddev'] / 1024.0)) out_file.write('\tResult: %.1f â %.2f kB\n' % (res['memory_usage_mean'] / 1024.0, res['memory_usage_stddev'] / 1024.0)) - out_file.write('\tDeviation: %.1f %%\n\n' % (memory_usage_reldev * 100.0)) + out_file.write('\tDeviation: %+.1f %%\n\n' % (memory_usage_reldev * 100.0)) + + def _collect_resources(self, stats, out_path): + if stats is None: + stats = { 'run_time_sum': 0.0 + , 'memory_usage_sum': 0.0 } + + with open(out_path, 'r') as out_file: + out = json.load(out_file) + + stats['run_time_sum'] += out['run_time_mean'] + stats['memory_usage_sum'] += out['memory_usage_mean'] + + return stats + + def _print_resources(self, printer, stats, ref_stats): + if stats is None: + return + + if ref_stats is None: + printer.printout_ln('%s: Cummulative run time: %.2f min' % (self.get_name(), stats['run_time_sum'] / 60.0)) 
+ printer.printout_ln('%s: Cummulative memory usage: %.1f MB' % (self.get_name(), stats['memory_usage_sum'] / 1024.0 / 1024.0)) + else: + run_time_reldev = reldev(stats['run_time_sum'], ref_stats['run_time_sum']) + memory_usage_reldev = reldev(stats['memory_usage_sum'], ref_stats['memory_usage_sum']) + + printer.printout_ln('%s: Cummulative run time: %.2f min (%+.1f %% from %.2f min)' % + ( self.get_name() + , stats['run_time_sum'] / 60.0 + , run_time_reldev + , ref_stats['run_time_sum'] / 60.0 )) + printer.printout_ln('%s: Cummulative memory usage: %.1f MB (%+.1f %% from %.1f MB)' % + ( self.get_name() + , stats['memory_usage_sum'] / 1024.0 / 1024.0 + , memory_usage_reldev + , ref_stats['memory_usage_sum'] / 1024.0 / 1024.0 )) _backends = {} diff --git a/regtest/backends/perfsplash.py b/regtest/backends/perfsplash.py index c958a70..7ab93af 100644 --- a/regtest/backends/perfsplash.py +++ b/regtest/backends/perfsplash.py @@ -17,7 +17,6 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA from backends import Backend, register_backend -import subprocess import os class PerfSplash(Backend): @@ -27,7 +26,7 @@ class PerfSplash(Backend): self._pdftoppm = os.path.join(self._utilsdir, 'pdftoppm'); def create_refs(self, doc_path, refs_path, password = None): - out_path = os.path.join(refs_path, 'perfsplash') + out_path = os.path.join(refs_path, self.get_name()) cmd = [self._pdftoppm, '-cropbox', '-r', '72', '-png', doc_path, '/dev/null'] if password is not None: cmd.extend(['-opw', password, '-upw', password]) @@ -40,5 +39,12 @@ class PerfSplash(Backend): def _create_diff(self, ref_path, res_path): self._diff_resources(ref_path, res_path) + def collect_stats(self, stats, ref_path): + out_path = os.path.join(ref_path, self.get_name()) + return self._collect_resources(stats, out_path) + + def print_stats(self, printer, stats, ref_stats = None): + self._print_resources(printer, stats, ref_stats) + register_backend('perfsplash', PerfSplash) diff --git 
a/regtest/backends/perftext.py b/regtest/backends/perftext.py index 4cb697a..838a17a 100644 --- a/regtest/backends/perftext.py +++ b/regtest/backends/perftext.py @@ -26,7 +26,7 @@ class PerfText(Backend): self._pdftotext = os.path.join(self._utilsdir, 'pdftotext'); def create_refs(self, doc_path, refs_path, password = None): - out_path = os.path.join(refs_path, 'perftext') + out_path = os.path.join(refs_path, self.get_name()) cmd = [self._pdftotext, doc_path, '/dev/null'] if password is not None: cmd.extend(['-opw', password, '-upw', password]) @@ -39,6 +39,12 @@ class PerfText(Backend): def _create_diff(self, ref_path, res_path): self._diff_resources(ref_path, res_path) + def collect_stats(self, stats, ref_path): + out_path = os.path.join(ref_path, self.get_name()) + return self._collect_resources(stats, out_path) + + def print_stats(self, printer, stats, ref_stats = None): + self._print_resources(printer, stats, ref_stats) register_backend('perftext', PerfText) -- 2.6.4 From 4a229ad60e45fb5beaf238e988e2889a74e053f1 Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Thu, 31 Dec 2015 10:44:54 +0100 Subject: [PATCH 4/5] Better handle reading measurement documents if problematic files did not produce a result. 
--- regtest/backends/__init__.py | 55 +++++++++++++++++++++++++----------------- regtest/backends/perfsplash.py | 2 +- regtest/backends/perftext.py | 2 +- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py index f3c865b..840174c 100644 --- a/regtest/backends/__init__.py +++ b/regtest/backends/__init__.py @@ -308,25 +308,27 @@ class Backend: pass def _measure_command(self, cmd, out_path): - for _ in range(self._warm_up_iterations): - process = subprocess.Popen(cmd) - - if not os.WIFEXITED(process.wait()): - return False run_times = [] memory_usages = [] - for _ in range(self._iterations): - process = subprocess.Popen(cmd) + with open(os.devnull, 'w') as devnull: + for _ in range(self._warm_up_iterations): + process = subprocess.Popen(cmd, stdout = devnull, stderr = devnull) + + if not os.WIFEXITED(process.wait()): + return False - _, status, resources = os.wait4(process.pid, 0) + for _ in range(self._iterations): + process = subprocess.Popen(cmd, stdout = devnull, stderr = devnull) - if not os.WIFEXITED(status): - return False + _, status, resources = os.wait4(process.pid, 0) - run_times.append(resources.ru_utime + resources.ru_stime) - memory_usages.append(resources.ru_maxrss) + if not os.WIFEXITED(status): + return False + + run_times.append(resources.ru_utime + resources.ru_stime) + memory_usages.append(resources.ru_maxrss) run_time_mean = mean(run_times) run_time_stddev = stddev(run_times, run_time_mean) @@ -344,10 +346,13 @@ class Backend: return True def _match_resources(self, ref_path, res_path): - with open(ref_path, 'r') as ref_file: - ref = json.load(ref_file) - with open(res_path, 'r') as res_file: - res = json.load(res_file) + try: + with open(ref_path, 'r') as ref_file: + ref = json.load(ref_file) + with open(res_path, 'r') as res_file: + res = json.load(res_file) + except IOError: + return False run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) 
memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) @@ -361,10 +366,13 @@ class Backend: return True def _diff_resources(self, ref_path, res_path): - with open(ref_path, 'r') as ref_file: - ref = json.load(ref_file) - with open(res_path, 'r') as res_file: - res = json.load(res_file) + try: + with open(ref_path, 'r') as ref_file: + ref = json.load(ref_file) + with open(res_path, 'r') as res_file: + res = json.load(res_file) + except IOError: + return run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) @@ -384,8 +392,11 @@ class Backend: stats = { 'run_time_sum': 0.0 , 'memory_usage_sum': 0.0 } - with open(out_path, 'r') as out_file: - out = json.load(out_file) + try: + with open(out_path, 'r') as out_file: + out = json.load(out_file) + except IOError: + return stats stats['run_time_sum'] += out['run_time_mean'] stats['memory_usage_sum'] += out['memory_usage_mean'] diff --git a/regtest/backends/perfsplash.py b/regtest/backends/perfsplash.py index 7ab93af..d84b8cd 100644 --- a/regtest/backends/perfsplash.py +++ b/regtest/backends/perfsplash.py @@ -27,7 +27,7 @@ class PerfSplash(Backend): def create_refs(self, doc_path, refs_path, password = None): out_path = os.path.join(refs_path, self.get_name()) - cmd = [self._pdftoppm, '-cropbox', '-r', '72', '-png', doc_path, '/dev/null'] + cmd = [self._pdftoppm, '-cropbox', '-r', '72', '-png', doc_path, os.devnull] if password is not None: cmd.extend(['-opw', password, '-upw', password]) diff --git a/regtest/backends/perftext.py b/regtest/backends/perftext.py index 838a17a..e668e8f 100644 --- a/regtest/backends/perftext.py +++ b/regtest/backends/perftext.py @@ -27,7 +27,7 @@ class PerfText(Backend): def create_refs(self, doc_path, refs_path, password = None): out_path = os.path.join(refs_path, self.get_name()) - cmd = [self._pdftotext, doc_path, '/dev/null'] + cmd = [self._pdftotext, doc_path, 
os.devnull] if password is not None: cmd.extend(['-opw', password, '-upw', password]) -- 2.6.4 From 12b2374319273476ab847c3fa21bbcbf0dc8b5ab Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Thu, 31 Dec 2015 11:04:58 +0100 Subject: [PATCH 5/5] Reduce perf backend boiler plate code by factoring shared code into a common base class. --- regtest/TestReferences.py | 2 +- regtest/TestRun.py | 2 +- regtest/backends/__init__.py | 131 +--------------------------------- regtest/backends/perf.py | 156 +++++++++++++++++++++++++++++++++++++++++ regtest/backends/perfsplash.py | 18 +---- regtest/backends/perftext.py | 20 ++---- 6 files changed, 167 insertions(+), 162 deletions(-) create mode 100644 regtest/backends/perf.py diff --git a/regtest/TestReferences.py b/regtest/TestReferences.py index 992d8e1..e268fc8 100644 --- a/regtest/TestReferences.py +++ b/regtest/TestReferences.py @@ -128,4 +128,4 @@ class TestReferences: for backend in self._get_backends(): backend_name = backend.get_name() - backend.print_stats(self.printer, self._stats[backend_name]) + backend.print_stats(self._stats[backend_name]) diff --git a/regtest/TestRun.py b/regtest/TestRun.py index 86c475a..80d5189 100644 --- a/regtest/TestRun.py +++ b/regtest/TestRun.py @@ -291,4 +291,4 @@ class TestRun: for backend in self._get_backends(): backend_name = backend.get_name() - backend.print_stats(self.printer, self._stats[backend_name], self._ref_stats[backend_name]) + backend.print_stats(self._stats[backend_name], self._ref_stats[backend_name]) diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py index 840174c..21f3ff6 100644 --- a/regtest/backends/__init__.py +++ b/regtest/backends/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # backends # # Copyright (C) 2011 Carlos Garcia Campos <[email protected]> @@ -19,13 +18,10 @@ import hashlib import os -import subprocess import select import shutil import errno -import json from Config import Config -from Utils import mean, 
stddev, reldev from Printer import get_printer __all__ = [ 'register_backend', @@ -42,11 +38,7 @@ class Backend: def __init__(self, name, diff_ext = None): self._name = name self._diff_ext = diff_ext - config = Config() - self._utilsdir = config.utils_dir - self._iterations = config.iterations - self._warm_up_iterations = config.warm_up_iterations - self._allowed_deviation = config.allowed_deviation / 100.0 + self._utilsdir = Config().utils_dir self.printer = get_printer() @@ -304,128 +296,9 @@ class Backend: def collect_stats(self, stats, ref_path): return None - def print_stats(self, printer, stats, ref_stats = None): + def print_stats(self, stats, ref_stats = None): pass - def _measure_command(self, cmd, out_path): - - run_times = [] - memory_usages = [] - - with open(os.devnull, 'w') as devnull: - for _ in range(self._warm_up_iterations): - process = subprocess.Popen(cmd, stdout = devnull, stderr = devnull) - - if not os.WIFEXITED(process.wait()): - return False - - for _ in range(self._iterations): - process = subprocess.Popen(cmd, stdout = devnull, stderr = devnull) - - _, status, resources = os.wait4(process.pid, 0) - - if not os.WIFEXITED(status): - return False - - run_times.append(resources.ru_utime + resources.ru_stime) - memory_usages.append(resources.ru_maxrss) - - run_time_mean = mean(run_times) - run_time_stddev = stddev(run_times, run_time_mean) - - memory_usage_mean = mean(memory_usages) - memory_usage_stddev = stddev(memory_usages, memory_usage_mean) - - with open(out_path, 'w') as out_file: - json.dump({ 'run_time_mean': run_time_mean - , 'run_time_stddev': run_time_stddev - , 'memory_usage_mean': memory_usage_mean - , 'memory_usage_stddev': memory_usage_stddev } - , out_file) - - return True - - def _match_resources(self, ref_path, res_path): - try: - with open(ref_path, 'r') as ref_file: - ref = json.load(ref_file) - with open(res_path, 'r') as res_file: - res = json.load(res_file) - except IOError: - return False - - run_time_reldev = 
reldev(res['run_time_mean'], ref['run_time_mean']) - memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) - - if abs(run_time_reldev) > self._allowed_deviation: - return False - - if abs(memory_usage_reldev) > self._allowed_deviation: - return False - - return True - - def _diff_resources(self, ref_path, res_path): - try: - with open(ref_path, 'r') as ref_file: - ref = json.load(ref_file) - with open(res_path, 'r') as res_file: - res = json.load(res_file) - except IOError: - return - - run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) - memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) - - with open(res_path + '.txt', 'w') as out_file: - out_file.write('Run time:\n') - out_file.write('\tReference: %.2f â %.3f s\n' % (ref['run_time_mean'], ref['run_time_stddev'])) - out_file.write('\tResult: %.2f â %.3f s\n' % (res['run_time_mean'], res['run_time_stddev'])) - out_file.write('\tDeviation: %+.1f %%\n\n' % (run_time_reldev * 100.0)) - out_file.write('Memory usage:\n') - out_file.write('\tReference: %.1f â %.2f kB\n' % (ref['memory_usage_mean'] / 1024.0, ref['memory_usage_stddev'] / 1024.0)) - out_file.write('\tResult: %.1f â %.2f kB\n' % (res['memory_usage_mean'] / 1024.0, res['memory_usage_stddev'] / 1024.0)) - out_file.write('\tDeviation: %+.1f %%\n\n' % (memory_usage_reldev * 100.0)) - - def _collect_resources(self, stats, out_path): - if stats is None: - stats = { 'run_time_sum': 0.0 - , 'memory_usage_sum': 0.0 } - - try: - with open(out_path, 'r') as out_file: - out = json.load(out_file) - except IOError: - return stats - - stats['run_time_sum'] += out['run_time_mean'] - stats['memory_usage_sum'] += out['memory_usage_mean'] - - return stats - - def _print_resources(self, printer, stats, ref_stats): - if stats is None: - return - - if ref_stats is None: - printer.printout_ln('%s: Cummulative run time: %.2f min' % (self.get_name(), stats['run_time_sum'] / 60.0)) - printer.printout_ln('%s: 
Cummulative memory usage: %.1f MB' % (self.get_name(), stats['memory_usage_sum'] / 1024.0 / 1024.0)) - else: - run_time_reldev = reldev(stats['run_time_sum'], ref_stats['run_time_sum']) - memory_usage_reldev = reldev(stats['memory_usage_sum'], ref_stats['memory_usage_sum']) - - printer.printout_ln('%s: Cummulative run time: %.2f min (%+.1f %% from %.2f min)' % - ( self.get_name() - , stats['run_time_sum'] / 60.0 - , run_time_reldev - , ref_stats['run_time_sum'] / 60.0 )) - printer.printout_ln('%s: Cummulative memory usage: %.1f MB (%+.1f %% from %.1f MB)' % - ( self.get_name() - , stats['memory_usage_sum'] / 1024.0 / 1024.0 - , memory_usage_reldev - , ref_stats['memory_usage_sum'] / 1024.0 / 1024.0 )) - - _backends = {} def register_backend(backend_name, backend_class): _backends[backend_name] = backend_class diff --git a/regtest/backends/perf.py b/regtest/backends/perf.py new file mode 100644 index 0000000..653886e --- /dev/null +++ b/regtest/backends/perf.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- +# perf.py +# +# Copyright (C) 2015 Adam Reichold <[email protected]> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +from backends import Backend +import subprocess +import os +import json +from Config import Config +from Utils import mean, stddev, reldev + +class Perf(Backend): + + def __init__(self, name): + Backend.__init__(self, name, '.txt') + config = Config() + self._iterations = config.iterations + self._warm_up_iterations = config.warm_up_iterations + self._allowed_deviation = config.allowed_deviation / 100.0 + + def _match_checksums(self, ref_path, ref_checksum, res_path, res_checksum): + try: + with open(ref_path, 'r') as ref_file: + ref = json.load(ref_file) + with open(res_path, 'r') as res_file: + res = json.load(res_file) + except IOError: + return False + + run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) + memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) + + if abs(run_time_reldev) > self._allowed_deviation: + return False + + if abs(memory_usage_reldev) > self._allowed_deviation: + return False + + return True + + def _create_diff(self, ref_path, res_path): + try: + with open(ref_path, 'r') as ref_file: + ref = json.load(ref_file) + with open(res_path, 'r') as res_file: + res = json.load(res_file) + except IOError: + return + + run_time_reldev = reldev(res['run_time_mean'], ref['run_time_mean']) + memory_usage_reldev = reldev(res['memory_usage_mean'], ref['memory_usage_mean']) + + with open(res_path + '.txt', 'w') as out_file: + out_file.write('Run time:\n') + out_file.write('\tReference: %.2f â %.3f s\n' % (ref['run_time_mean'], ref['run_time_stddev'])) + out_file.write('\tResult: %.2f â %.3f s\n' % (res['run_time_mean'], res['run_time_stddev'])) + out_file.write('\tDeviation: %+.1f %%\n\n' % (run_time_reldev * 100.0)) + out_file.write('Memory usage:\n') + out_file.write('\tReference: %.1f â %.2f 
kB\n' % (ref['memory_usage_mean'] / 1024.0, ref['memory_usage_stddev'] / 1024.0)) + out_file.write('\tResult: %.1f â %.2f kB\n' % (res['memory_usage_mean'] / 1024.0, res['memory_usage_stddev'] / 1024.0)) + out_file.write('\tDeviation: %+.1f %%\n\n' % (memory_usage_reldev * 100.0)) + + def collect_stats(self, stats, ref_path): + if stats is None: + stats = { 'run_time_sum': 0.0 + , 'memory_usage_sum': 0.0 } + + out_path = os.path.join(ref_path, self.get_name()) + try: + with open(out_path, 'r') as out_file: + out = json.load(out_file) + except IOError: + return stats + + stats['run_time_sum'] += out['run_time_mean'] + stats['memory_usage_sum'] += out['memory_usage_mean'] + + return stats + + def print_stats(self, stats, ref_stats = None): + if stats is None: + return + + if ref_stats is None: + self.printer.printout_ln('%s: Cummulative run time: %.2f min' % (self.get_name(), stats['run_time_sum'] / 60.0)) + self.printer.printout_ln('%s: Cummulative memory usage: %.1f MB' % (self.get_name(), stats['memory_usage_sum'] / 1024.0 / 1024.0)) + else: + run_time_reldev = reldev(stats['run_time_sum'], ref_stats['run_time_sum']) + memory_usage_reldev = reldev(stats['memory_usage_sum'], ref_stats['memory_usage_sum']) + + self.printer.printout_ln('%s: Cummulative run time: %.2f min (%+.1f %% from %.2f min)' % + ( self.get_name() + , stats['run_time_sum'] / 60.0 + , run_time_reldev + , ref_stats['run_time_sum'] / 60.0 )) + self.printer.printout_ln('%s: Cummulative memory usage: %.1f MB (%+.1f %% from %.1f MB)' % + ( self.get_name() + , stats['memory_usage_sum'] / 1024.0 / 1024.0 + , memory_usage_reldev + , ref_stats['memory_usage_sum'] / 1024.0 / 1024.0 )) + + def _measure_command(self, cmd, out_path): + + run_times = [] + memory_usages = [] + + with open(os.devnull, 'w') as devnull: + for _ in range(self._warm_up_iterations): + process = subprocess.Popen(cmd, stdout = devnull, stderr = devnull) + + if not os.WIFEXITED(process.wait()): + return False + + for _ in 
range(self._iterations): + process = subprocess.Popen(cmd, stdout = devnull, stderr = devnull) + + _, status, resources = os.wait4(process.pid, 0) + + if not os.WIFEXITED(status): + return False + + run_times.append(resources.ru_utime + resources.ru_stime) + memory_usages.append(resources.ru_maxrss) + + run_time_mean = mean(run_times) + run_time_stddev = stddev(run_times, run_time_mean) + + memory_usage_mean = mean(memory_usages) + memory_usage_stddev = stddev(memory_usages, memory_usage_mean) + + with open(out_path, 'w') as out_file: + json.dump({ 'run_time_mean': run_time_mean + , 'run_time_stddev': run_time_stddev + , 'memory_usage_mean': memory_usage_mean + , 'memory_usage_stddev': memory_usage_stddev } + , out_file) + + return True + + def create_refs(self, doc_path, refs_path, password = None): + raise NotImplementedError diff --git a/regtest/backends/perfsplash.py b/regtest/backends/perfsplash.py index d84b8cd..c634120 100644 --- a/regtest/backends/perfsplash.py +++ b/regtest/backends/perfsplash.py @@ -17,12 +17,13 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA from backends import Backend, register_backend +from backends.perf import Perf import os -class PerfSplash(Backend): +class PerfSplash(Perf): def __init__(self, name): - Backend.__init__(self, name, '.txt') + Perf.__init__(self, name) self._pdftoppm = os.path.join(self._utilsdir, 'pdftoppm'); def create_refs(self, doc_path, refs_path, password = None): @@ -33,18 +34,5 @@ class PerfSplash(Backend): return self._measure_command(cmd, out_path) - def _match_checksums(self, ref_path, ref_checksum, res_path, res_checksum): - return self._match_resources(ref_path, res_path) - - def _create_diff(self, ref_path, res_path): - self._diff_resources(ref_path, res_path) - - def collect_stats(self, stats, ref_path): - out_path = os.path.join(ref_path, self.get_name()) - return self._collect_resources(stats, out_path) - - def print_stats(self, printer, stats, ref_stats = None): - 
self._print_resources(printer, stats, ref_stats) - register_backend('perfsplash', PerfSplash) diff --git a/regtest/backends/perftext.py b/regtest/backends/perftext.py index e668e8f..8b9855c 100644 --- a/regtest/backends/perftext.py +++ b/regtest/backends/perftext.py @@ -16,13 +16,14 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -from backends import Backend, register_backend +from backends import register_backend +from backends.perf import Perf import os -class PerfText(Backend): +class PerfText(Perf): def __init__(self, name): - Backend.__init__(self, name) + Perf.__init__(self, name) self._pdftotext = os.path.join(self._utilsdir, 'pdftotext'); def create_refs(self, doc_path, refs_path, password = None): @@ -33,18 +34,5 @@ class PerfText(Backend): return self._measure_command(cmd, out_path) - def _match_checksums(self, ref_path, ref_checksum, res_path, res_checksum): - return self._match_resources(ref_path, res_path) - - def _create_diff(self, ref_path, res_path): - self._diff_resources(ref_path, res_path) - - def collect_stats(self, stats, ref_path): - out_path = os.path.join(ref_path, self.get_name()) - return self._collect_resources(stats, out_path) - - def print_stats(self, printer, stats, ref_stats = None): - self._print_resources(printer, stats, ref_stats) - register_backend('perftext', PerfText) -- 2.6.4
signature.asc
Description: OpenPGP digital signature
_______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
