# HG changeset patch # User Thomas Pelle Jakobsen <[EMAIL PROTECTED]> # Date 1226015502 -3600 # Node ID 75e5113f27777649c2001b1221c9717d8d375423 # Parent 0985564470de2bb2c5247effd30dc40f74048f17 Added benchmark suite class.
# -*- coding: utf-8 -*-
#
# Extracted from the patch:
#   diff -r 0985564470de -r 75e5113f2777 apps/benchmark/suite.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/apps/benchmark/suite.py Fri Nov 07 00:51:42 2008 +0100
#   @@ -0,0 +1,279 @@
#
# Copyright 2007, 2008 VIFF Development Team.
#
# This file is part of VIFF, the Virtual Ideal Functionality Framework.
#
# VIFF is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License (LGPL) as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# VIFF is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
# Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with VIFF. If not, see <http://www.gnu.org/licenses/>.
"""
  Description:
    Main goal is to make it easy to write and run VIFF benchmarks while
    at the same time not limiting the kind of benchmarks that could be
    made. Also, the solution should scale as the number of benchmarks
    increases. Finally, the benchmark data should be collected in a central
    database rather than as a bunch of text files.

    A benchmark is created by subclassing either Benchmark or
    VIFFBenchmark. By subclassing VIFFBenchmark, you need only specify the
    runtime that is to be used, the protocol that is to be benchmarked, and
    the code that does the measure. See the examples.


  Features:
    When setting up a suite, one can specify which revision to benchmark.
    Uses ssh and thus benefits from key agents like ssh-agent.
    Non-viff benchmarks can be done by subclassing Benchmark while VIFF
      benchmarks can easily be benchmarked by subclassing VIFFBenchmark.
    Data is automatically stored in central database.
    Benchmark results are stored for each individual run. This enables all
      kinds of statistics to be applied at a later time, since no info is
      lost. E.g. storing only mean values would prevent calculating
      confidence intervals and other statistical stuff.
    Dynamic approach: All parameter names are available except a few
      pre-defined, e.g. work_dir, player_id, ... (TODO: List them here.)


  Issues:
    Looks like the first run takes longer time than the rest. Is that ok?
    Results for all runs for a benchmark are kept in memory at once.
      Problem?
    Currently master only works on *nix since we use select.
    Benchmark slaves work only on *nix now due to small issues with pipes,
      etc.
    It is assumed that all benchmark hosts have access to the same file
      system and that the dir supplied to the suite is on this shared
      file system.
    Can we run multiple benchmark slaves on one host?
    Currently, default parameters are always used, e.g. use_ssl = no.
    Support for string, float and boolean results. Currently only integers
      (up to Mysql.BIGNUM) are supported.
    We assume that python setup.py install --home=$HOME/opt overwrites old
      installation nicely.
    Log err and out nicely to various files and/or use python logging.
    Write unit tests!
    All random seeds should come in as parameters.
    Currently, the runs are done in sequence. Maybe also support parallel
      runs; this is ok if benchmark doesn't measure bandwidth or time, and
      it is (presumably) faster.
    The table TimedResults cannot be used yet.
    The benchmark suite must be run in the viff/benchmark dir due to scp
      copy of the benchmark classes, ok?
    Better documentation.
    Security. Currently, password to the mysql db is supplied on the
      command line both in example.py and when executing benchmark.py on
      each host. This is not secure as everyone having access to the
      benchmark computers will be able to read off the password using
      e.g. ps -eLf | grep benchmark.py.
    Can't get the popen objects to finish when ssh finishes, so I've done a
      hack by letting each benchmark write COMPLETED WITH RETURNCODE x as
      the last thing. Not pretty, but it works (at least now..)
    Non-VIFF benchmarks are always run on the same host that starts the
      suite. It probably would be nice to be able to specify which host
      to use for the benchmark.
    VIFFBenchmark reports back to master on the fly. Would also be nice
      if non-VIFF benchmarks did that.

  Wishlist:
    Make it even easier for user by letting him specify that this protocol
    should be timed, or this protocol should be measured w.r.t memory
    usage, etc. This should be done in a way that doesn't prohibit user
    from writing his own benchmarks measuring things in his own way. This
    will also help make the typical benchmarks of timing, memory etc. more
    comparable.

    Support for other databases than mysql.

    Code would maybe benefit from using an ORM (Object-Relational Mapping)
    instead of using raw SQL statements.

"""

import time
import copy
import os
import select
import sys
import re
from viff.config import load_config
from twisted.internet import reactor
from database import Database
from util import exec_on_host


def parse(args):
    """Create a dict from a list of strings a la k1=val1.

    The first element of args is skipped (presumably the program name,
    as in sys.argv -- verify against callers).

    Each remaining element is split at its LAST '=' into a name and a
    value. The value is stored as an int when int() accepts it and as
    the original string otherwise.

    Raises ValueError if an element contains no '=' at all.
    """
    res = {}
    for arg in args[1:]:
        # Note: Splitting from the right allows attribute names to
        # contain '=' (values, consequently, cannot).
        pieces = arg.rsplit("=", 1)
        if len(pieces) != 2:
            # Previously this surfaced as a bare IndexError from s[1].
            raise ValueError(
                "Expected an argument of the form key=value, got %r"
                % (arg,))
        name, value = pieces
        # TODO: Info about attribute types should be included. Here, we
        # simply treat an attribute as int if possible and otherwise
        # as a string.
        try:
            res[name] = int(value)
        except ValueError:
            res[name] = value
    return res


class Suite:

    # TODO: Change revision to hg_revision.

    """
    hosts: A list of (hostname, port). This is the list of all available hosts
        that can be used for the benchmark and the port numbers that should be
        used on each host. As hostname you can either supply a real hostname,
        e.g. camel17.daimi.au.dk, or an integer which then refers to the
        host_id in the benchmark database. This is useful if one host like
        camel17.daimi.au.dk is used with several configurations and thus should
        be treated as multiple "hosts". If multiple such host configurations
        exist in the database and only the string hostname is given, an
        exception is thrown.

        Note that this should be the complete list of hosts that can be used
        for the benchmark. Some of the benchmarks will only use a subset of the
        hosts listed.

        Note also that the hosts are not necessarily given protocol ids in the
        same order as this list.

    user: The username that should be used to log into the benchmark hosts.
        TODO: Defaults to the username of the user executing this script.

    work_dir: A directory on a shared filesystem that all the benchmark slaves
        have access to. Here, viff will be checked out and temporary files will
        possibly be written. TODO: Defaults to same directory as on the master
        host (e.g. where this script is executed).

    database: The benchmark database. Note that the credentials of this
        database should be set up to provide write access to some of the
        database tables.

    revision: The VIFF revision which should be benchmarked. Defaults to tip.

    hg_repository: The repository from where the revision is checked out. This
        could be the main repository http://hg.viff.dk/viff, but you can also
        use your own clone such as ssh://[EMAIL PROTECTED]/viff-benchmark. If
        no hg_repository is supplied it is assumed that a hg repository clone
        already exists in work_dir/viff and the needed revision is simply
        checked out from this clone. TODO: Add support for pull.

    viff_dir: Where VIFF should be installed. When the appropriate VIFF
        revision has been checked out, the suite executes

            python setup.py install --home=viff_dir

        on one of the available hosts. Make sure that your PATH and PYTHONPATH
        are set up correspondingly, e.g. as described in "Installing from
        Source" at http://viff.dk/doc/install.html. One example could be to use

            viff_dir = $HOME/opt

        and to include these in your .bashrc file:

            export PYTHONPATH=$PYTHONPATH:$HOME/opt/lib/python
            export PATH=$PATH:$HOME/opt/bin

    """

    # Database settings copied onto a benchmark when the benchmark does
    # not carry its own value. The same name is used as the key in
    # benchmark.attr and as the attribute on the database object.
    _DB_ATTRIBUTES = ('db_host', 'db_name', 'db_user', 'db_passwd',
                      'db_port')

    def __init__(self, database, hosts, user, work_dir, viff_dir,
                 revision=None,
                 hg_repository=None):
        """Register a new suite in the database and resolve the hosts.

        See the class docstring for the meaning of each argument.
        """
        self.user = user
        self.viff_dir = viff_dir
        self.revision = revision
        self.benchmarks = {}
        # Both dicts are keyed by the host_id used in the database.
        self.host_name = {}
        self.host_port = {}
        self.database = database
        self.hg_repository = hg_repository
        self.suite_id = self.database.create_suite(revision)
        self.work_dir = work_dir
        for hostname, port in hosts:
            # isinstance (rather than an exact type comparison) also
            # accepts subclasses of str as host names.
            if isinstance(hostname, str):
                host_id = database.get_host_id(hostname)
                self.host_name[host_id] = hostname
                self.host_port[host_id] = port
            else:
                # Anything else is taken to be a host_id already.
                self.host_name[hostname] = database.get_host_name(hostname)
                self.host_port[hostname] = port

    def setup(self):
        """Check out, update and install VIFF via one of the hosts.

        Raises RuntimeError if the suite has no hosts and ValueError if
        work_dir is empty.
        """
        print("Setting up Suite")
        if not self.host_name:
            raise RuntimeError(
                "Cannot set up suite: no benchmark hosts were given.")
        if not self.work_dir:
            # An empty work_dir would turn the commands below into
            # "rm -rf /viff" and "cd /viff".
            raise ValueError(
                "Cannot set up suite: work_dir must not be empty.")
        # Any host will do; the work dir is assumed to be shared.
        somehost = list(self.host_name.values())[0]

        # If user supplied a hg_repository, then check out viff from there.
        # TODO: Take care using rm -rf in a script like this!!!
        # The steps are chained with && so that a failing step (e.g. a
        # cd into a missing directory) stops the remaining ones from
        # running in the wrong directory.
        if self.hg_repository:
            exec_on_host(self.user, somehost,
                         ["rm -rf %s/viff && cd %s && hg clone %s viff" %
                          (self.work_dir, self.work_dir, self.hg_repository)])

        # If user supplied revision, check it out. Otherwise, check out the tip.
        if self.revision:
            rev = "--rev %s" % self.revision
        else:
            rev = ""
        exec_on_host(self.user, somehost,
                     ["cd %s/viff && hg update --clean %s" %
                      (self.work_dir, rev)])

        # Build VIFF.
        exec_on_host(self.user, somehost,
                     ["cd %s/viff && python setup.py install --home=%s" %
                      (self.work_dir, self.viff_dir)])

    def teardown(self):
        """Clean up after a run. Currently this only prints a message."""
        print("Tearing down Suite")
        # TODO: Remove local checkout but not the hg clone?

    def add_benchmark(self, benchmark):
        """Register a benchmark with this suite and with the database.

        Note that if the benchmark already has database parameters, e.g.
        db_host, db_user, db_passwd, db_port, db_name, these are used to
        report the result. If they are not set, the same database parameters
        are used as those given when creating the Suite.
        """

        # TODO: Hack -> Benchmarks name is derived from class name.
        # __name__ gives the bare class name for both classic and
        # new-style classes; splitting str(cls) on '.' left a trailing
        # "'>" for new-style classes.
        benchmark_name = benchmark.__class__.__name__
        benchmark_id = self.database.add_benchmark(self.suite_id,
                                                   benchmark_name)

        # Database host, port, name, user and passwd supplied to the benchmark
        # override those supplied to the suite.
        #
        # This makes it possible for a benchmark to report back to another
        # database or using other credentials than the administrator
        # credentials that must be supplied in the database given when
        # initializing the Suite. Normally, it should be enough to use a
        # 'benchmark' user in the database that has only write access to the
        # Result and TimedResult tables.
        benchmark.attr['benchmark_id'] = benchmark_id
        for key in self._DB_ATTRIBUTES:
            # Only read the suite's setting when it is actually needed.
            if key not in benchmark.attr:
                benchmark.attr[key] = getattr(self.database, key)
        self.benchmarks[benchmark_id] = benchmark
        for atr, val in benchmark.attr.items():
            self.database.add_benchmark_attribute(benchmark_id, atr, val)

    def run(self):
        """Set up the suite, run every added benchmark, then tear down.

        teardown() is called even if a benchmark raises; the exception
        still propagates to the caller. It is not called when setup()
        itself fails.
        """
        self.setup()
        try:
            for benchmark in self.benchmarks.values():
                benchmark.run_on_master(self)
        finally:
            self.teardown()

# _______________________________________________
# viff-patches mailing list
# [email protected]
# http://lists.viff.dk/listinfo.cgi/viff-patches-viff.dk
