Hi!

I wonder whether any project has gotten a multi-server setup to work based on the current code and documentation (http://boinc.berkeley.edu/trac/wiki/MultiHost), and how.

What I found is that the current stuff silently assumes that there is a (NFS) shared project directory mounted on all project servers on the very same physical path, which at least for us isn't the case. We do have a project directory path that is common on all servers, but set up with symlinks. The physical path varies (for good reasons), and the project directory (including subdirectories like the pid directories) isn't shared (because some remote servers are far away).

In addition the hardcoded ssh command used in the start/stop/status script is completely independent of the ssh configuration for the server status page (SSP), which is at least confusing.

The attached series of patches is meant to fix that:

- the project path to use on all servers can now be configured as <project_dir>. For backwards compatibility the defaults in server_status.php and start are chosen in a way that the old behavior is unchanged ("../.." in SSP, os.getcwd() in start).

- the start script now uses the <ssh_exec> path for ssh if configured. The
default ssh in the start script is now '/usr/bin/ssh' (as already in the SSP).

- the pid of a daemon is now looked up in the pid directory on the _remote_ host via ssh, thus not requiring a shared project directory. Actually determining the pid and running ps to find out whether the daemon is running is done by a script (pshelper) executed on the remote host, requiring only one command to be executed remotely via ssh. Still, one ssh connection is required for every daemon on a remote host, which could be a significant slowdown. I'd rather handle all daemons running on one host in a single connection, but I couldn't get this finished now. If my current solution is to be used, pshelper must be put into the bin/ directory of the project on the remote server (make_project should be updated to do this). If a remote host needs a different 'ps' command, it must be edited in pshelper on that host.

- if a daemon is disabled, daemon_status() returns immediately without looking up the PID and checking whether the daemon is actually running (which wouldn't change the return value anyway).

- the ssh command that is executed on the remote host by the start script is only printed when the start script is run in verbose mode. In particular this avoids unnecessary output and thus mails when run by cron (start --cron).

Best,
Bernd

>From 281b9ef3c3d29a0cb6c2a26d725557ffb1af17a3 Mon Sep 17 00:00:00 2001
From: Bernd Machenschalk <[email protected]>
Date: Mon, 3 Dec 2012 10:07:54 +0000
Subject: [PATCH 1/5] only print remote command in verbose mode

(in particular not when run with --cron)
---
 sched/start | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sched/start b/sched/start
index fb01cc1..98ffc93 100755
--- a/sched/start
+++ b/sched/start
@@ -745,7 +745,7 @@ if is_main_host:
         remote_cmd = [ 'ssh', host, 'cd', cwd, ' && ' ] + sys.argv
         if verbose:
             remote_cmd += [ '-v' ]
-        print 'running ', ' '.join(remote_cmd)
+            print 'running ', ' '.join(remote_cmd)
         os.spawnvp(wait_mode, remote_cmd[0], remote_cmd)
 
 os.unlink(start_lockfile)
-- 
1.7.12.2

>From dca59f85a1aa07b85b046bd40bda344b76700577 Mon Sep 17 00:00:00 2001
From: Bernd Machenschalk <[email protected]>
Date: Mon, 3 Dec 2012 10:09:54 +0000
Subject: [PATCH 2/5] sync configuration of remote server management with PHP
 (server status page)

- configure ssh executable to use with <ssh_exec>

- configure a project directory common to all hosts with <project_dir>
---
 sched/start | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/sched/start b/sched/start
index 98ffc93..962b385 100755
--- a/sched/start
+++ b/sched/start
@@ -611,7 +611,6 @@ def command_show_config():
 local_hostname = socket.gethostname()
 local_hostname = local_hostname.split('.')[0]
 # print 'local hostname: ', local_hostname
-cwd = os.getcwd()
 program_name = os.path.basename(sys.argv[0])
 if program_name == 'start':
     command = command_enable_start
@@ -709,6 +708,18 @@ if not command:
 config = configxml.ConfigFile(config_filename).read()
 run_state = configxml.RunStateFile(run_state_filename).read(failopen_ok = True)
 
+if 'ssh_exec' in config.config.__dict__:
+    ssh = config.config.ssh_exec
+else:
+    ssh = '/usr/bin/ssh'
+
+if 'project_dir' in config.config.__dict__:
+    cwd = config.config.project_dir + '/bin'
+    cmd = './' + program_name
+else:
+    cwd = os.getcwd()
+    cmd = sys.argv[0]
+
 os.chdir(boinc_project_path.project_path())
 bin_dir = get_dir('bin')
 cgi_bin_dir = get_dir('cgi_bin')
@@ -742,7 +753,7 @@ if is_main_host:
     for host in other_hosts:
         if host == local_hostname:
             continue
-        remote_cmd = [ 'ssh', host, 'cd', cwd, ' && ' ] + sys.argv
+        remote_cmd = [ ssh, host, 'cd', cwd, ' && ', cmd ] + sys.argv[1:]
         if verbose:
             remote_cmd += [ '-v' ]
             print 'running ', ' '.join(remote_cmd)
-- 
1.7.12.2

>From c59637123a92d24c579ed2c760e6b025d7bbe6a8 Mon Sep 17 00:00:00 2001
From: Bernd Machenschalk <[email protected]>
Date: Mon, 3 Dec 2012 10:12:32 +0000
Subject: [PATCH 3/5] added pshelper

- script to find out whether a daemon is running on a remote server

- run via ssh by the server status page

- returns the process line returned by 'ps ww pid' or 'false'
---
 sched/pshelper | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100755 sched/pshelper

diff --git a/sched/pshelper b/sched/pshelper
new file mode 100755
index 0000000..042c8e1
--- /dev/null
+++ b/sched/pshelper
@@ -0,0 +1,3 @@
+#!/bin/sh
+pid=`cat "$1"` &&
+ps ww $pid | grep "^ *\($pid\) " || echo false
-- 
1.7.12.2

>From b3ef26e264b242d89e5ee7490afa4cc8761cee72 Mon Sep 17 00:00:00 2001
From: Bernd Machenschalk <[email protected]>
Date: Mon, 3 Dec 2012 10:26:07 +0000
Subject: [PATCH 4/5] server_status.php: fix monitoring of daemons running on
 remote hosts

- if a daemon is disabled, don't bother investigating the pid

- doesn't require an NFS-shared project (pid) directory

- uses pshelper run on remote machine via ssh

- still opens a new ssh connection for every daemon check
---
 html/user/server_status.php | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/html/user/server_status.php b/html/user/server_status.php
index 4b2c997..a435495 100644
--- a/html/user/server_status.php
+++ b/html/user/server_status.php
@@ -61,24 +61,30 @@ $xml = get_int("xml", true);
 // daemon status outputs: 1 (running) 0 (not running) or -1 (disabled)
 //
 function daemon_status($host, $pidname, $progname, $disabled) {
-    global $ssh_exe, $ps_exe, $project_host;
-    $path = "../../pid_$host/$pidname";
+    global $ssh_exe, $ps_exe, $project_host, $project_dir;
+    if ($disabled == 1) return -1;
+    $path = "$project_dir/pid_$host/$pidname";
+    if ($host != $project_host) {
+        $command = "$ssh_exe $host $project_dir/bin/pshelper $path";
+        $foo = exec($command);
+        $running = 1;
+        if ($foo) {
+            if (strstr($foo, "false")) $running = 0;
+        } else $running = 0;
+        return $running;
+    }
     $running = 0;
     if (is_file($path)) {
         $pid = file_get_contents($path);
         if ($pid) {
             $pid = trim($pid);
             $command = "$ps_exe ww $pid";
-            if ($host != $project_host) {
-                $command = "$ssh_exe $host " . $command;
-            }
             $foo = exec($command);
             if ($foo) {
                 if (strstr($foo, (string)$pid)) $running = 1;
             }
         }
     }
-    if ($disabled == 1) $running = -1;
     return $running;
 }
 
@@ -207,6 +213,10 @@ $uldl_host = parse_element($config_vars,"<uldl_host>");
 if ($uldl_host == "") {
     $uldl_host = $project_host;
 }
+$project_dir = parse_element($config_vars,"<project_dir>");
+if ($project_dir == "") {
+    $project_dir = "../..";
+}
 $ssh_exe = parse_element($config_vars,"<ssh_exe>");
 if ($ssh_exe == "") {
     $ssh_exe = "/usr/bin/ssh";
-- 
1.7.12.2

_______________________________________________
boinc_dev mailing list
[email protected]
http://lists.ssl.berkeley.edu/mailman/listinfo/boinc_dev
To unsubscribe, visit the above URL and
(near bottom of page) enter your email address.

Reply via email to