Author: arkurth
Date: Tue Aug 23 16:04:09 2011
New Revision: 1160762
URL: http://svn.apache.org/viewvc?rev=1160762&view=rev
Log:
VCL-503
Added code to utils.pm::run_ssh_command to allow a timeout value to be specified
in order to prevent hung SSH processes from hanging for a very long time or
indefinitely. The default value for now is 0, meaning that the command is never
timed out.
Added utils.pm kill_child_processes subroutine. If the run_ssh_command timeout
value is reached, the SSH process is still running so the kill_child_processes
subroutine is called to kill all child processes belonging to the reservation
process.
Added a timeout_seconds argument set to 15 seconds to the run_ssh_command call
in OS.pm::is_ssh_responding. Also added a 20 second timeout argument to the call
that executes qwinsta.exe in Windows.pm::user_logged_in. These are the calls
that most often cause an SSH process to hang.
VCL-465
Updated OS.pm::get_public_interface_name to ignore interfaces named 'lo' and
'sit[0-9]'. Changed logic to no longer ignore interfaces without a bound IP
address. This fixes problems when static addresses are assigned under Linux:
unlike under Windows, the correct public interface does not come up with an
autogenerated IP address, so the code was ignoring it.
Other
Updated OS.pm::create_text_file to correct the line endings depending on the OS
type.
Modified:
incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm
incubator/vcl/trunk/managementnode/lib/VCL/utils.pm
Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
URL:
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Tue Aug 23 16:04:09
2011
@@ -471,6 +471,7 @@ sub is_ssh_responding {
command => "echo testing ssh on $computer_node_name",
max_attempts => $max_attempts,
output_level => 0,
+ timeout_seconds => 15,
});
# The exit status will be 0 if the command succeeded
@@ -957,7 +958,11 @@ sub get_public_interface_name {
my $description =
$network_configuration->{$check_interface_name}{description} || '';
# Check if the interface should be ignored based on the name or
description
- if ($check_interface_name =~
/(loopback|vmnet|afs|tunnel|6to4|isatap|teredo)/i) {
+ if ($check_interface_name =~ /^(lo|sit\d)$/i) {
+ notify($ERRORS{'DEBUG'}, 0, "interface
'$check_interface_name' ignored because its name is '$1'");
+ next INTERFACE;
+ }
+ elsif ($check_interface_name =~
/(loopback|vmnet|afs|tunnel|6to4|isatap|teredo)/i) {
notify($ERRORS{'DEBUG'}, 0, "interface
'$check_interface_name' ignored because its name contains '$1'");
next INTERFACE;
}
@@ -966,15 +971,6 @@ sub get_public_interface_name {
next INTERFACE;
}
- # Get the IP addresses assigned to the interface
- my @check_ip_addresses = keys
%{$network_configuration->{$check_interface_name}{ip_address}};
-
- # Ignore interface if it doesn't have an IP address
- if (!@check_ip_addresses) {
- notify($ERRORS{'DEBUG'}, 0, "interface
'$check_interface_name' ignored because it is not assigned an IP address");
- next INTERFACE;
- }
-
# If $public_interface_name hasn't been set yet, set it and
continue checking the next interface
if (!$public_interface_name) {
$public_interface_name = $check_interface_name;
@@ -1501,13 +1497,17 @@ sub get_public_default_gateway {
=head2 create_text_file
- Parameters : $file_path, $file_contents
+ Parameters : $file_path, $file_contents, $no_correct_line_endings (optional)
Returns : boolean
Description : Creates a text file on the computer. The $file_contents
string argument is converted to ASCII hex values. These values
are echo'd on the computer which avoids problems with special
characters and escaping. If the file already exists it is
overwritten.
+ The line endings within the $file_contents string are corrected
+ by default to Windows-style (\r\n) or Linux-style (\n) depending
+ on the OS. An optional boolean 3rd argument can be specified to
+ prevent the string from being altered.
=cut
@@ -1524,8 +1524,16 @@ sub create_text_file {
return;
}
- my $management_node_keys = $self->data->get_management_node_keys();
- my $computer_node_name = $self->data->get_computer_node_name();
+ my $computer_node_name = $self->data->get_computer_node_name();
+ my $image_os_type = $self->data->get_image_os_type();
+
+ # Remove Windows-style carriage returns if the image OS isn't Windows
+ if ($image_os_type =~ /windows/) {
+ $file_contents_string =~ s/\r*\n/\r\n/g;
+ }
+ else {
+ $file_contents_string =~ s/\r//g;
+ }
# Convert the string to a string containing the hex value of each
character
# This is done to avoid problems with special characters in the file
contents
@@ -1541,8 +1549,8 @@ sub create_text_file {
# Create a command to echo the hex string to the file
# Use -e to enable interpretation of backslash escapes
- my $command .= "echo -e \"$hex_string\" > $file_path";
- my ($exit_status, $output) = run_ssh_command($computer_node_name,
$management_node_keys, $command, '', '', 0);
+ my $command .= "echo -n -e \"$hex_string\" > $file_path";
+ my ($exit_status, $output) = $self->execute($command);
if (!defined($output)) {
notify($ERRORS{'WARNING'}, 0, "failed to execute ssh command to
create file on $computer_node_name: $file_path");
return;
Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm
URL:
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm Tue Aug 23
16:04:09 2011
@@ -8038,7 +8038,8 @@ sub user_logged_in {
notify($ERRORS{'DEBUG'}, 0, "checking if $username is logged in to
$computer_node_name");
# Run qwinsta.exe to display terminal session information
- my ($exit_status, $output) = run_ssh_command($computer_node_name,
$management_node_keys, "$system32_path/qwinsta.exe");
+ # Set command timeout argument because this command occasionally hangs
+ my ($exit_status, $output) = run_ssh_command($computer_node_name,
$management_node_keys, "$system32_path/qwinsta.exe", '', '', 1, 20);
if ($exit_status > 0) {
notify($ERRORS{'WARNING'}, 0, "failed to run qwinsta.exe on
$computer_node_name, exit status: $exit_status, output:\n@{$output}");
return;
Modified: incubator/vcl/trunk/managementnode/lib/VCL/utils.pm
URL:
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/utils.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/utils.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/utils.pm Tue Aug 23 16:04:09 2011
@@ -164,6 +164,7 @@ our @EXPORT = qw(
is_valid_ip_address
isconnected
isfilelocked
+ kill_child_processes
kill_reservation_process
known_hosts
lockfile
@@ -5393,16 +5394,17 @@ EOF
=head2 run_ssh_command
- Parameters : $node, $identity_path, $command, $user, $port
+ Parameters : $node, $identity_path, $command, $user, $port, $output_level,
$timeout_seconds
-or-
Hash reference with the following keys:
- node - node name (required)
- command - command to be executed
remotely (required)
- identity_paths - string containing
paths to identity key files separated by commas (optional)
- user - user to run remote command as
(optional, default is 'root')
- port - SSH port number (optional,
default is 22)
- output_level - allows the amount of
output to be controlled: 0, 1, or 2 (optional)
- max_attempts - maximum number of SSH
attempts to make
+ node - node name (required)
+ command - command to be
executed remotely (required)
+ identity_paths - string
containing paths to identity key files separated by commas (optional)
+ user - user to run remote
command as (optional, default is 'root')
+ port - SSH port number
(optional, default is 22)
+ output_level - allows the
amount of output to be controlled: 0, 1, or 2 (optional)
+ max_attempts - maximum number
of SSH attempts to make
+ timeout_seconds - maximum
number seconds SSH process can run before being terminated
Returns : If successful: array:
$array[0] = the exit status of the command
$array[1] = reference to array
containing lines of output
@@ -5412,7 +5414,7 @@ EOF
=cut
sub run_ssh_command {
- my ($node, $identity_paths, $command, $user, $port, $output_level) = @_;
+ my ($node, $identity_paths, $command, $user, $port, $output_level,
$timeout_seconds) = @_;
my $max_attempts = 3;
@@ -5426,7 +5428,7 @@ sub run_ssh_command {
$port = $arguments->{port} || '22';
$output_level = $arguments->{output_level};
$max_attempts = $arguments->{max_attempts} || 3;
-
+ $timeout_seconds = $arguments->{timeout_seconds};
}
# Determine the output level if it was specified
@@ -5456,7 +5458,8 @@ sub run_ssh_command {
# Set default values if not passed as an argument
$user = "root" if (!$user);
- $port = 22 if (!$port);
+ $port = 22 if (!$port);
+ $timeout_seconds = 0 if (!$timeout_seconds);
$identity_paths = $ENV{management_node_info}{keys} if (!defined
$identity_paths || length($identity_paths) == 0);
# TODO: Add ssh path to config file and set global variable
@@ -5511,9 +5514,9 @@ sub run_ssh_command {
my $ssh_command = "$ssh_path $identity_paths -o
StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o
ConnectionAttempts=1 -o ConnectTimeout=3 -l $user -p $port -x $node '$command'
2>&1";
# Execute the command
- my $ssh_output;
- my $ssh_output_formatted;
- my $attempts = 0;
+ my $ssh_output = '';
+ my $ssh_output_formatted = '';
+ my $attempts = 0;
my $exit_status = 255;
# Make multiple attempts if failure occurs
@@ -5538,28 +5541,53 @@ sub run_ssh_command {
notify($ERRORS{'DEBUG'}, 0, "attempt
$attempts/$max_attempts: executing SSH command on $node:\n$ssh_command") if
$output_level;
}
- # Execute the command
- $ssh_output = `$ssh_command`;
-
- # Bits 0-7 of $? are set to the signal the child process
received that caused it to die
- my $signal_number = $? & 127;
-
- # Bit 8 of $? will be true if a core dump occurred
- my $core_dump = $? & 128;
-
- # Bits 9-16 of $? contain the child process exit status
- $exit_status = $? >> 8;
+ # Enclose SSH command in an eval block and use alarm to
eventually timeout the SSH command if it hangs
+ my $start_time = time;
+ eval {
+ # Override the die and alarm handlers
+ local $SIG{__DIE__} = sub{};
+ local $SIG{ALRM} = sub { die "alarm\n" };
+
+ if ($timeout_seconds) {
+ notify($ERRORS{'DEBUG'}, 0, "waiting up to
$timeout_seconds seconds for SSH process to finish");
+ alarm $timeout_seconds;
+ }
+
+ # Execute the command
+ $ssh_output = `$ssh_command`;
+
+ # Save the exit status
+ $exit_status = $? >> 8;
+
+ # Ignore the returned value of $? if it is -1
+ # This likely means a Perl bug was encountered
+ # Assume command was successful
+ if ($? == -1) {
+ notify($ERRORS{'DEBUG'}, 0, "exit status
changed from $exit_status to 0, Perl bug likely encountered") if $output_level;
+ $exit_status = 0;
+ }
+
+ if ($timeout_seconds) {
+ # Cancel the timer
+ alarm 0;
+ }
+ };
+
+ my $duration = (time - $start_time);
- # Ignore the returned value of $? if it is -1
- # This likely means a Perl bug was encountered
- # Assume command was successful
- if ($? == -1) {
- notify($ERRORS{'DEBUG'}, 0, "exit status changed from
$exit_status to 0, Perl bug likely encountered") if $output_level;
- $exit_status = 0;
+ # Check if the timeout was reached
+ if ($EVAL_ERROR && $EVAL_ERROR eq "alarm\n") {
+ notify($ERRORS{'CRITICAL'}, 0, "attempt
$attempts/$max_attempts: SSH command timed out after $duration seconds, timeout
threshold: $timeout_seconds seconds, command: $node:\n$ssh_command");
+
+ # Kill the child processes of this reservation process
+ kill_child_processes($PID);
+ next;
+ }
+ elsif ($EVAL_ERROR) {
+ notify($ERRORS{'CRITICAL'}, 0, "attempt
$attempts/$max_attempts: eval error was generated attempting to run SSH
command: $node:\n$ssh_command, error: $EVAL_ERROR");
+ next;
}
- #notify($ERRORS{'DEBUG'}, 0, "\$?: $?, signal: $signal_number,
core dump: $core_dump, exit status: $exit_status");
-
# Strip out the key warning message from the output
$ssh_output =~ s/\@{10,}.*man-in-the-middle attacks\.//igs;
@@ -10443,6 +10471,66 @@ EOF
#/////////////////////////////////////////////////////////////////////////////
+=head2 kill_child_processes
+
+ Parameters : $parent_pid
+ Returns : boolean
+ Description : Kills all child processes belonging to the parent PID specified
+ as the argument.
+
+=cut
+
+sub kill_child_processes {
+ my @parent_pids = @_;
+ my $parent_pid = $parent_pids[-1];
+ my $parent_process_string = "parent PID: " . join(">", @parent_pids);
+
+ # Make sure the parent vcld daemon process didn't call this subroutine
for safety
+ # Prevents all reservations being processed from being killed
+ if ($ENV{vcld}) {
+ notify($ERRORS{'CRITICAL'}, 0, "kill_child_processes subroutine
called from the parent vcld process, not killing any processes for safety");
+ return;
+ }
+
+ notify($ERRORS{'DEBUG'}, 0, "$parent_process_string: attempting to kill
child processes");
+
+ my $command = "pgrep -flP $parent_pid | sort -r";
+ my ($exit_status, $output) = run_command($command, 1);
+
+ for my $line (@$output) {
+ # Make sure the line only contains a PID
+ my ($child_pid, $child_command) = $line =~ /^(\d+)\s+(.*)/;
+ if (!defined($child_pid) || !defined($child_command)) {
+ notify($ERRORS{'WARNING'}, 0, "$parent_process_string:
pgrep output line does not contain a PID and command:\nline:
'$child_pid'\ncommand: '$command'");
+ next;
+ }
+ elsif ($child_command =~ /$command/) {
+ # Ignore the pgrep command called to determine child
processes
+ next;
+ }
+
+ # Create a string containing the beginning and end of the child
process command to make log output more readable
+ my $child_command_summary = join('...', ($child_command =~
/^(.{10,20}).*(.{20,30})$/));
+
+ notify($ERRORS{'DEBUG'}, 0, "$parent_process_string, found
child process: $child_pid '$child_command_summary'");
+
+ # Recursively kill the child processes of the child process
+ kill_child_processes(@parent_pids, $child_pid);
+
+ my $kill_count = kill 9, $child_pid;
+ if ($kill_count) {
+ notify($ERRORS{'DEBUG'}, 0, "$parent_process_string,
killed child process: $child_pid (kill count: $kill_count)");
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "$parent_process_string,
kill command returned 0 attempting to kill child process: $child_pid");
+ }
+ }
+
+ return 1;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
1;
__END__