Author: arkurth
Date: Tue Aug 23 16:04:09 2011
New Revision: 1160762

URL: http://svn.apache.org/viewvc?rev=1160762&view=rev
Log:
VCL-503
Added code to utils.pm::run_ssh_command to allow a timeout value to be specified
in order to prevent SSH processes from hanging for a very long time or
indefinitely. The default value for now is 0, meaning that the command is never
timed out.

Added utils.pm::kill_child_processes subroutine. If the run_ssh_command timeout
value is reached, the SSH process is still running, so the kill_child_processes
subroutine is called to kill all child processes belonging to the reservation
process.

Added a timeout_seconds argument of 15 seconds to the run_ssh_command call in
OS.pm::is_ssh_responding. Also added a 20 second timeout argument to the call
that executes qwinsta.exe in Windows.pm::user_logged_in. These are the calls
that most often cause an SSH process to hang.

VCL-465
Updated OS.pm::get_public_interface_name to ignore interfaces named 'lo' and
'sit[0-9]'. Changed the logic so that interfaces without a bound IP address are
no longer ignored. This fixes problems when static addresses are assigned under
Linux: unlike under Windows, the correct public interface does not come up with
an autogenerated IP address, so the code was previously ignoring it.

Other
Updated OS.pm::create_text_file to convert the line endings of the file contents
to match the OS type (\r\n for Windows, \n otherwise).

Modified:
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm
    incubator/vcl/trunk/managementnode/lib/VCL/utils.pm

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Tue Aug 23 16:04:09 
2011
@@ -471,6 +471,7 @@ sub is_ssh_responding {
                        command => "echo testing ssh on $computer_node_name",
                        max_attempts => $max_attempts,
                        output_level => 0,
+                       timeout_seconds => 15,
                });
                
                # The exit status will be 0 if the command succeeded
@@ -957,7 +958,11 @@ sub get_public_interface_name {
                my $description = 
$network_configuration->{$check_interface_name}{description} || '';
                
                # Check if the interface should be ignored based on the name or 
description
-               if ($check_interface_name =~ 
/(loopback|vmnet|afs|tunnel|6to4|isatap|teredo)/i) {
+               if ($check_interface_name =~ /^(lo|sit\d)$/i) {
+                       notify($ERRORS{'DEBUG'}, 0, "interface 
'$check_interface_name' ignored because its name is '$1'");
+                       next INTERFACE;
+               }
+               elsif ($check_interface_name =~ 
/(loopback|vmnet|afs|tunnel|6to4|isatap|teredo)/i) {
                        notify($ERRORS{'DEBUG'}, 0, "interface 
'$check_interface_name' ignored because its name contains '$1'");
                        next INTERFACE;
                }
@@ -966,15 +971,6 @@ sub get_public_interface_name {
                        next INTERFACE;
                }
                
-               # Get the IP addresses assigned to the interface
-               my @check_ip_addresses  = keys 
%{$network_configuration->{$check_interface_name}{ip_address}};
-               
-               # Ignore interface if it doesn't have an IP address
-               if (!@check_ip_addresses) {
-                       notify($ERRORS{'DEBUG'}, 0, "interface 
'$check_interface_name' ignored because it is not assigned an IP address");
-                       next INTERFACE;
-               }
-               
                # If $public_interface_name hasn't been set yet, set it and 
continue checking the next interface
                if (!$public_interface_name) {
                        $public_interface_name = $check_interface_name;
@@ -1501,13 +1497,17 @@ sub get_public_default_gateway {
 
 =head2 create_text_file
 
- Parameters  : $file_path, $file_contents
+ Parameters  : $file_path, $file_contents, $no_correct_line_endings (optional)
  Returns     : boolean
  Description : Creates a text file on the computer. The $file_contents
                string argument is converted to ASCII hex values. These values
                are echo'd on the computer which avoids problems with special
                characters and escaping. If the file already exists it is
                overwritten.
+               The line endings within the $file_contents string are corrected
+               by default to Windows-style (\r\n) or Linux-style (\n) depending
+               on the OS. An optional boolean 3rd argument can be specified to
+               prevent the string from being altered.
 
 =cut
 
@@ -1524,8 +1524,16 @@ sub create_text_file {
                return;
        }
        
-       my $management_node_keys = $self->data->get_management_node_keys();
-       my $computer_node_name   = $self->data->get_computer_node_name();
+       my $computer_node_name = $self->data->get_computer_node_name();
+       my $image_os_type = $self->data->get_image_os_type();
+       
+       # Remove Windows-style carriage returns if the image OS isn't Windows
+       if ($image_os_type =~ /windows/) {
+               $file_contents_string =~ s/\r*\n/\r\n/g;
+       }
+       else {
+               $file_contents_string =~ s/\r//g;
+       }
        
        # Convert the string to a string containing the hex value of each 
character
        # This is done to avoid problems with special characters in the file 
contents
@@ -1541,8 +1549,8 @@ sub create_text_file {
        
        # Create a command to echo the hex string to the file
        # Use -e to enable interpretation of backslash escapes
-       my $command .= "echo -e \"$hex_string\" > $file_path";
-       my ($exit_status, $output) = run_ssh_command($computer_node_name, 
$management_node_keys, $command, '', '', 0);
+       my $command .= "echo -n -e \"$hex_string\" > $file_path";
+       my ($exit_status, $output) = $self->execute($command);
        if (!defined($output)) {
                notify($ERRORS{'WARNING'}, 0, "failed to execute ssh command to 
create file on $computer_node_name: $file_path");
                return;

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm Tue Aug 23 
16:04:09 2011
@@ -8038,7 +8038,8 @@ sub user_logged_in {
        notify($ERRORS{'DEBUG'}, 0, "checking if $username is logged in to 
$computer_node_name");
 
        # Run qwinsta.exe to display terminal session information
-       my ($exit_status, $output) = run_ssh_command($computer_node_name, 
$management_node_keys, "$system32_path/qwinsta.exe");
+       # Set command timeout argument because this command occasionally hangs
+       my ($exit_status, $output) = run_ssh_command($computer_node_name, 
$management_node_keys, "$system32_path/qwinsta.exe", '', '', 1, 20);
        if ($exit_status > 0) {
                notify($ERRORS{'WARNING'}, 0, "failed to run qwinsta.exe on 
$computer_node_name, exit status: $exit_status, output:\n@{$output}");
                return;

Modified: incubator/vcl/trunk/managementnode/lib/VCL/utils.pm
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/utils.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/utils.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/utils.pm Tue Aug 23 16:04:09 2011
@@ -164,6 +164,7 @@ our @EXPORT = qw(
   is_valid_ip_address
   isconnected
   isfilelocked
+  kill_child_processes
   kill_reservation_process
   known_hosts
   lockfile
@@ -5393,16 +5394,17 @@ EOF
 
 =head2 run_ssh_command
 
- Parameters  : $node, $identity_path, $command, $user, $port
+ Parameters  : $node, $identity_path, $command, $user, $port, $output_level, 
$timeout_seconds
                                        -or-
                                        Hash reference with the following keys:
-                                       node - node name (required)
-                                       command - command to be executed 
remotely (required)
-                                       identity_paths - string containing 
paths to identity key files separated by commas (optional)
-                                       user - user to run remote command as 
(optional, default is 'root')
-                                       port - SSH port number (optional, 
default is 22)
-                                       output_level - allows the amount of 
output to be controlled: 0, 1, or 2 (optional)
-                                       max_attempts - maximum number of SSH 
attempts to make
+                                               node - node name (required)
+                                               command - command to be 
executed remotely (required)
+                                               identity_paths - string 
containing paths to identity key files separated by commas (optional)
+                                               user - user to run remote 
command as (optional, default is 'root')
+                                               port - SSH port number 
(optional, default is 22)
+                                               output_level - allows the 
amount of output to be controlled: 0, 1, or 2 (optional)
+                                               max_attempts - maximum number 
of SSH attempts to make
+                                               timeout_seconds - maximum 
number seconds SSH process can run before being terminated
  Returns     : If successful: array:
                   $array[0] = the exit status of the command
                                           $array[1] = reference to array 
containing lines of output
@@ -5412,7 +5414,7 @@ EOF
 =cut
 
 sub run_ssh_command {
-       my ($node, $identity_paths, $command, $user, $port, $output_level) = @_;
+       my ($node, $identity_paths, $command, $user, $port, $output_level, 
$timeout_seconds) = @_;
 
        my $max_attempts = 3;
        
@@ -5426,7 +5428,7 @@ sub run_ssh_command {
                $port = $arguments->{port} || '22';
                $output_level = $arguments->{output_level};
                $max_attempts = $arguments->{max_attempts} || 3;
-               
+               $timeout_seconds = $arguments->{timeout_seconds};
        }
        
        # Determine the output level if it was specified
@@ -5456,7 +5458,8 @@ sub run_ssh_command {
 
        # Set default values if not passed as an argument
        $user = "root" if (!$user);
-       $port = 22     if (!$port);
+       $port = 22 if (!$port);
+       $timeout_seconds = 0 if (!$timeout_seconds);
        $identity_paths = $ENV{management_node_info}{keys} if (!defined 
$identity_paths || length($identity_paths) == 0);
        
        # TODO: Add ssh path to config file and set global variable
@@ -5511,9 +5514,9 @@ sub run_ssh_command {
        my $ssh_command = "$ssh_path $identity_paths -o 
StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o 
ConnectionAttempts=1 -o ConnectTimeout=3 -l $user -p $port -x $node '$command' 
2>&1";
        
        # Execute the command
-       my $ssh_output;
-       my $ssh_output_formatted;
-       my $attempts        = 0;
+       my $ssh_output = '';
+       my $ssh_output_formatted = '';
+       my $attempts = 0;
        my $exit_status = 255;
 
        # Make multiple attempts if failure occurs
@@ -5538,28 +5541,53 @@ sub run_ssh_command {
                        notify($ERRORS{'DEBUG'}, 0, "attempt 
$attempts/$max_attempts: executing SSH command on $node:\n$ssh_command") if 
$output_level;
                }
                
-               # Execute the command
-               $ssh_output = `$ssh_command`;
-
-               # Bits 0-7 of $? are set to the signal the child process 
received that caused it to die
-               my $signal_number = $? & 127;
-               
-               # Bit 8 of $? will be true if a core dump occurred
-               my $core_dump = $? & 128;
-               
-               # Bits 9-16 of $? contain the child process exit status
-               $exit_status = $? >> 8;
+               # Enclose SSH command in an eval block and use alarm to 
eventually timeout the SSH command if it hangs
+               my $start_time = time;
+               eval {
+                       # Override the die and alarm handlers
+                       local $SIG{__DIE__} = sub{};
+                       local $SIG{ALRM} = sub { die "alarm\n" };
+                       
+                       if ($timeout_seconds) {
+                               notify($ERRORS{'DEBUG'}, 0, "waiting up to 
$timeout_seconds seconds for SSH process to finish");
+                               alarm $timeout_seconds;
+                       }
+                       
+                       # Execute the command
+                       $ssh_output = `$ssh_command`;
+                       
+                       # Save the exit status
+                       $exit_status = $? >> 8;
+                       
+                       # Ignore the returned value of $? if it is -1
+                       # This likely means a Perl bug was encountered
+                       # Assume command was successful
+                       if ($? == -1) {
+                               notify($ERRORS{'DEBUG'}, 0, "exit status 
changed from $exit_status to 0, Perl bug likely encountered") if $output_level;
+                               $exit_status = 0;
+                       }
+                       
+                       if ($timeout_seconds) {
+                               # Cancel the timer
+                               alarm 0;
+                       }
+               };
+       
+               my $duration = (time - $start_time);
                
-               # Ignore the returned value of $? if it is -1
-               # This likely means a Perl bug was encountered
-               # Assume command was successful
-               if ($? == -1) {
-                       notify($ERRORS{'DEBUG'}, 0, "exit status changed from 
$exit_status to 0, Perl bug likely encountered") if $output_level;
-                       $exit_status = 0;
+               # Check if the timeout was reached
+               if ($EVAL_ERROR && $EVAL_ERROR eq "alarm\n") {
+                       notify($ERRORS{'CRITICAL'}, 0, "attempt 
$attempts/$max_attempts: SSH command timed out after $duration seconds, timeout 
threshold: $timeout_seconds seconds, command: $node:\n$ssh_command");
+                       
+                       # Kill the child processes of this reservation process
+                       kill_child_processes($PID);
+                       next;
+               }
+               elsif ($EVAL_ERROR) {
+                       notify($ERRORS{'CRITICAL'}, 0, "attempt 
$attempts/$max_attempts: eval error was generated attempting to run SSH 
command: $node:\n$ssh_command, error: $EVAL_ERROR");
+                       next;
                }
                
-               #notify($ERRORS{'DEBUG'}, 0, "\$?: $?, signal: $signal_number, 
core dump: $core_dump, exit status: $exit_status");
-
                # Strip out the key warning message from the output
                $ssh_output =~ s/\@{10,}.*man-in-the-middle attacks\.//igs;
                
@@ -10443,6 +10471,66 @@ EOF
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 kill_child_processes
+
+ Parameters  : $parent_pid
+ Returns     : boolean
+ Description : Kills all child processes belonging to the parent PID specified
+               as the argument.
+
+=cut
+
+sub kill_child_processes {
+       my @parent_pids = @_;
+       my $parent_pid = $parent_pids[-1];
+       my $parent_process_string = "parent PID: " . join(">", @parent_pids);
+       
+       # Make sure the parent vcld daemon process didn't call this subroutine 
for safety
+       # Prevents all reservations being processed from being killed
+       if ($ENV{vcld}) {
+               notify($ERRORS{'CRITICAL'}, 0, "kill_child_processes subroutine 
called from the parent vcld process, not killing any processes for safety");
+               return;
+       }
+       
+       notify($ERRORS{'DEBUG'}, 0, "$parent_process_string: attempting to kill 
child processes");
+       
+       my $command = "pgrep -flP $parent_pid | sort -r";
+       my ($exit_status, $output) = run_command($command, 1);
+       
+       for my $line (@$output) {
+               # Make sure the line only contains a PID
+               my ($child_pid, $child_command) = $line =~ /^(\d+)\s+(.*)/;
+               if (!defined($child_pid) || !defined($child_command)) {
+                       notify($ERRORS{'WARNING'}, 0, "$parent_process_string: 
pgrep output line does not contain a PID and command:\nline: 
'$child_pid'\ncommand: '$command'");
+                       next;
+               }
+               elsif ($child_command =~ /$command/) {
+                       # Ignore the pgrep command called to determine child 
processes
+                       next;
+               }
+               
+               # Create a string containing the beginning and end of the child 
process command to make log output more readable
+               my $child_command_summary = join('...', ($child_command =~ 
/^(.{10,20}).*(.{20,30})$/));
+               
+               notify($ERRORS{'DEBUG'}, 0, "$parent_process_string, found 
child process: $child_pid '$child_command_summary'");
+               
+               # Recursively kill the child processes of the child process
+               kill_child_processes(@parent_pids, $child_pid);
+               
+               my $kill_count = kill 9, $child_pid;
+               if ($kill_count) {
+                       notify($ERRORS{'DEBUG'}, 0, "$parent_process_string, 
killed child process: $child_pid (kill count: $kill_count)");
+               }
+               else {
+                       notify($ERRORS{'WARNING'}, 0, "$parent_process_string, 
kill command returned 0 attempting to kill child process: $child_pid");
+               }
+       }
+       
+       return 1;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 1;
 __END__
 


Reply via email to