Author: fapeeler
Date: Wed May 25 17:31:48 2011
New Revision: 1127593

URL: http://svn.apache.org/viewvc?rev=1127593&view=rev
Log:
VCL-463

Server loads:
Added the ability to process a reboot request.
The inuse module calls the os->reboot routine;
each OS module is responsible for handling the reboot itself.

If the OS module cannot reboot the machine, the inuse module returns to the
inuse state without taking any action.



Modified:
    incubator/vcl/trunk/managementnode/bin/vcld
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
    incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm

Modified: incubator/vcl/trunk/managementnode/bin/vcld
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/bin/vcld?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/bin/vcld (original)
+++ incubator/vcl/trunk/managementnode/bin/vcld Wed May 25 17:31:48 2011
@@ -202,7 +202,7 @@ sub main () {
                        $ENV{state}     = $request_state_name;
 
                        # Make sure the request state is valid
-                       if ($request_state_name !~ 
/inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse/)
 {
+                       if ($request_state_name !~ 
/inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse|rebootsoft|reboothard|reinstall/)
 {
                                notify($ERRORS{'WARNING'}, $LOGFILE, "assigned 
request in unsupported state: $request_state_name");
                                next REQUEST;
                        }
@@ -497,7 +497,7 @@ sub make_new_child {
        }
 
        # The imageinuse state is now handled by inuse.pm
-       if ($state =~ /^(imageinuse)$/) {
+       if ($state =~ /^(imageinuse|rebootsoft|reboothard|reinstall)$/) {
                notify($ERRORS{'DEBUG'}, $LOGFILE, "request will be processed 
by inuse.pm");
                $state_module = "VCL::inuse";
        }

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Wed May 25 17:31:48 
2011
@@ -220,6 +220,86 @@ sub get_current_image_name {
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 wait_for_reboot
+
+ Parameters  : $wait_attempt_limit (optional, default: 2) - maximum number of wait attempts
+ Returns     : 1 if the computer went offline, came back up and responded to ssh within the attempt limit, false otherwise
+ Description : Waits for a reboot that has already been initiated to complete.
+               Each attempt waits for ping to stop, ping to resume, then ssh to
+               respond. Attempts after the first begin with a hard power reset.
+
+=cut
+
+sub wait_for_reboot {
+        my $self = shift;
+        if (ref($self) !~ /VCL::Module/i) {
+                notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
+                return;
+        }
+
+        my $computer_node_name   = $self->data->get_computer_node_name();
+
+        # Maximum number of wait attempts; fall back to 2 if the argument is missing or is not a positive integer
+        my $wait_attempt_limit = shift;
+
+        if (!defined($wait_attempt_limit) || $wait_attempt_limit !~ /^[1-9]\d*$/) {
+                $wait_attempt_limit = 2;
+        }
+
+        WAIT_ATTEMPT:
+        for (my $wait_attempt = 1; $wait_attempt <= $wait_attempt_limit; $wait_attempt++) {
+                if ($wait_attempt > 1) {
+                        # Computer did not become fully responsive on previous wait attempt
+                        notify($ERRORS{'OK'}, 0, "$computer_node_name reboot failed to complete on previous attempt, attempting hard power reset");
+                        # Call provisioning module's power_reset() subroutine
+                        if ($self->provisioner->power_reset()) {
+                                notify($ERRORS{'OK'}, 0, "reboot attempt $wait_attempt/$wait_attempt_limit: initiated power reset on $computer_node_name");
+                        }
+                        else {
+                                notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                                return 0;
+                        }
+                } ## end if ($wait_attempt > 1)
+
+                # Wait maximum of 3 minutes for the computer to become unresponsive
+                if (!$self->wait_for_no_ping(180, 3)) {
+                        # Computer never stopped responding to ping
+                        notify($ERRORS{'WARNING'}, 0, "$computer_node_name never became unresponsive to ping");
+                        next WAIT_ATTEMPT;
+                }
+
+                # Computer is unresponsive, reboot has begun
+                # Wait for 5 seconds before beginning to check if computer is back online
+                notify($ERRORS{'DEBUG'}, 0, "$computer_node_name reboot has begun, sleeping for 5 seconds");
+                sleep 5;
+
+                # Wait maximum of 6 minutes for the computer to come back up
+                if (!$self->wait_for_ping(360, 5)) {
+                        # Computer went offline but did not respond to ping again within the time limit
+                        notify($ERRORS{'WARNING'}, 0, "$computer_node_name never responded to ping");
+                        next WAIT_ATTEMPT;
+                }
+
+                notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is pingable, waiting for ssh to respond");
+
+                # Wait maximum of 3 minutes for ssh to respond
+                if (!$self->wait_for_ssh(180, 5)) {
+                        notify($ERRORS{'WARNING'}, 0, "ssh never responded on $computer_node_name");
+                        next WAIT_ATTEMPT;
+                }
+
+                notify($ERRORS{'DEBUG'}, 0, "$computer_node_name responded to ssh");
+
+                return 1;
+        } ## end for (my $wait_attempt = 1; $wait_attempt <=...
+
+        # If loop completed, maximum number of reboot attempts was reached
+        notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts");
+        return 0;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 =head2 wait_for_ping
 
  Parameters  : Maximum number of seconds to wait (optional), delay between 
attempts (optional)

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm Wed May 25 
17:31:48 2011
@@ -2787,6 +2787,111 @@ sub get_public_ip_address {
 
 #/////////////////////////////////////////////////////////////////////////////
 
+
+=head2 reboot
+
+ Parameters  : $wait_for_reboot (optional) - 0 returns as soon as the reboot has been initiated, any other value (or no argument) waits for it to complete
+ Returns     : 1 if the reboot was initiated (and, when waiting, completed), false otherwise
+ Description : Reboots the computer by running /sbin/shutdown -r now via ssh.
+               Falls back to the provisioning module's power_reset() if the
+               computer does not respond to ssh or the command fails.
+
+=cut
+
+sub reboot {
+        my $self = shift;
+        if (ref($self) !~ /linux/i) {
+                notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
+                return;
+        }
+
+        my $management_node_keys = $self->data->get_management_node_keys();
+        my $computer_node_name   = $self->data->get_computer_node_name();
+
+        # Only an argument of exactly 0 disables waiting; the pattern is anchored so values that merely contain a 0 (10, 300) still wait
+        my $wait_for_reboot = shift;
+        if (!defined($wait_for_reboot) || $wait_for_reboot !~ /^0$/) {
+                notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and waiting for ssh to become active");
+                $wait_for_reboot = 1;
+        }
+        else {
+                notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and NOT waiting");
+                $wait_for_reboot = 0;
+        }
+
+        my $reboot_start_time = time();
+        notify($ERRORS{'DEBUG'}, 0, "reboot will be attempted on $computer_node_name");
+
+        # Check if computer responds to ssh before preparing for reboot
+        if ($self->wait_for_ssh(0)) {
+                # Check if shutdown exists on the computer
+                my $reboot_command;
+                if ( $self->file_exists("/sbin/shutdown")) {
+                        $reboot_command = "/sbin/shutdown -r now";
+                }
+                else {
+                        notify($ERRORS{'WARNING'}, 0, "reboot not attempted, /sbin/shutdown did not exists on OS");
+                        return 0;
+                }
+
+                my ($reboot_exit_status, $reboot_output) = run_ssh_command($computer_node_name, $management_node_keys, $reboot_command);
+                if (!defined($reboot_output)) {
+                        notify($ERRORS{'WARNING'}, 0, "failed to execute ssh command to reboot $computer_node_name");
+                        return;
+                }
+
+                if ($reboot_exit_status == 0) {
+                        notify($ERRORS{'OK'}, 0, "executed reboot command on $computer_node_name");
+                }
+                else {
+                        notify($ERRORS{'WARNING'}, 0, "failed to reboot $computer_node_name, attempting power reset, output:\n" . join("\n", @$reboot_output));
+
+                        # Call provisioning module's power_reset() subroutine
+                        if ($self->provisioner->power_reset()) {
+                                notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name");
+                        }
+                        else {
+                                notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                                return;
+                        }
+                }
+        }
+        else {
+                # Computer did not respond to ssh
+                notify($ERRORS{'WARNING'}, 0, "$computer_node_name did not respond to ssh, graceful reboot cannot be performed, attempting hard reset");
+
+                # Call provisioning module's power_reset() subroutine
+                if ($self->provisioner->power_reset()) {
+                        notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name");
+                }
+                else {
+                        notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                        return 0;
+                }
+        } ## end else [ if ($self->wait_for_ssh(0))
+
+        my $wait_attempt_limit = 2;
+        # Check if wait for reboot is set
+        if (!$wait_for_reboot) {
+                return 1;
+        }
+        else {
+                if($self->wait_for_reboot($wait_attempt_limit)){
+                        # Reboot was successful, calculate how long reboot took
+                        my $reboot_end_time = time();
+                        my $reboot_duration = ($reboot_end_time - $reboot_start_time);
+                        notify($ERRORS{'OK'}, 0, "reboot complete on $computer_node_name, took $reboot_duration seconds");
+                        return 1;
+                }
+                else {
+                        notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts");
+                        return 0;
+                }
+        }
+} ## end sub reboot
+
+#/////////////////////////////////////////////////////////////////////////////
+
 1;
 __END__
 

Modified: incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm Wed May 25 17:31:48 2011
@@ -122,7 +122,32 @@ sub process {
        my $reservation_count     = $self->data->get_reservation_count();
        my $is_parent_reservation = $self->data->is_parent_reservation();
        my $identity_key          = $self->data->get_image_identity();
+       my $request_state_name    = $self->data->get_request_state_name();
 
+       if ($request_state_name =~ /reboot|rebootsoft|reboothard/) {
+               notify($ERRORS{'OK'}, 0, "this is a 'reboot' request");
+               if($self->os->can('reboot')){
+                       if($self->os->reboot()){
+                               notify($ERRORS{'OK'}, 0, "successfuly rebooted 
$computer_nodename");
+                       
+                       }
+                       else {
+                               notify($ERRORS{'WARNING'}, 0, "failed to reboot 
$computer_nodename");
+                               #do not fail request or machine
+                       }
+                       # Put this request back into the inuse state
+                               if (update_request_state($request_id, "inuse", 
"inuse")) {
+                                       notify($ERRORS{'OK'}, 0, "request state 
set back to inuse");
+                        }
+                        else {
+                                       notify($ERRORS{'WARNING'}, 0, "unable 
to set request state back to inuse");
+                        }
+                       notify($ERRORS{'OK'}, 0, "exiting");
+                       exit;
+               }
+               
+       }
+       
        # Set the user connection timeout limit in minutes
        my $connect_timeout_limit = 15;
        


Reply via email to