Author: fapeeler Date: Wed May 25 17:31:48 2011 New Revision: 1127593 URL: http://svn.apache.org/viewvc?rev=1127593&view=rev Log: VCL-463
Server loads ability to process a reboot. The inuse module calls the os->reboot routine; each OS module is responsible for handling it. If the OS module cannot reboot the machine, the inuse module returns to the inuse state without taking any action. Modified: incubator/vcl/trunk/managementnode/bin/vcld incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm Modified: incubator/vcl/trunk/managementnode/bin/vcld URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/bin/vcld?rev=1127593&r1=1127592&r2=1127593&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/bin/vcld (original) +++ incubator/vcl/trunk/managementnode/bin/vcld Wed May 25 17:31:48 2011 @@ -202,7 +202,7 @@ sub main () { $ENV{state} = $request_state_name; # Make sure the request state is valid - if ($request_state_name !~ /inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse/) { + if ($request_state_name !~ /inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse|rebootsoft|reboothard|reinstall/) { notify($ERRORS{'WARNING'}, $LOGFILE, "assigned request in unsupported state: $request_state_name"); next REQUEST; } @@ -497,7 +497,7 @@ sub make_new_child { } # The imageinuse state is now handled by inuse.pm - if ($state =~ /^(imageinuse)$/) { + if ($state =~ /^(imageinuse|rebootsoft|reboothard|reinstall)$/) { notify($ERRORS{'DEBUG'}, $LOGFILE, "request will be processed by inuse.pm"); $state_module = "VCL::inuse"; } Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1127593&r1=1127592&r2=1127593&view=diff 
============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Wed May 25 17:31:48 2011 @@ -220,6 +220,86 @@ sub get_current_image_name { #///////////////////////////////////////////////////////////////////////////// +=head2 wait_for_reboot + + Parameters : Maximum number of seconds to wait (optional), delay between attempts (optional) + Returns : If computer is pingable before the maximum amount of time has elapsed: 1 + Description : + +=cut + +sub wait_for_reboot { + my $self = shift; + if (ref($self) !~ /VCL::Module/i) { + notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method"); + return; + } + + my $computer_node_name = $self->data->get_computer_node_name(); + + # Make multiple attempts to wait for the reboot to complete + my $wait_attempt_limit = shift; + + if (!defined($wait_attempt_limit)) { + $wait_attempt_limit = 2; + } + + WAIT_ATTEMPT: + for (my $wait_attempt = 1; $wait_attempt <= $wait_attempt_limit; $wait_attempt++) { + if ($wait_attempt > 1) { + # Computer did not become fully responsive on previous wait attempt + notify($ERRORS{'OK'}, 0, "$computer_node_name reboot failed to complete on previous attempt, attempting hard power reset"); + # Call provisioning module's power_reset() subroutine + if ($self->provisioner->power_reset()) { + notify($ERRORS{'OK'}, 0, "reboot attempt $wait_attempt/$wait_attempt_limit: initiated power reset on $computer_node_name"); + } + else { + notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name"); + return 0; + } + } ## end if ($wait_attempt > 1) + + # Wait maximum of 3 minutes for the computer to become unresponsive + if (!$self->wait_for_no_ping(180, 3)) { + # Computer never stopped responding to ping + notify($ERRORS{'WARNING'}, 0, "$computer_node_name never became unresponsive 
to ping"); + next WAIT_ATTEMPT; + } + + # Computer is unresponsive, reboot has begun + # Wait for 5 seconds before beginning to check if computer is back online + notify($ERRORS{'DEBUG'}, 0, "$computer_node_name reboot has begun, sleeping for 5 seconds"); + sleep 5; + + # Wait maximum of 6 minutes for the computer to come back up + if (!$self->wait_for_ping(360, 5)) { + # Check if the computer was ever offline, it should have been or else reboot never happened + notify($ERRORS{'WARNING'}, 0, "$computer_node_name never responded to ping"); + next WAIT_ATTEMPT; + } + + notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is pingable, waiting for ssh to respond"); + + # Wait maximum of 3 minutes for ssh to respond + if (!$self->wait_for_ssh(180, 5)) { + notify($ERRORS{'WARNING'}, 0, "ssh never responded on $computer_node_name"); + next WAIT_ATTEMPT; + } + + notify($ERRORS{'DEBUG'}, 0, "$computer_node_name responded to ssh"); + + return 1; + } ## end for (my $wait_attempt = 1; $wait_attempt <=... 
+ + # If loop completed, maximum number of reboot attempts was reached + notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts"); + return 0; + + +} + +#///////////////////////////////////////////////////////////////////////////// + =head2 wait_for_ping Parameters : Maximum number of seconds to wait (optional), delay between attempts (optional) Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm?rev=1127593&r1=1127592&r2=1127593&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm Wed May 25 17:31:48 2011 @@ -2787,6 +2787,111 @@ sub get_public_ip_address { #///////////////////////////////////////////////////////////////////////////// + +=head2 reboot + + Parameters : $wait_for_reboot + Returns : + Description : + +=cut + +sub reboot { + my $self = shift; + if (ref($self) !~ /linux/i) { + notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method"); + return; + } + + my $management_node_keys = $self->data->get_management_node_keys(); + my $computer_node_name = $self->data->get_computer_node_name(); + + # Check if an argument was supplied + my $wait_for_reboot = shift; + if (!defined($wait_for_reboot) || $wait_for_reboot !~ /0/) { + notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and waiting for ssh to become active"); + $wait_for_reboot = 1; + } + else { + notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and NOT waiting"); + $wait_for_reboot = 0; + } + + my $reboot_start_time = time(); + notify($ERRORS{'DEBUG'}, 0, "reboot will be attempted on $computer_node_name"); + + # Check if computer responds to ssh before preparing for reboot + if ($self->wait_for_ssh(0)) 
{ + + # Check if shutdown exists on the computer + my $reboot_command; + if ( $self->file_exists("/sbin/shutdown")) { + $reboot_command = "/sbin/shutdown -r now"; + } + else { + notify($ERRORS{'WARNING'}, 0, "reboot not attempted, /sbin/shutdown did not exists on OS"); + return 0; + } + + my ($reboot_exit_status, $reboot_output) = run_ssh_command($computer_node_name, $management_node_keys, $reboot_command); + if (!defined($reboot_output)) { + notify($ERRORS{'WARNING'}, 0, "failed to execute ssh command to reboot $computer_node_name"); + return; + } + + if ($reboot_exit_status == 0) { + notify($ERRORS{'OK'}, 0, "executed reboot command on $computer_node_name"); + } + else { + notify($ERRORS{'WARNING'}, 0, "failed to reboot $computer_node_name, attempting power reset, output:\n" . join("\n", @$reboot_output)); + + # Call provisioning module's power_reset() subroutine + if ($self->provisioner->power_reset()) { + notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name"); + } + else { + notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name"); + return; + } + } + } + else { + # Computer did not respond to ssh + notify($ERRORS{'WARNING'}, 0, "$computer_node_name did not respond to ssh, graceful reboot cannot be performed, attempting hard reset"); + + # Call provisioning module's power_reset() subroutine + if ($self->provisioner->power_reset()) { + notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name"); + } + else { + notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name"); + return 0; + } + } ## end else [ if ($self->wait_for_ssh(0)) + + my $wait_attempt_limit = 2; + # Check if wait for reboot is set + if (!$wait_for_reboot) { + return 1; + } + else { + if($self->wait_for_reboot($wait_attempt_limit)){ + # Reboot was successful, calculate how long reboot took + my $reboot_end_time = time(); + my $reboot_duration = ($reboot_end_time - 
$reboot_start_time); + notify($ERRORS{'OK'}, 0, "reboot complete on $computer_node_name, took $reboot_duration seconds"); + return 1; + } + else { + notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts"); + return 0; + } + } + +} ## end sub reboot + +#///////////////////////////////////////////////////////////////////////////// + 1; __END__ Modified: incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm?rev=1127593&r1=1127592&r2=1127593&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm Wed May 25 17:31:48 2011 @@ -122,7 +122,32 @@ sub process { my $reservation_count = $self->data->get_reservation_count(); my $is_parent_reservation = $self->data->is_parent_reservation(); my $identity_key = $self->data->get_image_identity(); + my $request_state_name = $self->data->get_request_state_name(); + if ($request_state_name =~ /reboot|rebootsoft|reboothard/) { + notify($ERRORS{'OK'}, 0, "this is a 'reboot' request"); + if($self->os->can('reboot')){ + if($self->os->reboot()){ + notify($ERRORS{'OK'}, 0, "successfuly rebooted $computer_nodename"); + + } + else { + notify($ERRORS{'WARNING'}, 0, "failed to reboot $computer_nodename"); + #do not fail request or machine + } + # Put this request back into the inuse state + if (update_request_state($request_id, "inuse", "inuse")) { + notify($ERRORS{'OK'}, 0, "request state set back to inuse"); + } + else { + notify($ERRORS{'WARNING'}, 0, "unable to set request state back to inuse"); + } + notify($ERRORS{'OK'}, 0, "exiting"); + exit; + } + + } + # Set the user connection timeout limit in minutes my $connect_timeout_limit = 15;