Author: fapeeler
Date: Wed May 25 17:31:48 2011
New Revision: 1127593
URL: http://svn.apache.org/viewvc?rev=1127593&view=rev
Log:
VCL-463
Server loads: added the ability to process a reboot request.
The inuse module calls the os->reboot routine;
each OS module is responsible for handling it.
If the OS module cannot reboot the machine, the inuse module will return to
the inuse state without any action.
Modified:
incubator/vcl/trunk/managementnode/bin/vcld
incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm
Modified: incubator/vcl/trunk/managementnode/bin/vcld
URL:
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/bin/vcld?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/bin/vcld (original)
+++ incubator/vcl/trunk/managementnode/bin/vcld Wed May 25 17:31:48 2011
@@ -202,7 +202,7 @@ sub main () {
$ENV{state} = $request_state_name;
# Make sure the request state is valid
- if ($request_state_name !~
/inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse/)
{
+ if ($request_state_name !~
/inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse|rebootsoft|reboothard|reinstall/)
{
notify($ERRORS{'WARNING'}, $LOGFILE, "assigned
request in unsupported state: $request_state_name");
next REQUEST;
}
@@ -497,7 +497,7 @@ sub make_new_child {
}
# The imageinuse state is now handled by inuse.pm
- if ($state =~ /^(imageinuse)$/) {
+ if ($state =~ /^(imageinuse|rebootsoft|reboothard|reinstall)$/) {
notify($ERRORS{'DEBUG'}, $LOGFILE, "request will be processed
by inuse.pm");
$state_module = "VCL::inuse";
}
Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
URL:
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Wed May 25 17:31:48
2011
@@ -220,6 +220,86 @@ sub get_current_image_name {
#/////////////////////////////////////////////////////////////////////////////
+=head2 wait_for_reboot
+
+ Parameters : Maximum number of seconds to wait (optional), delay between
attempts (optional)
+ Returns : If computer is pingable before the maximum amount of time has
elapsed: 1
+ Description :
+
+=cut
+
+sub wait_for_reboot {
+ my $self = shift;
+ if (ref($self) !~ /VCL::Module/i) {
+ notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a
function, it must be called as a class method");
+ return;
+ }
+
+ my $computer_node_name = $self->data->get_computer_node_name();
+
+ # Make multiple attempts to wait for the reboot to complete
+ my $wait_attempt_limit = shift;
+
+ if (!defined($wait_attempt_limit)) {
+ $wait_attempt_limit = 2;
+ }
+
+ WAIT_ATTEMPT:
+ for (my $wait_attempt = 1; $wait_attempt <= $wait_attempt_limit;
$wait_attempt++) {
+ if ($wait_attempt > 1) {
+ # Computer did not become fully responsive on previous
wait attempt
+ notify($ERRORS{'OK'}, 0, "$computer_node_name reboot
failed to complete on previous attempt, attempting hard power reset");
+ # Call provisioning module's power_reset() subroutine
+ if ($self->provisioner->power_reset()) {
+ notify($ERRORS{'OK'}, 0, "reboot attempt
$wait_attempt/$wait_attempt_limit: initiated power reset on
$computer_node_name");
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "reboot failed,
failed to initiate power reset on $computer_node_name");
+ return 0;
+ }
+ } ## end if ($wait_attempt > 1)
+
+ # Wait maximum of 3 minutes for the computer to become
unresponsive
+ if (!$self->wait_for_no_ping(180, 3)) {
+ # Computer never stopped responding to ping
+ notify($ERRORS{'WARNING'}, 0, "$computer_node_name
never became unresponsive to ping");
+ next WAIT_ATTEMPT;
+ }
+
+ # Computer is unresponsive, reboot has begun
+ # Wait for 5 seconds before beginning to check if computer is
back online
+ notify($ERRORS{'DEBUG'}, 0, "$computer_node_name reboot has
begun, sleeping for 5 seconds");
+ sleep 5;
+
+ # Wait maximum of 6 minutes for the computer to come back up
+ if (!$self->wait_for_ping(360, 5)) {
+ # Check if the computer was ever offline, it should
have been or else reboot never happened
+ notify($ERRORS{'WARNING'}, 0, "$computer_node_name
never responded to ping");
+ next WAIT_ATTEMPT;
+ }
+
+ notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is pingable,
waiting for ssh to respond");
+
+ # Wait maximum of 3 minutes for ssh to respond
+ if (!$self->wait_for_ssh(180, 5)) {
+ notify($ERRORS{'WARNING'}, 0, "ssh never responded on
$computer_node_name");
+ next WAIT_ATTEMPT;
+ }
+
+ notify($ERRORS{'DEBUG'}, 0, "$computer_node_name responded to
ssh");
+
+ return 1;
+ } ## end for (my $wait_attempt = 1; $wait_attempt <=...
+
+ # If loop completed, maximum number of reboot attempts was reached
+ notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name,
made $wait_attempt_limit attempts");
+ return 0;
+
+
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
=head2 wait_for_ping
Parameters : Maximum number of seconds to wait (optional), delay between
attempts (optional)
Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
URL:
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm Wed May 25
17:31:48 2011
@@ -2787,6 +2787,111 @@ sub get_public_ip_address {
#/////////////////////////////////////////////////////////////////////////////
+
+=head2 reboot
+
+ Parameters : $wait_for_reboot
+ Returns :
+ Description :
+
+=cut
+
+sub reboot {
+ my $self = shift;
+ if (ref($self) !~ /linux/i) {
+ notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a
function, it must be called as a class method");
+ return;
+ }
+
+ my $management_node_keys = $self->data->get_management_node_keys();
+ my $computer_node_name = $self->data->get_computer_node_name();
+
+ # Check if an argument was supplied
+ my $wait_for_reboot = shift;
+ if (!defined($wait_for_reboot) || $wait_for_reboot !~ /0/) {
+ notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and
waiting for ssh to become active");
+ $wait_for_reboot = 1;
+ }
+ else {
+ notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and
NOT waiting");
+ $wait_for_reboot = 0;
+ }
+
+ my $reboot_start_time = time();
+ notify($ERRORS{'DEBUG'}, 0, "reboot will be attempted on
$computer_node_name");
+
+ # Check if computer responds to ssh before preparing for reboot
+ if ($self->wait_for_ssh(0)) {
+
+ # Check if shutdown exists on the computer
+ my $reboot_command;
+ if ( $self->file_exists("/sbin/shutdown")) {
+ $reboot_command = "/sbin/shutdown -r now";
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "reboot not attempted,
/sbin/shutdown did not exists on OS");
+ return 0;
+ }
+
+ my ($reboot_exit_status, $reboot_output) =
run_ssh_command($computer_node_name, $management_node_keys, $reboot_command);
+ if (!defined($reboot_output)) {
+ notify($ERRORS{'WARNING'}, 0, "failed to execute ssh
command to reboot $computer_node_name");
+ return;
+ }
+
+ if ($reboot_exit_status == 0) {
+ notify($ERRORS{'OK'}, 0, "executed reboot command on
$computer_node_name");
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "failed to reboot
$computer_node_name, attempting power reset, output:\n" . join("\n",
@$reboot_output));
+
+ # Call provisioning module's power_reset() subroutine
+ if ($self->provisioner->power_reset()) {
+ notify($ERRORS{'OK'}, 0, "initiated power
reset on $computer_node_name");
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "reboot failed,
failed to initiate power reset on $computer_node_name");
+ return;
+ }
+ }
+ }
+ else {
+ # Computer did not respond to ssh
+ notify($ERRORS{'WARNING'}, 0, "$computer_node_name did not
respond to ssh, graceful reboot cannot be performed, attempting hard reset");
+
+ # Call provisioning module's power_reset() subroutine
+ if ($self->provisioner->power_reset()) {
+ notify($ERRORS{'OK'}, 0, "initiated power reset on
$computer_node_name");
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "reboot failed, failed
to initiate power reset on $computer_node_name");
+ return 0;
+ }
+ } ## end else [ if ($self->wait_for_ssh(0))
+
+ my $wait_attempt_limit = 2;
+ # Check if wait for reboot is set
+ if (!$wait_for_reboot) {
+ return 1;
+ }
+ else {
+ if($self->wait_for_reboot($wait_attempt_limit)){
+ # Reboot was successful, calculate how long reboot took
+ my $reboot_end_time = time();
+ my $reboot_duration = ($reboot_end_time -
$reboot_start_time);
+ notify($ERRORS{'OK'}, 0, "reboot complete on
$computer_node_name, took $reboot_duration seconds");
+ return 1;
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "reboot failed on
$computer_node_name, made $wait_attempt_limit attempts");
+ return 0;
+ }
+ }
+
+} ## end sub reboot
+
+#/////////////////////////////////////////////////////////////////////////////
+
1;
__END__
Modified: incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm
URL:
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm Wed May 25 17:31:48 2011
@@ -122,7 +122,32 @@ sub process {
my $reservation_count = $self->data->get_reservation_count();
my $is_parent_reservation = $self->data->is_parent_reservation();
my $identity_key = $self->data->get_image_identity();
+ my $request_state_name = $self->data->get_request_state_name();
+ if ($request_state_name =~ /reboot|rebootsoft|reboothard/) {
+ notify($ERRORS{'OK'}, 0, "this is a 'reboot' request");
+ if($self->os->can('reboot')){
+ if($self->os->reboot()){
+ notify($ERRORS{'OK'}, 0, "successfuly rebooted
$computer_nodename");
+
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "failed to reboot
$computer_nodename");
+ #do not fail request or machine
+ }
+ # Put this request back into the inuse state
+ if (update_request_state($request_id, "inuse",
"inuse")) {
+ notify($ERRORS{'OK'}, 0, "request state
set back to inuse");
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "unable
to set request state back to inuse");
+ }
+ notify($ERRORS{'OK'}, 0, "exiting");
+ exit;
+ }
+
+ }
+
# Set the user connection timeout limit in minutes
my $connect_timeout_limit = 15;