Author: arkurth
Date: Thu Aug 10 15:30:53 2017
New Revision: 1804679
URL: http://svn.apache.org/viewvc?rev=1804679&view=rev
Log:
VCL-1073
Added '&' after call to shutdown in Linux.pm::reboot and Linux.pm::shutdown so
that they doesn't wait for the command to complete. Added 'max_attempts' = 1
argument to execute call so that multiple attempts aren't tried if the
connection is quickly broken after the reboot starts. Removed error checking on
the result of executing this command. Later on, the code waits for the computer
to either shut down or not respond to ping and monitors the reboot. If the
shutdown command failed for some reason, the code would still attempt a hard
reset or power off.
Other
Moved activate_interfaces call to earlier in Linux.pm::post_load. It was being
called after update_public_ip_address, synchronize_time, and possibly other
steps that would require the interfaces to be up. Replaced duplicated code
calling ifup in Linux.pm::activate_interfaces with call to
start_network_interface.
Modified:
vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
Modified: vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
URL:
http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm?rev=1804679&r1=1804678&r2=1804679&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm Thu Aug 10 15:30:53 2017
@@ -105,7 +105,8 @@ our $CAPTURE_DELETE_FILE_PATHS = [
'/etc/sysconfig/iptables_pre*',
'/etc/udev/rules.d/70-persistent-net.rules',
'/tmp/*',
- '/var/log/*.0',
+ '/var/log/*.0*',
+ '/var/log/*.1*',
'/var/log/*-20*',
'/var/log/*.gz',
'/var/log/*.old',
@@ -121,13 +122,16 @@ our $CAPTURE_DELETE_FILE_PATHS = [
our $CAPTURE_CLEAR_FILE_PATHS = [
'/etc/hostname',
'/var/log/audit/audit.log',
- '/var/log/auth.log*',
- '/var/log/boot.log*',
+ '/var/log/auth.log',
+ '/var/log/boot.log',
+ '/var/log/kern.log',
'/var/log/lastlog',
'/var/log/maillog',
'/var/log/messages',
'/var/log/secure',
+ '/var/log/syslog',
'/var/log/udev',
+ '/var/log/ufw.log',
'/var/log/wtmp',
];
@@ -500,6 +504,9 @@ sub post_load {
return;
}
+ # Attempt to generate ifcfg-eth* files and start any interfaces which
the file does not exist
+ $self->activate_interfaces();
+
# Configure the firewall to allow SSH traffic only from the management
node
if ($self->can('firewall') &&
$self->firewall->can('process_post_load')) {
$self->firewall->process_post_load() || return;
@@ -544,9 +551,6 @@ sub post_load {
notify($ERRORS{'WARNING'}, 0, "failed to clear known identity
keys");
}
- # Attempt to generate ifcfg-eth* files and ifup any interfaces which
the file does not exist
- $self->activate_interfaces();
-
# Update computer hostname if imagemeta.sethostname is not set to 0
my $set_hostname = $self->data->get_imagemeta_sethostname(0);
if (defined($set_hostname) && $set_hostname =~ /0/) {
@@ -979,21 +983,20 @@ sub start_network_interface {
notify($ERRORS{'DEBUG'}, 0, "attempting to start network interface
$interface_name on $computer_name");
my $command = "/sbin/ifup $interface_name";
- my ($exit_status, $output) = $self->execute($command);
+ my ($exit_status, $output) = $self->execute($command, 0);
if (!defined($output)) {
notify($ERRORS{'WARNING'}, 0, "failed to execute command to
start $interface_name interface on $computer_name");
return;
}
elsif (grep(/already configured/i, @$output)) {
notify($ERRORS{'DEBUG'}, 0, "$interface_name interface on
$computer_name is already started, output:\n" . join("\n", @$output));
- return 1;
}
- elsif ($exit_status) {
- notify($ERRORS{'WARNING'}, 0, "failed to start $interface_name
interface on $computer_name, exit status: $exit_status, command: '$command',
output:\n" . join("\n", @$output));
- return;
+ elsif ($exit_status == 0 || grep(/done/i, @$output)) {
+ notify($ERRORS{'DEBUG'}, 0, "started $interface_name interface
on $computer_name, " . (@$output ? "output:\n" . join("\n", @$output) : 'no
output'));
}
else {
- notify($ERRORS{'DEBUG'}, 0, "started $interface_name interface
on $computer_name, output:\n" . join("\n", @$output));
+ notify($ERRORS{'WARNING'}, 0, "failed to start $interface_name
interface on $computer_name, exit status: $exit_status, command: '$command',
output:\n" . join("\n", @$output));
+ return;
}
return 1;
@@ -2423,8 +2426,8 @@ sub set_file_owner {
Description : Finds all networking interfaces with an active link. Checks if
an
ifcfg-eth* file exists for the interface. An ifcfg-eth* file is
generated if it does not exist using DHCP and the interface is
- brought up via ifup. This is useful if additional interfaces are
- added by the provisioning module when an image is loaded.
+ brought up. This is useful if additional interfaces are added by
+ the provisioning module when an image is loaded.
=cut
@@ -2479,20 +2482,7 @@ EOF
return;
}
- # Call ifup on the interface
- my $command = "ifup $interface_name";
- my ($exit_status, $output) = $self->execute($command, 1);
- if (!defined($output)) {
- notify($ERRORS{'WARNING'}, 0, "failed to execute
command to activate $interface_name interface: $command");
- return;
- }
- elsif ($exit_status eq '0' || grep(/done\./, @$output)) {
- notify($ERRORS{'OK'}, 0, "activated $interface_name
interface, output:\n" . join("\n", @$output));
- }
- else {
- notify($ERRORS{'WARNING'}, 0, "failed to activate
$interface_name interface, exit status: $exit_status, output:\n" . join("\n",
@$output));
- return;
- }
+ $self->start_network_interface($interface_name);
}
return 1;
@@ -2673,9 +2663,13 @@ sub get_network_configuration {
=head2 reboot
- Parameters : $wait_for_reboot
- Returns :
- Description :
+ Parameters : none
+ Returns : boolean
+ Description : Attempts to gracefully reboot the computer by executing
+ 'shutdown -r now' command. Attempts to detect reboot began and
+ completed. If this fails or if the computer is not responding to
+ SSH, the provisioning module will attempt to forcefully perform
a
+ hard reset of the computer.
=cut
@@ -2688,90 +2682,59 @@ sub reboot {
my $computer_node_name = $self->data->get_computer_node_name();
- # Check if an argument was supplied
- my $wait_for_reboot = shift || 1;
- if ($wait_for_reboot) {
- notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and
waiting for SSH to become active");
- }
- else {
- notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and
NOT waiting");
- }
+ notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and waiting
for SSH to become active");
my $reboot_start_time = time();
# Check if computer responds to ssh before preparing for reboot
if ($self->wait_for_ssh(0)) {
- # Check if shutdown exists on the computer
- my $reboot_command;
- if ($self->file_exists("/sbin/shutdown")) {
- $reboot_command = "/sbin/shutdown -r now";
- }
- else {
- notify($ERRORS{'WARNING'}, 0, "reboot not attempted,
/sbin/shutdown did not exists on $computer_node_name");
- return;
- }
-
- my ($reboot_exit_status, $reboot_output) =
$self->execute($reboot_command);
- if (!defined($reboot_output)) {
- notify($ERRORS{'WARNING'}, 0, "failed to execute
command to reboot $computer_node_name");
- return;
- }
- elsif ($reboot_exit_status == 0) {
- notify($ERRORS{'OK'}, 0, "executed reboot command on
$computer_node_name");
- }
- else {
- notify($ERRORS{'WARNING'}, 0, "failed to reboot
$computer_node_name, attempting power reset, output:\n" . join("\n",
@$reboot_output));
-
- # Call provisioning module's power_reset() subroutine
- if ($self->provisioner->power_reset()) {
- notify($ERRORS{'OK'}, 0, "initiated power reset
on $computer_node_name");
+ my $reboot_command = '/sbin/shutdown -r now &';
+ notify($ERRORS{'DEBUG'}, 0, "attempting to gracefully reboot
$computer_node_name by executing '$reboot_command'");
+ my ($reboot_exit_status, $reboot_output) = $self->execute(
+ {
+ command => $reboot_command,
+ timeout => 30,
+ max_attempts => 1,
+ display_output => 0,
}
- else {
- notify($ERRORS{'WARNING'}, 0, "reboot failed,
failed to initiate power reset on $computer_node_name");
- return;
- }
- }
- }
- else {
- # Computer did not respond to SSH
- notify($ERRORS{'WARNING'}, 0, "$computer_node_name is not
responding to SSH, graceful reboot cannot be performed, attempting hard reset");
+ );
- # Call provisioning module's power_reset() subroutine
- if ($self->provisioner->power_reset()) {
- notify($ERRORS{'OK'}, 0, "initiated power reset on
$computer_node_name");
+ if ($self->wait_for_reboot()) {
+ my $reboot_duration = (time() - $reboot_start_time);
+ notify($ERRORS{'OK'}, 0, "gracefully rebooted
$computer_node_name, took $reboot_duration seconds");
+ return 1;
}
else {
- notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to
initiate power reset on $computer_node_name");
- return;
+ notify($ERRORS{'DEBUG'}, 0, "did not detect
$computer_node_name rebooting after executing '$reboot_command', attempting
hard reset using the provisioning module");
}
}
-
- # Check if wait for reboot is set
- if (!$wait_for_reboot) {
- return 1;
+ else {
+ notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is not
responding to SSH, graceful reboot cannot be performed, attempting hard reset
using the provisioning module");
}
+ $self->provisioner->power_reset() || return;
if ($self->wait_for_reboot()) {
- # Reboot was successful, calculate how long reboot took
- my $reboot_end_time = time();
- my $reboot_duration = ($reboot_end_time - $reboot_start_time);
- notify($ERRORS{'OK'}, 0, "reboot complete on
$computer_node_name, took $reboot_duration seconds");
+ my $reboot_duration = (time() - $reboot_start_time);
+ notify($ERRORS{'OK'}, 0, "hard reset of $computer_node_name
complete, took $reboot_duration seconds");
return 1;
}
else {
- notify($ERRORS{'WARNING'}, 0, "reboot failed on
$computer_node_name, made default wait_attempt_limit attempts");
- return 0;
+ notify($ERRORS{'WARNING'}, 0, "$computer_node_name may not have
rebooted, did not detect reboot after attempting hard reset using the
provisioning module");
+ return;
}
-
}
#//////////////////////////////////////////////////////////////////////////////
=head2 shutdown
- Parameters :
- Returns :
- Description :
+ Parameters : none
+ Returns : boolean
+ Description : Attempts to gracefully shut down the computer by executing the
+ shutdown command. Waits for provisioning module to report that
+ the computer is off. If this fails or if the computer is not
+ responding to SSH, the provisioning module will attempt to
+ forcefully power off the computer.
=cut
@@ -2784,52 +2747,38 @@ sub shutdown {
my $computer_node_name = $self->data->get_computer_node_name();
- # Check if an argument was supplied
- my $wait_for_power_off = shift || 1;
- if ($wait_for_power_off) {
- notify($ERRORS{'DEBUG'}, 0, "shutting down $computer_node_name
and waiting for power off");
- }
- else {
- notify($ERRORS{'DEBUG'}, 0, "shutting down $computer_node_name
and NOT waiting for power off");
- }
-
# Check if computer responds to ssh before preparing for shut down
if ($self->wait_for_ssh(0)) {
- my $command = '/sbin/shutdown -h now';
-
- my ($exit_status, $output) = $self->execute({command =>
$command, timeout => 90, ignore_error => 1});
-
- # Wait maximum of 5 minutes for computer to power off
- my $power_off = $self->provisioner->wait_for_power_off(300);
- if (!defined($power_off)) {
- # wait_for_power_off result will be undefined if the
provisioning module doesn't implement a power_status subroutine
- notify($ERRORS{'OK'}, 0, "unable to determine power
status of $computer_node_name from provisioning module, sleeping 1 minute to
allow computer time to shutdown");
- sleep 60;
- }
- elsif (!$power_off) {
- notify($ERRORS{'WARNING'}, 0, "$computer_node_name
never powered off");
- # Call provisioning module's power_off() subroutine
- if (!$self->provisioner->power_off()) {
- notify($ERRORS{'WARNING'}, 0, "failed to shut
down $computer_node_name, failed to initiate power off");
- return;
+ my $shutdown_command = '/sbin/shutdown -h now &';
+ notify($ERRORS{'DEBUG'}, 0, "attempting to gracefully shut down
$computer_node_name by executing '$shutdown_command'");
+ my ($exit_status, $output) = $self->execute(
+ {
+ command => $shutdown_command,
+ timeout => 30,
+ max_attempts => 1,
+ display_output => 0,
}
+ );
+
+ if ($self->provisioner->wait_for_power_off(300, 10)) {
+ notify($ERRORS{'OK'}, 0, "gracefully shut down
$computer_node_name by executing the OS's shutdown command");
+ return 1;
+ }
+ else {
+ notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is
still on after executing shutdown command, attempting to power off the computer
using the provisioning module");
}
}
else {
- notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is not
responding to SSH, attempting power off");
-
- # Call provisioning module's power_off() subroutine
- if (!$self->provisioner->power_off()) {
- notify($ERRORS{'WARNING'}, 0, "failed to shut down
$computer_node_name, failed to initiate power off");
- return;
- }
+ notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is NOT
responding to SSH, attempting to power off the computer using the provisioning
module");
}
- if (!$wait_for_power_off || $self->provisioner->wait_for_power_off(300,
10)) {
+ $self->provisioner->power_off() || return;
+ if ($self->provisioner->wait_for_power_off(300, 10)) {
+ notify($ERRORS{'OK'}, 0, "forcefully powered off
$computer_node_name using the provisioning module");
return 1;
}
else {
- notify($ERRORS{'WARNING'}, 0, "failed to shut down
$computer_node_name, computer never powered off");
+ notify($ERRORS{'WARNING'}, 0, "failed to shut down
$computer_node_name, computer is still on after attempting to power off the
computer using the provisioning module");
return;
}
}