Author: arkurth
Date: Thu Jun 13 16:37:14 2013
New Revision: 1492747

URL: http://svn.apache.org/r1492747
Log:
VCL-16
Fixed a few issues with reserved.pm and inuse.pm. The parent reserved process 
may have exited before all child reserved processes had exited. Added a check 
to make sure a computerloadlog 'reserved' entry exists for all child 
reservations before the parent exits. Also added checks to make sure the 
reservation wasn't deleted and that none of the child reserved processes failed.

Fixed inuse.pm to properly handle the connecttimeout variable. If set to a long 
duration, the connect checking could have run into the end time countdown.

Modified:
    vcl/trunk/managementnode/lib/VCL/inuse.pm
    vcl/trunk/managementnode/lib/VCL/reserved.pm
    vcl/trunk/managementnode/lib/VCL/utils.pm

Modified: vcl/trunk/managementnode/lib/VCL/inuse.pm
URL: 
http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/inuse.pm?rev=1492747&r1=1492746&r2=1492747&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/inuse.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/inuse.pm Thu Jun 13 16:37:14 2013
@@ -79,7 +79,7 @@ use strict;
 use warnings;
 use diagnostics;
 
-use POSIX;
+use POSIX qw(ceil floor strftime);
 use VCL::utils;
 
 ##############################################################################
@@ -101,31 +101,35 @@ use VCL::utils;
 sub process {
        my $self = shift;
        
-       my $request_id = $self->data->get_request_id();
-       my $request_state_name = $self->data->get_request_state_name();
-       my $request_laststate_name = $self->data->get_request_laststate_name();
-       my $request_start = $self->data->get_request_start_time();
-       my $request_end = $self->data->get_request_end_time();
-       my $request_data = $self->data->get_request_data();
-       my $request_forimaging = $self->data->get_request_forimaging();
-       my $request_checkuser = $self->data->get_request_checkuser();
-       my $reservation_id = $self->data->get_reservation_id();
-       my $reservation_count = $self->data->get_reservation_count();
-       my $server_request_id = $self->data->get_server_request_id();
-       my $imagemeta_checkuser = $self->data->get_imagemeta_checkuser();
-       my $is_parent_reservation = $self->data->is_parent_reservation();
-       my $computer_id = $self->data->get_computer_id();
-       my $computer_short_name   = $self->data->get_computer_short_name();
-       my $connect_timeout_minutes = 
$self->data->get_variable('connect_timeout_minutes') || 15;
+       my $request_id              = $self->data->get_request_id();
+       my $request_state_name      = $self->data->get_request_state_name();
+       my $request_laststate_name  = $self->data->get_request_laststate_name();
+       my $request_start           = $self->data->get_request_start_time();
+       my $request_end             = $self->data->get_request_end_time();
+       my $request_data            = $self->data->get_request_data();
+       my $request_forimaging      = $self->data->get_request_forimaging();
+       my $request_checkuser       = $self->data->get_request_checkuser();
+       my $reservation_id          = $self->data->get_reservation_id();
+       my $reservation_count       = $self->data->get_reservation_count();
+       my $server_request_id       = $self->data->get_server_request_id();
+       my $imagemeta_checkuser     = $self->data->get_imagemeta_checkuser();
+       my $is_parent_reservation   = $self->data->is_parent_reservation();
+       my $computer_id             = $self->data->get_computer_id();
+       my $computer_short_name     = $self->data->get_computer_short_name();
+       my $connect_timeout_seconds = 
$self->data->get_variable('connecttimeout') || (15 * 60);
        
        # Make sure connect timeout is long enough
        # It has to be a bit longer than the ~5 minute period between inuse 
checks due to cluster reservations
        # If too short, a user may be connected to one computer in a cluster 
and another inuse process times out before the connected computer is checked
+       my $connect_timeout_minutes = ceil($connect_timeout_seconds / 60);
        if ($connect_timeout_minutes < 10) {
                notify($ERRORS{'WARNING'}, 0, "connect timeout is set to 
$connect_timeout_minutes minutes, it must be 10 minutes or more");
                $connect_timeout_minutes = 10;
        }
        
+       # Connect timeout must be in whole minutes
+       $connect_timeout_seconds = ($connect_timeout_minutes * 60);
+       
        # Check if reboot operation was requested
        if ($request_state_name =~ /reboot/) {
                if ($self->os->can('reboot')) {
@@ -136,8 +140,7 @@ sub process {
                else {
                        notify($ERRORS{'CRITICAL'}, 0, "'$request_state_name' 
operation requested, " . ref($self->os) . " does not implement a 'reboot' 
subroutine");
                }
-               update_request_state($request_id, "inuse", "inuse");
-               notify($ERRORS{'OK'}, 0, "exiting");
+               switch_state($request_data, 'inuse', 'inuse');
                exit;
        }
        
@@ -146,7 +149,7 @@ sub process {
                if (!$self->os->manage_server_access()) {
                        notify($ERRORS{'CRITICAL'}, 0, "failed to update server 
access");
       }
-               update_request_state($request_id, "inuse", "inuse");
+               switch_state($request_data, 'inuse', 'inuse');
       exit;
        }
        
@@ -154,17 +157,27 @@ sub process {
        delete_computerloadlog_reservation($reservation_id, '!begin');
        
        my $now_epoch_seconds = time;
+       
        my $request_start_epoch_seconds = 
convert_to_epoch_seconds($request_start);
        my $request_end_epoch_seconds = convert_to_epoch_seconds($request_end);
+       
        my $request_remaining_seconds = ($request_end_epoch_seconds - 
$now_epoch_seconds);
        my $request_remaining_minutes = floor($request_remaining_seconds / 60);
+       
        my $request_duration_seconds = ($request_end_epoch_seconds - 
$request_start_epoch_seconds);
        my $request_duration_hours = floor($request_duration_seconds / 60 / 60);
        
        my $end_time_notify_minutes = 10;
        my $end_time_notify_seconds = ($end_time_notify_minutes * 60);
        
+       my $now_string               = strftime('%H:%M:%S', 
localtime($now_epoch_seconds));
+       my $request_end_string       = strftime('%H:%M:%S', 
localtime($request_end_epoch_seconds));
+       my $request_remaining_string = strftime('%H:%M:%S', 
gmtime($request_remaining_seconds));
+       my $end_time_notify_string   = strftime('%H:%M:%S', 
gmtime($end_time_notify_seconds));
+       my $connect_timeout_string   = strftime('%H:%M:%S', 
gmtime($connect_timeout_seconds));
+       
        # Check if near the end time
+       # Compare remaining minutes to connect timeout minutes in case this is 
> 15 minutes
        if ($request_remaining_minutes <= ($end_time_notify_minutes + 6)) {
                # Only 1 reservation needs to handle the end time countdown
                if (!$is_parent_reservation) {
@@ -172,11 +185,6 @@ sub process {
                        exit;
                }
                
-               my $now_string               = strftime('%H:%M:%S', 
localtime($now_epoch_seconds));
-               my $request_end_string       = strftime('%H:%M:%S', 
localtime($request_end_epoch_seconds));
-               my $request_remaining_string = strftime('%H:%M:%S', 
gmtime($request_remaining_seconds));
-               my $end_time_notify_string   = strftime('%H:%M:%S', 
gmtime($end_time_notify_seconds));
-               
                my $sleep_seconds = ($request_remaining_seconds - 
$end_time_notify_seconds);
                if ($sleep_seconds > 0) {
                        my $sleep_string = strftime('%H:%M:%S', 
gmtime($sleep_seconds));
@@ -218,7 +226,7 @@ sub process {
                        # Check if the user extended the request
                        if ($current_request_end_epoch_seconds > 
$request_end_epoch_seconds) {
                                notify($ERRORS{'OK'}, 0, "user extended 
request, end time: $request_end --> $current_request_end, returning request to 
inuse state");
-                               update_request_state($request_id, "inuse", 
"inuse");
+                               switch_state($request_data, 'inuse', 'inuse');
                                exit;
                        }
                        
@@ -241,7 +249,7 @@ sub process {
                        notify($ERRORS{'OK'}, 0, "initiating image auto-capture 
process");
                        if (!$self->_start_imaging_request()) {
                                notify($ERRORS{'CRITICAL'}, 0, "failed to 
initiate image auto-capture process, changing request and computer state to 
maintenance");
-                               update_request_state($request_id, 
'maintenance', 'maintenance');
+                               switch_state($request_data, 'maintenance', 
'maintenance');
                                exit;
                        }
                }
@@ -263,10 +271,27 @@ sub process {
                notify($ERRORS{'DEBUG'}, 0, "skipping end time notice interval 
check, request duration: $request_duration_hours hours, parent reservation: 
$is_parent_reservation");
        }
        
+       # Compare remaining minutes to connect timeout
+       # Connect timeout may be longer than 15 minutes
+       # Make sure connect timeout doesn't run into the end time notice
+       if ($request_remaining_minutes < ($connect_timeout_minutes + 
$end_time_notify_minutes)) {
+               notify($ERRORS{'DEBUG'}, 0, "skipping user connection check, 
connect timeout would run into the end time notice stage:\n" .
+                       "current time     : $now_string\n" .
+                       "request end time : $request_end_string\n" .
+                       "remaining time   : $request_remaining_string\n" .
+                       "notify time      : $end_time_notify_string\n" . 
+                       "connect timeout  : $connect_timeout_string"
+               );
+               switch_state($request_data, 'inuse', 'inuse');
+               exit;
+       }
+       
        # Check if the computer is responding to SSH
+       # Skip connection checks if the computer is not responding to SSH
+       # This prevents a reservation from timing out if the user is actually 
connected but SSH from the management node isn't working
        if (!$self->os->is_ssh_responding()) {
                notify($ERRORS{'OK'}, 0, "$computer_short_name is not 
responding to SSH, skipping user connection check");
-               update_request_state($request_id, "inuse", "inuse");
+               switch_state($request_data, 'inuse', 'inuse');
                exit;
        }
        
@@ -275,9 +300,7 @@ sub process {
        if ($request_laststate_name ne 'reserved' && 
$self->os->can('firewall_compare_update')) {
                $self->os->firewall_compare_update();
        }
-
-       # Skip connection checks if the computer is not responding to SSH
-       # This prevents a reservatino from timing out if the user is actually 
connected but SSH from the management node isn't working
+       
        # Wait for the user to acknowledge the request by clicking Connect 
button or from API
        if (!$self->code_loop_timeout(sub{$self->user_connected()}, [], 
"waiting for user to connect to $computer_short_name", 
($connect_timeout_minutes*60), 15)) {
                if (!$imagemeta_checkuser || !$request_checkuser) {
@@ -321,7 +344,7 @@ sub process {
                }
        }
        
-       update_request_state($request_id, "inuse", "inuse");
+       switch_state($request_data, 'inuse', 'inuse');
        exit;
 }
 

Modified: vcl/trunk/managementnode/lib/VCL/reserved.pm
URL: 
http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/reserved.pm?rev=1492747&r1=1492746&r2=1492747&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/reserved.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/reserved.pm Thu Jun 13 16:37:14 2013
@@ -94,25 +94,19 @@ sub process {
        my $self = shift;
        
        my $request_id                  = $self->data->get_request_id();
-       my @reservation_ids             = $self->data->get_reservation_ids();
        my $request_data                = $self->data->get_request_data();
        my $request_logid               = $self->data->get_request_log_id();
-       my $request_forimaging          = $self->data->get_request_forimaging;
        my $reservation_id              = $self->data->get_reservation_id();
        my $reservation_count           = $self->data->get_reservation_count();
        my $computer_id                 = $self->data->get_computer_id();
        my $computer_short_name         = 
$self->data->get_computer_short_name();
-       my $imagemeta_checkuser         = 
$self->data->get_imagemeta_checkuser();
-       my $server_request_id           = $self->data->get_server_request_id();
-       my $acknowledge_timeout_seconds = 
$self->data->get_variable('acknowledgetimeout') || 900;
-       my $connect_timeout_seconds     = 
$self->data->get_variable('connecttimeout') || 900;
        my $is_parent_reservation       = $self->data->is_parent_reservation();
+       my $server_request_id           = $self->data->get_server_request_id();
+       my $acknowledge_timeout_seconds = 
$self->data->get_variable('acknowledgetimeout') || 900;
        
        # Update the log loaded time to now for this request
        update_log_loaded_time($request_logid);
        
-       insertloadlog($reservation_id, $computer_id, "reserved", 
"$computer_short_name successfully reserved");
-       
        # Update the computer state to reserved
        # This causes pending to change to the Connect button on the Current 
Reservations page
        update_computer_state($computer_id, 'reserved');
@@ -124,7 +118,6 @@ sub process {
        }
        
        # User acknowledged request
-       
        # Add the cluster information to the loaded computers if this is a 
cluster reservation
        if ($reservation_count > 1 && !update_cluster_info($request_data)) {
                $self->reservation_failed("update_cluster_info failed");
@@ -144,14 +137,23 @@ sub process {
        if ($self->os->can("post_reserve") && !$self->os->post_reserve()) {
                $self->reservation_failed("OS module post_reserve failed");
        }
+
+       # Add a 'reserved' computerloadlog entry
+       # Do this last - important for cluster reservation timing
+       # Parent's reserved process will loop until this exists for all child 
reservations
+       insertloadlog($reservation_id, $computer_id, "reserved", 
"$computer_short_name successfully reserved");
        
        # For cluster reservations, the parent must wait until all child 
reserved processes have exited
        # Otherwise, the state will change to inuse while the child processes 
are still finishing up the reserved state
        # vcld will then fail to fork inuse processes for the child reservations
        if ($reservation_count > 1 && $is_parent_reservation) {
-               if 
(!$self->code_loop_timeout(sub{$self->is_child_process_running()}, [], 'waiting 
for child reserved processes to exit', 3*60, 5)) {
-                       $self->reservation_failed('child reservation reserved 
processes did not exit');
+               if 
(!$self->code_loop_timeout(sub{$self->wait_for_child_reservations()}, [], 
"waiting for child reservation reserved processes to complete", 180, 5)) {
+                       $self->reservation_failed('all child reservation 
reserved processes did not complete');
                }
+               
+               # Parent can't tell if reserved processes on other management 
nodes have terminated
+               # Wait a short time in case processes on other management nodes 
are terminating
+               sleep 3;
        }
        
        # Change the request and computer state to inuse then exit
@@ -160,36 +162,66 @@ sub process {
 
 #/////////////////////////////////////////////////////////////////////////////
 
-=head2 is_child_process_running
+=head2 wait_for_child_reservations
 
  Parameters  : none
  Returns     : boolean
- Description : 
+ Description : Checks if all child reservation 'reserved' processes have
+               completed.
 
 =cut
 
-sub is_child_process_running {
+sub wait_for_child_reservations {
        my $self = shift;
-       if (ref($self) !~ /VCL::reserved/) {
-               notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called 
as a class method of a VCL::reserved object");
+       my $request_id = $self->data->get_request_id();
+       
+       exit if is_request_deleted($request_id);
+       
+       # Check if 'reserved' computerloadlog entry exists for all reservations
+       my $request_loadstate_names = get_request_loadstate_names($request_id);
+       if (!$request_loadstate_names) {
+               notify($ERRORS{'WARNING'}, 0, "failed to retrieve request 
loadstate names");
                return;
        }
        
-       my $request_id = $self->data->get_request_id();
-       my $reservation_id = $self->data->get_reservation_id();
+       my @reserved_exists;
+       my @reserved_does_not_exist;
+       my @failed;
+       for my $reservation_id (keys %$request_loadstate_names) {
+               my @loadstate_names = 
@{$request_loadstate_names->{$reservation_id}};
+               if (grep { $_ eq 'reserved' } @loadstate_names) {
+                       push @reserved_exists, $reservation_id;
+               }
+               else {
+                       push @reserved_does_not_exist, $reservation_id;
+               }
+               
+               if (grep { $_ eq 'failed' } @loadstate_names) {
+                       push @failed, $reservation_id;
+               }
+       }
        
-       my @reservation_ids = $self->data->get_reservation_ids();
-       @reservation_ids = grep { $_ ne $reservation_id} @reservation_ids;
+       # Check if any child reservations failed
+       if (@failed) {
+               $self->reservation_failed("child reservation reserve process 
failed: " . join(', ', @failed));
+       }
        
-       my $pattern = "$request_id:(" . join('|', @reservation_ids) . ")";
-       if (my @pids = is_management_node_process_running($pattern)) {
-               notify($ERRORS{'DEBUG'}, 0, "child processes are running: " . 
join(", ", @pids));
+       if (@reserved_does_not_exist) {
+               notify($ERRORS{'DEBUG'}, 0, "computerloadlog 'reserved' entry 
does NOT exist for all reservations:\n" .
+                       "exists for reservation IDs: " . join(', ', 
@reserved_exists) . "\n" .
+                       "does not exist for reservation IDs: " . join(', ', 
@reserved_does_not_exist)
+               );
                return 0;
        }
        else {
-               notify($ERRORS{'DEBUG'}, 0, "no child processes running");
-               return 1;
+               notify($ERRORS{'DEBUG'}, 0, "computerloadlog 'reserved' entry 
exists for all reservations");
        }
+       
+       # Check if child reservation processes are running
+       return 0 unless $self->is_child_process_running();
+       
+       notify($ERRORS{'DEBUG'}, 0, "all child reservation reserved processes 
have completed");
+       return 1;
 }
 
 #/////////////////////////////////////////////////////////////////////////////

Modified: vcl/trunk/managementnode/lib/VCL/utils.pm
URL: 
http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/utils.pm?rev=1492747&r1=1492746&r2=1492747&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/utils.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/utils.pm Thu Jun 13 16:37:14 2013
@@ -144,10 +144,10 @@ our @EXPORT = qw(
   get_production_imagerevision_info
   get_random_mac_address
   get_request_by_computerid
-  get_request_computerloadstate_names
   get_request_current_state_name
   get_request_end
   get_request_info
+  get_request_loadstate_names
   get_reservation_accounts
   get_resource_groups
   get_managable_resource_groups
@@ -8597,7 +8597,7 @@ sub reservations_ready {
 
 #/////////////////////////////////////////////////////////////////////////////
 
-=head2 get_request_computerloadstate_names
+=head2 get_request_loadstate_names
 
  Parameters  :  $request_id
  Returns     :  hash reference
@@ -8609,7 +8609,7 @@ sub reservations_ready {
 
 =cut
 
-sub get_request_computerloadstate_names {
+sub get_request_loadstate_names {
        my ($request_id) = @_;
        if (!$request_id) {
                notify($ERRORS{'WARNING'}, 0, "request ID argument was not 
passed");


Reply via email to