Author: arkurth Date: Thu Sep 1 19:24:26 2011 New Revision: 1164221 URL: http://svn.apache.org/viewvc?rev=1164221&view=rev Log: VCL-500 Added loop to new.pm::process for tomaintenance requests if the computer is being used. This is likely for tomaintenance requests scheduled in the future due to the automatic reload occurring after the last reservation is complete.
Reworked computer_not_being_used subroutine to check for tomaintenance requests and several other conditions. Added code to make sure a competing reservation doesn't have any Semaphore objects open before killing the competing process for safety. Updated utils.pm::get_request_by_computerid to return a hash reference formatted similar to the other get_* subroutines. Updated calls in new.pm and VMware.pm. Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module.pm incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/VMware/VMware.pm incubator/vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm incubator/vcl/trunk/managementnode/lib/VCL/new.pm incubator/vcl/trunk/managementnode/lib/VCL/utils.pm Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module.pm?rev=1164221&r1=1164220&r2=1164221&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/Module.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/Module.pm Thu Sep 1 19:24:26 2011 @@ -169,7 +169,7 @@ sub new { next if ($arg_key eq 'data_structure'); $self->{$arg_key} = $args->{$arg_key}; - notify($ERRORS{'DEBUG'}, 0, "set '$arg_key' key for $class object from arguments"); + #notify($ERRORS{'DEBUG'}, 0, "set '$arg_key' key for $class object from arguments"); } # Bless the object as the class which new was called with @@ -347,7 +347,7 @@ sub create_mn_os_object { # Check if an OS object has already been stored in the calling object if ($ENV{mn_os}) { my $address = sprintf('%x', $ENV{mn_os}); - notify($ERRORS{'DEBUG'}, 0, "management node OS object has already been created, address: $address, returning 1"); + #notify($ERRORS{'DEBUG'}, 0, "management node OS object has already been created, address: $address, returning 1"); return 1; } @@ -813,14 +813,14 @@ sub code_loop_timeout { Examples: Semaphore is released when it is 
undefined: - my $semaphore = $self->get_semaphore('/tmp/test.lock'); + my $semaphore = $self->get_semaphore('test'); ... <exclusive lock is in place> undef $semaphore; ... <exclusive lock released> Semaphore is released when it goes out of scope: if (blah) { - my $semaphore = $self->get_semaphore('/tmp/test.lock'); + my $semaphore = $self->get_semaphore('test'); ... <exclusive lock is in place> } ... <exclusive lock released> @@ -841,10 +841,6 @@ sub get_semaphore { return; } - $semaphore_id =~ s/\W+/-/g; - $semaphore_id =~ s/(^-|-$)//g; - my $file_path = "/tmp/$semaphore_id.lock"; - # Attempt to create a new semaphore object my $semaphore = VCL::Module::Semaphore->new({'data_structure' => $self->data}); if (!$semaphore) { @@ -853,14 +849,14 @@ sub get_semaphore { } # Attempt to open and exclusively lock the file - if ($semaphore->get_lockfile($file_path, $total_wait_seconds, $attempt_delay_seconds)) { + if ($semaphore->get_lockfile($semaphore_id, $total_wait_seconds, $attempt_delay_seconds)) { # Return the semaphore object my $address = sprintf('%x', $semaphore); - notify($ERRORS{'DEBUG'}, 0, "created Semaphore object, memory address: $address"); + notify($ERRORS{'DEBUG'}, 0, "created '$semaphore_id' Semaphore object, memory address: $address"); return $semaphore; } else { - notify($ERRORS{'DEBUG'}, 0, "failed to open and optain exclusive lock on file: $file_path"); + notify($ERRORS{'DEBUG'}, 0, "failed to create '$semaphore_id' Semaphore object"); return; } } Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/VMware/VMware.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/VMware/VMware.pm?rev=1164221&r1=1164220&r2=1164221&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/VMware/VMware.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/VMware/VMware.pm Thu Sep 
1 19:24:26 2011 @@ -2371,12 +2371,13 @@ sub reclaim_vmhost_disk_space { # Check if any reservations have been assigned to the computer - my %computer_requests = get_request_by_computerid($check_computer_id); + my $computer_requests = get_request_by_computerid($check_computer_id); + # Remove the ID for the current reservation - delete $computer_requests{$reservation_id}; - if (%computer_requests) { - notify($ERRORS{'DEBUG'}, 0, "$vmx_file_name can't be deleted because it is assigned to another reservation: " . join(", ", sort keys(%computer_requests))); - $vmx_files->{$vmx_file_path}{reservations} = [sort keys(%computer_requests)]; + delete $computer_requests->{$reservation_id}; + if (keys(%$computer_requests)) { + notify($ERRORS{'DEBUG'}, 0, "$vmx_file_name can't be deleted because it is assigned to another reservation: " . join(", ", sort keys(%$computer_requests))); + $vmx_files->{$vmx_file_path}{reservations} = [sort keys(%$computer_requests)]; $vmx_files->{$vmx_file_path}{deletable} = 0; next; } Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm?rev=1164221&r1=1164220&r2=1164221&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm Thu Sep 1 19:24:26 2011 @@ -25,7 +25,7 @@ VCL::Module::Semaphore - VCL module to c =head1 SYNOPSIS my $semaphore = VCL::Module::Semaphore->new({data_structure => $self->data}); - $semaphore->get_lockfile($file_path, $total_wait_seconds, $attempt_delay_seconds); + $semaphore->get_lockfile($semaphore_id, $total_wait_seconds, $attempt_delay_seconds); =head1 DESCRIPTION @@ -66,6 +66,30 @@ use VCL::utils; ############################################################################## +=head1 CLASS VARIABLES + +=cut + +=head2 
$LOCKFILE_DIRECTORY_PATH + + Data type : String + Description : Location on the management node where the lockfiles are stored. + +=cut + +our $LOCKFILE_DIRECTORY_PATH = "/tmp"; + +=head2 $LOCKFILE_EXTENSION + + Data type : String + Description : File extension to be used for lockfiles. + +=cut + +our $LOCKFILE_EXTENSION = "semaphore"; + +############################################################################## + =head1 OBJECT METHODS =cut @@ -74,7 +98,7 @@ use VCL::utils; =head2 get_lockfile - Parameters : $file_path, $total_wait_seconds (optional), $attempt_delay_seconds (optional) + Parameters : $semaphore_id, $total_wait_seconds (optional), $attempt_delay_seconds (optional) Returns : filehandle Description : Attempts to open and obtain an exclusive lock on the file specified by the file path argument. If unable to obtain an @@ -92,13 +116,18 @@ sub get_lockfile { return; } - # Get the file path argument - my ($file_path, $total_wait_seconds, $attempt_delay_seconds) = @_; - if (!$file_path) { - notify($ERRORS{'WARNING'}, 0, "file path argument was not supplied"); + # Get the semaphore ID argument + my ($semaphore_id, $total_wait_seconds, $attempt_delay_seconds) = @_; + if (!$semaphore_id) { + notify($ERRORS{'WARNING'}, 0, "semaphore ID argument was not supplied"); return; } + $semaphore_id =~ s/\W+/-/g; + $semaphore_id =~ s/(^-|-$)//g; + + my $file_path = "$LOCKFILE_DIRECTORY_PATH/$semaphore_id.$LOCKFILE_EXTENSION"; + # Set the wait defaults if not supplied as arguments $total_wait_seconds = 30 if !defined($total_wait_seconds); $attempt_delay_seconds = 5 if !$attempt_delay_seconds; @@ -148,6 +177,9 @@ sub open_lockfile { # Truncate and print the process information to the file $file_handle->truncate(0); print $file_handle "$$ $0\n"; + $file_handle->setpos($file_handle->getpos()); + + notify($ERRORS{'DEBUG'}, 0, "wrote to file: $file_path, contents:\n '$$ $0'"); $self->{file_handles}{$file_path} = $file_handle; return $file_handle; @@ -258,6 +290,70 @@ sub 
release_lockfile { #///////////////////////////////////////////////////////////////////////////// +=head2 get_reservation_semaphore_ids + + Parameters : $reservation_id + Returns : array + Description : Returns the Semaphore IDs opened by the reservation specified by + the argument. An empty list is returned if no Semaphores are + open. + +=cut + +sub get_reservation_semaphore_ids { + my $self = shift; + unless (ref($self) && $self->isa('VCL::Module')) { + notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method"); + return; + } + + my $reservation_id = shift || $self->data->get_reservation_id(); + if (!$reservation_id) { + notify($ERRORS{'WARNING'}, 0, "reservation ID argument was not supplied"); + return; + } + + my @lockfile_paths = $self->mn_os->find_files($LOCKFILE_DIRECTORY_PATH, "*.$LOCKFILE_EXTENSION"); + if (!@lockfile_paths) { + notify($ERRORS{'DEBUG'}, 0, "did not find any lockfiles on this management node"); + return (); + } + + my @reservation_semaphore_ids; + + for my $lockfile_path (@lockfile_paths) { + my ($semaphore_id) = $lockfile_path =~ /([^\/]+)\.$LOCKFILE_EXTENSION/; + + my @lockfile_contents = $self->mn_os->get_file_contents($lockfile_path); + if (!@lockfile_contents) { + notify($ERRORS{'WARNING'}, 0, "failed to retrieve contents of lockfile: $lockfile_path"); + next; + } + + my $lockfile_line = $lockfile_contents[0]; + + # Line should contain a string similar to this: + # 31862 vclark 2376:3116 tomaintenance vclv1-42>vclh3-12.hpc.ncsu.edu vmwarewinxp-base234-v14 admin + my ($lockfile_reservation_id) = $lockfile_line =~ / \d+:(\d+) /; + + if (!defined($lockfile_reservation_id)) { + notify($ERRORS{'WARNING'}, 0, "failed to determine reservation ID from 1st line in $lockfile_path: '$lockfile_line'"); + next; + } + + if ($lockfile_reservation_id == $reservation_id) { + notify($ERRORS{'DEBUG'}, 0, "semaphore '$semaphore_id' belongs to reservation $reservation_id"); + push @reservation_semaphore_ids, 
$semaphore_id; + } + else { + notify($ERRORS{'DEBUG'}, 0, "semaphore '$semaphore_id' does NOT belong to reservation $reservation_id"); + } + } + return @reservation_semaphore_ids; +} + +#///////////////////////////////////////////////////////////////////////////// + =head2 DESTROY Parameters : none Modified: incubator/vcl/trunk/managementnode/lib/VCL/new.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/new.pm?rev=1164221&r1=1164220&r2=1164221&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/new.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/new.pm Thu Sep 1 19:24:26 2011 @@ -65,6 +65,7 @@ use 5.008000; use strict; use warnings; use diagnostics; +use English '-no_match_vars'; use VCL::utils; @@ -127,18 +128,32 @@ sub process { notify($ERRORS{'OK'}, 0, "$computer_short_name is not being used"); } elsif ($request_state_name eq 'tomaintenance') { - notify($ERRORS{'CRITICAL'}, 0, "$computer_short_name could not be put into maintenance because it is NOT available"); + # Computer is being used + # Loop until computer is not being used - # Return request state back to the original - if (update_request_state($request_id, 'failed', $request_state_name)) { - notify($ERRORS{'OK'}, 0, "request state set to 'failed'/'$request_state_name'"); - } - else { - notify($ERRORS{'WARNING'}, 0, "failed to set request state back to 'failed'/'$request_state_name'"); - } + # Wait a maximum of 3 hours + my $total_wait_seconds = (60 * 60 * 3); - notify($ERRORS{'OK'}, 0, "exiting"); - exit; + # Check every 5 minutes + my $attempt_delay_seconds = (60 * 5); + + my $sub_ref = $self->can("computer_not_being_used"); + my $message = "waiting for existing reservations on $computer_short_name to end"; + + if (!$self->code_loop_timeout($sub_ref, [$self], $message, $total_wait_seconds, $attempt_delay_seconds)) { + notify($ERRORS{'CRITICAL'}, 0, "$computer_short_name could 
not be put into maintenance because it is NOT available"); + + # Return request state back to the original + if (update_request_state($request_id, 'failed', $request_state_name)) { + notify($ERRORS{'OK'}, 0, "request state set to 'failed'/'$request_state_name'"); + } + else { + notify($ERRORS{'WARNING'}, 0, "failed to set request state back to 'failed'/'$request_state_name'"); + } + + notify($ERRORS{'OK'}, 0, "exiting"); + exit; + } } elsif ($request_state_name ne 'new') { # Computer is not available, not a new request (most likely a simple reload) @@ -677,210 +692,194 @@ sub reload_image { =head2 computer_not_being_used - Parameters : - Returns : - Description : + Parameters : none + Returns : boolean + Description : Checks if any other reservations are currently using the + computer. =cut sub computer_not_being_used { my $self = shift; - - my $reservation_id = $self->data->get_reservation_id(); + my $request_id = $self->data->get_request_id(); my $computer_id = $self->data->get_computer_id(); my $computer_short_name = $self->data->get_computer_short_name(); my $computer_state_name = $self->data->get_computer_state_name(); + my $imagerevision_id = $self->data->get_imagerevision_id(); my $image_name = $self->data->get_image_name(); my $image_reloadtime = $self->data->get_image_reload_time(); my $request_state_name = $self->data->get_request_state_name(); - - # Possible computer states: - # available - # deleted - # failed - # inuse - # maintenance - # reloading - # reserved - # vmhostinuse - - notify($ERRORS{'DEBUG'}, 0, "$computer_short_name state is $computer_state_name"); - - # Return 0 if computer state is maintenance or deleted - if ($computer_state_name =~ /^(deleted|maintenance)$/) { + + # Return 0 if computer state is maintenance, deleted, vmhostinuse + if ($computer_state_name =~ /^(deleted|maintenance|vmhostinuse)$/) { notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, its state is $computer_state_name"); return 0; } - - # Check if 
request is reinstall - if ($request_state_name =~ /^(reinstall)$/) { - notify($ERRORS{'OK'}, 0, "$computer_short_name is to be reinstalled"); - return 1; + + # Warn if computer state isn't available or reload - except for reinstall requests + if ($request_state_name !~ /^(reinstall)$/ && $computer_state_name !~ /^(available|reload)$/) { + notify($ERRORS{'WARNING'}, 0, "$computer_short_name state is $computer_state_name, checking if any conflicting reservations are active"); } - # Check if computer state is available - if ($computer_state_name =~ /^(available|reload)$/) { - notify($ERRORS{'OK'}, 0, "$computer_short_name is available, its state is $computer_state_name"); - return 1; + # Check if there is another request using this machine + # Get a hash containing all of the reservations for the computer + notify($ERRORS{'OK'}, 0, "retrieving info for reservations assigned to $computer_short_name"); + my $competing_request_info = get_request_by_computerid($computer_id); + + # There should be at least 1 request -- the one being processed + if (!$competing_request_info) { + notify($ERRORS{'WARNING'}, 0, "failed to retrieve any requests for computer id=$computer_id, there should be at least 1"); + return; } - # Warn if computer state is failed, proceed to check for neighbor reservations - else { - notify($ERRORS{'WARNING'}, 0, "$computer_short_name state is $computer_state_name, checking if any conflicting requests are active"); + + # Remove the request currently being processed from the hash + delete $competing_request_info->{$request_id}; + + if (!keys(%$competing_request_info)) { + notify($ERRORS{'OK'}, 0, "$computer_short_name is not assigned to any other reservations"); + return 1; } - - # Set variables to control how may attempts are made to wait for an existing inuse reservation to end - my $inuse_loop_attempts = 4; - my $inuse_loop_wait = 30; - - INUSE_LOOP: for (my $inuse_loop_count = 0; $inuse_loop_count < $inuse_loop_attempts; $inuse_loop_count++) { - - # 
Check if this isn't the first iteration meaning something conflicting was found - if ($inuse_loop_count > 0) { - notify($ERRORS{'OK'}, 0, "attempt $inuse_loop_count/$inuse_loop_attempts: waiting for $inuse_loop_wait seconds before checking neighbor requests again"); - sleep $inuse_loop_wait; + + # Loop through the competing requests + COMPETING_REQUESTS: for my $competing_request_id (sort keys %$competing_request_info) { + my $competing_reservation_id = $competing_request_info->{$competing_request_id}{data}->get_reservation_id(); + my $competing_request_state = $competing_request_info->{$competing_request_id}{data}->get_request_state_name(); + my $competing_request_laststate = $competing_request_info->{$competing_request_id}{data}->get_request_laststate_name(); + my $competing_imagerevision_id = $competing_request_info->{$competing_request_id}{data}->get_imagerevision_id(); + my $competing_request_start = $competing_request_info->{$competing_request_id}{data}->get_request_start_time(); + my $competing_request_end = $competing_request_info->{$competing_request_id}{data}->get_request_end_time(); + + my $competing_request_start_epoch = convert_to_epoch_seconds($competing_request_start); + my $competing_request_end_epoch = convert_to_epoch_seconds($competing_request_end); + + my $now_epoch = time; + + my $competing_request_info_string; + $competing_request_info_string .= "request:reservation ID: $competing_request_id:$competing_reservation_id\n"; + $competing_request_info_string .= "request state: $competing_request_state/$competing_request_laststate\n"; + $competing_request_info_string .= "request start time: $competing_request_start\n"; + $competing_request_info_string .= "request end time: $competing_request_end"; + + notify($ERRORS{'DEBUG'}, 0, "checking reservation assigned to $computer_short_name:\n$competing_request_info_string"); + + # Check for existing image creation requests + if ($competing_request_state =~ /^(image)$/ || $competing_request_laststate =~ 
/^(image)$/) { + notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, it is assigned to an existing imaging reservation:\n$competing_request_info_string"); + return 0; } - - # Check if there is another request using this machine - # Get a hash containing all of the reservations for the computer - notify($ERRORS{'OK'}, 0, "checking neighbor reservations for $computer_short_name"); - my %neighbor_requests = get_request_by_computerid($computer_id); - - # There should be at least 1 request -- the one being processed - if (!%neighbor_requests) { - notify($ERRORS{'WARNING'}, 0, "failed to retrieve any requests for computer id=$computer_id, there should be at least 1"); - return; + + # Check for any requests in the maintenance state + if ($competing_request_state =~ /^(maintenance)$/) { + notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, it is assigned to an existing request in the '$competing_request_state' state:\n$competing_request_info_string"); + return 0; } - - notify($ERRORS{'OK'}, 0, "found " . scalar keys(%neighbor_requests) . 
" total reservations for $computer_short_name"); - - # Loop through the neighbor requests - NEIGHBOR_REQUESTS: foreach my $neighbor_request_key (keys %neighbor_requests) { - my $neighbor_request_id = $neighbor_requests{$neighbor_request_key}{requestid}; - my $neighbor_reservation_id = $neighbor_requests{$neighbor_request_key}{reservationid}; - my $neighbor_state_name = $neighbor_requests{$neighbor_request_key}{currentstate}; - my $neighbor_laststate_name = $neighbor_requests{$neighbor_request_key}{laststate}; - my $neighbor_request_start = $neighbor_requests{$neighbor_request_key}{requeststart}; - - my $neighbor_request_start_epoch = convert_to_epoch_seconds($neighbor_request_start); - my $now_epoch = time(); - my $neighbor_start_diff = $neighbor_request_start_epoch - $now_epoch; - - # Ignore the request currently being processed and any complete requests - if ($neighbor_reservation_id == $reservation_id) { - next NEIGHBOR_REQUESTS; - } - - notify($ERRORS{'DEBUG'}, 0, "checking neighbor request=$neighbor_request_id, reservation=$neighbor_reservation_id, state=$neighbor_state_name, laststate=$neighbor_laststate_name"); - notify($ERRORS{'DEBUG'}, 0, "neighbor start time: $neighbor_request_start ($neighbor_start_diff)"); - - # Ignore any complete requests - if ($neighbor_state_name eq "complete") { - notify($ERRORS{'OK'}, 0, "neighbor request is complete: id=$neighbor_request_id, state=$neighbor_state_name"); - next NEIGHBOR_REQUESTS; - } - - # Check for overlapping reservations which user is involved or image is being created - # Don't check for state = new, it could be a future reservation - if ($neighbor_state_name =~ /^(maintenance|reserved|inuse|image)$/) { - notify($ERRORS{'WARNING'}, 0, "detected overlapping reservation on $computer_short_name: req=$neighbor_request_id, res=$neighbor_reservation_id, request state=$neighbor_state_name, laststate=$neighbor_laststate_name, computer state=$computer_state_name"); + + # Ignore 'complete', 'failed' requests + if 
($competing_request_state =~ /^(complete|failed)$/) { + notify($ERRORS{'DEBUG'}, 0, "ignoring request in state: $competing_request_state/$competing_request_laststate"); + next COMPETING_REQUESTS; + } + + # Check if the other reservation assigned to computer hasn't started yet + if ($competing_request_start_epoch > $now_epoch) { + # If they overlap, let the other reservation worry about it + notify($ERRORS{'OK'}, 0, "request $competing_request_id:$competing_reservation_id start time is in the future: $competing_request_start"); + next COMPETING_REQUESTS; + } + + # Check if the other reservation is a 'reload' reservation for the same image revision + if ($competing_imagerevision_id eq $imagerevision_id && $competing_request_state eq 'pending' && $competing_request_laststate =~ /(reload)/) { + notify($ERRORS{'OK'}, 0, "reservation $competing_reservation_id is currently loading $computer_short_name with the correct image: $image_name, waiting for the other reload process to complete"); + + my $message = "reload reservation $competing_request_id:$competing_reservation_id is still loading $computer_short_name with $image_name"; + my $total_wait_seconds = (60 * $image_reloadtime); + my $attempt_delay_seconds = 30; + + # Loop until other process is done + if ($self->code_loop_timeout(sub{return !reservation_being_processed(@_)}, [$competing_reservation_id], $message, $total_wait_seconds, $attempt_delay_seconds)) { + notify($ERRORS{'DEBUG'}, 0, "reload reservation $competing_reservation_id finished loading $computer_short_name with $image_name"); + + # Call this subroutine again in order to retrieve a current list of competing reservations + # The list of competing reservations may have changed while waiting + notify($ERRORS{'OK'}, 0, "calling this subroutine again to retrieve the current list of competing reservations assigned to $computer_short_name"); + return $self->computer_not_being_used(); + } + else { + notify($ERRORS{'WARNING'}, 0, "reload reservation 
$competing_reservation_id has NOT finished loading $computer_short_name with $image_name, waited $total_wait_seconds seconds"); + } + } + + # Check if the other reservation assigned to computer end time has been reached + # -or- + # Reload reservation -- either for a different image or the previous check loop monitoring the reload process for the same image timed out + # + if ($competing_request_end_epoch <= $now_epoch || + ($competing_request_state =~ /(timeout|deleted|reload)/) || + ($competing_request_state eq 'pending' && $competing_request_laststate =~ /(timeout|deleted|reload)/)) { + + # Update the competing request state to complete + # If this fails, check if the competing request has already been deleted + # Do this before checking if the reservation is being processed to prevent new processes from being created + if (update_request_state($competing_request_id, "complete", ($competing_request_state eq 'pending') ? $competing_request_laststate : $competing_request_state)) { + notify($ERRORS{'OK'}, 0, "request state set to 'complete' for competing reservation $competing_reservation_id"); + } + elsif (is_request_deleted($competing_request_id)) { + notify($ERRORS{'OK'}, 0, "request state not set to 'complete' for competing reservation $competing_reservation_id because request has been deleted"); + } + else { + notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, failed to set request state to 'complete', competing request has NOT been deleted:\n$competing_request_info_string"); return 0; } - - # Check for other currently pending requests - elsif ($neighbor_state_name eq "pending") { - - # Make sure neighbor request process is actually running - my $neighbor_process_count = checkonprocess($neighbor_laststate_name, $neighbor_request_id); - if ($neighbor_process_count) { - notify($ERRORS{'OK'}, 0, "detected neighbor request $neighbor_request_id is active"); - } - elsif ($neighbor_process_count == 0) { - notify($ERRORS{'OK'}, 0, "detected 
neighbor request $neighbor_request_id is NOT active, setting its state to 'complete'"); - # Process was not found, set neighbor request to complete - if (update_request_state($neighbor_request_id, "complete", $neighbor_laststate_name)) { - notify($ERRORS{'OK'}, 0, "neighbor request $neighbor_request_id state set to 'complete'"); - } - else { - notify($ERRORS{'WARNING'}, 0, "failed to set neighbor request $neighbor_request_id state to 'complete'"); - } - # Check other neighbor requests - next NEIGHBOR_REQUESTS; - } ## end elsif ($neighbor_process_count == 0) [ if ($neighbor_process_count) - else { - # Undefined was returned from checkonprocess(), meaning error occurred - notify($ERRORS{'CRITICAL'}, 0, "error occurred while checking if neighbor request $neighbor_request_id process is running"); - - # Wait then try again - next INUSE_LOOP; - } - - # Check for state = pending and laststate = new, reserved, inuse, or image - # Just return 0 for these, don't bother waiting - if ($neighbor_laststate_name =~ /^(new|reserved|inuse|image)$/) { - notify($ERRORS{'WARNING'}, 0, "detected overlapping reservation on $computer_short_name: req=$neighbor_request_id, res=$neighbor_reservation_id, request state=$neighbor_state_name, laststate=$neighbor_laststate_name, computer state=$computer_state_name"); + + # Check if the other reservation is still being processed + if (reservation_being_processed($competing_reservation_id)) { + notify($ERRORS{'OK'}, 0, "reservation $competing_reservation_id is currently being processed, making sure the process doesn't have any Semaphore objects open before attempting to kill it"); + + # Create a Semaphore object and check if the competing process owns any of its own Semaphore objects + # This would indicate it's doing something such as retrieving an image + # Don't kill it or a partial image may be copied + my $semaphore = VCL::Module::Semaphore->new(); + if ($semaphore->get_reservation_semaphore_ids($competing_reservation_id)) { + 
notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, reservation $competing_reservation_id is still being processed and owns a Semaphore object, not killing the competing process, it may be transferring an image:\n$competing_request_info_string"); return 0; } - - # Neighbor request state is pending and process is actively running - # Neighbor request state should be deleted|timeout|reload|reclaim - if ($neighbor_laststate_name !~ /^(deleted|timeout|reload|reclaim)$/) { - notify($ERRORS{'WARNING'}, 0, "unexpected neighbor request laststate: $neighbor_laststate_name"); - } - - # Computer should be loading - if (monitorloading($neighbor_reservation_id, $image_name, $computer_id, $computer_short_name, $image_reloadtime)) { - # Returns 1 if specified image has been successfully loaded - # Returns 0 if another image is being loaded or if loading fails - notify($ERRORS{'OK'}, 0, "$image_name should have been loaded on $computer_short_name by reservation $neighbor_reservation_id"); - - # Check other neighbor requests - next NEIGHBOR_REQUESTS; - } - - # Computer is not being loaded with the correct image or loading failed - # Take evasive action - recheck on neighbor process - if (checkonprocess($neighbor_laststate_name, $neighbor_request_id)) { - notify($ERRORS{'OK'}, 0, "neighbor request=$neighbor_request_id, reservation=$neighbor_reservation_id owning $computer_short_name is not loading correct image or taking too long, attempting to kill process for reservation $neighbor_reservation_id"); - - # Kill competing neighbor process - set it's state to complete - if (kill_reservation_process($neighbor_reservation_id)) { - notify($ERRORS{'OK'}, 0, "killed competing process for reservation $neighbor_reservation_id"); - } - else { - notify($ERRORS{'WARNING'}, 0, "failed to kill competing process for reservation $neighbor_reservation_id"); - } - } ## end if (checkonprocess($neighbor_laststate_name... 
- - # Either neighbor process was not found or competing process was just killed - # Set neighbor request to complete - if (update_request_state($neighbor_request_id, "deleted", $neighbor_laststate_name)) { - notify($ERRORS{'OK'}, 0, "neighbor request $neighbor_request_id state set to 'deleted'"); - # Check other neighbor requests - next NEIGHBOR_REQUESTS; + + # Kill competing process and update request state to complete + notify($ERRORS{'OK'}, 0, "attempting to kill process of competing reservation $competing_reservation_id assigned to $computer_short_name"); + if (kill_reservation_process($competing_reservation_id)) { + notify($ERRORS{'OK'}, 0, "killed process for competing reservation $competing_reservation_id"); } - else { - notify($ERRORS{'WARNING'}, 0, "failed to set neighbor request $neighbor_request_id state to 'deleted'"); + + # Wait for competing process to end before verifying that it was successfully killed + sleep 2; + + # Verify that the competing reservation process was killed + if (reservation_being_processed($competing_reservation_id)) { + notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, failed to kill process for competing reservation, competing reservation is still being processed:\n$competing_request_info_string"); + return 0; } - } ## end elsif ($neighbor_state_name eq "pending") [ if ($neighbor_state_name =~ /^(reserved|inuse|image)$/) - - # Check for other requests - else { - notify($ERRORS{'OK'}, 0, "neighbor request state is OK: $neighbor_state_name/$neighbor_laststate_name"); } - - } ## end foreach my $neighbor_request_key (keys %neighbor_requests) - - # Checked all neighbor requests and didn't find any conflicting reservations - notify($ERRORS{'OK'}, 0, "checked neighbor requests and didn't find any conflicting reservations for $computer_short_name"); - return 1; - - } ## end for (my $inuse_loop_count = 0; $inuse_loop_count... 
- - # Checked all neighbor requests several times and find something conflicting every time - notify($ERRORS{'WARNING'}, 0, "$computer_short_name does not appear to be available"); - return 0; - -} ## end sub computer_not_being_used + + # Call this subroutine again in order to retrieve a current list of competing reservations + # The list of competing reservations may have changed + # A new reload reservation may have been added by timeout/deleted processes + notify($ERRORS{'OK'}, 0, "calling this subroutine again to retrieve the current list of competing reservations assigned to $computer_short_name"); + return $self->computer_not_being_used(); + } + elsif (reservation_being_processed($competing_reservation_id)) { + notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, assigned overlapping reservations, competing reservation is currently being processed:\n$competing_request_info_string"); + return 0; + } + else { + notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, assigned overlapping reservations, competing reservation is NOT currently being processed:\n$competing_request_info_string"); + return 0; + } + } + + # Checked all competing requests and didn't find any conflicting reservations + notify($ERRORS{'OK'}, 0, "$computer_short_name is available, did not find any conflicting reservations"); + return 1; +} #///////////////////////////////////////////////////////////////////////////// Modified: incubator/vcl/trunk/managementnode/lib/VCL/utils.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/utils.pm?rev=1164221&r1=1164220&r2=1164221&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/utils.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/utils.pm Thu Sep 1 19:24:26 2011 @@ -4578,10 +4578,10 @@ sub get_request_info { $not_standalone_list = $ENV{management_node_info}{NOT_STANDALONE}; } 
if (grep(/$request_info{user}{affiliation}{name}/, split(/,/, $not_standalone_list))) { - notify($ERRORS{'DEBUG'}, 0, "non-standalone affiliation found: $request_info{user}{affiliation}{name}"); + #notify($ERRORS{'DEBUG'}, 0, "non-standalone affiliation found: $request_info{user}{affiliation}{name}"); } else { - notify($ERRORS{'DEBUG'}, 0, "standalone affiliation found: $request_info{user}{affiliation}{name}"); + #notify($ERRORS{'DEBUG'}, 0, "standalone affiliation found: $request_info{user}{affiliation}{name}"); $request_info{user}{STANDALONE} = 1; } @@ -4590,7 +4590,7 @@ sub get_request_info { $request_info{user}{STANDALONE} = 1; notify($ERRORS{'OK'}, 0, "found NULL uid setting standalone flag: $request_info{user}{unityid}, uid: NULL"); } - + # Fix the unityid if if the user's UID is >= 1000000 # Remove the domain section if the user's unityid contains @... if(defined($request_info{user}{uid})) { @@ -5415,7 +5415,7 @@ EOF sub run_ssh_command { my ($node, $identity_paths, $command, $user, $port, $output_level, $timeout_seconds) = @_; - + my $max_attempts = 3; if (ref($_[0]) eq 'HASH') { @@ -6543,33 +6543,29 @@ sub get_request_end { sub get_request_by_computerid { my ($computer_id) = @_; - my ($package, $filename, $line, $sub) = caller(0); - # Check the passed parameter - if (!(defined($computer_id))) { - notify($ERRORS{'WARNING'}, 0, "computer ID was not specified"); - return (); + if (!defined($computer_id)) { + notify($ERRORS{'WARNING'}, 0, "computer ID argument was not specified"); + return } # Create the select statement - my $select_statement = " - SELECT DISTINCT - res.id AS reservationid, - s.name AS currentstate, - ls.name AS laststate, - req.id AS requestid, - req.start AS requeststart - FROM - request req,reservation res,state s,state ls - WHERE - req.stateid=s.id AND - req.laststateid = ls.id AND - req.id=res.requestid AND - res.computerid = $computer_id + my $select_statement = <<EOF; +SELECT DISTINCT +request.id AS request_id, +reservation.id AS 
reservation_id - ORDER BY - res.id - "; +FROM +request, +reservation + +WHERE +request.id = reservation.requestid +AND reservation.computerid = $computer_id + +ORDER BY +reservation.id +EOF # Call the database select subroutine # This will return an array of one or more rows based on the select statement @@ -6577,26 +6573,36 @@ sub get_request_by_computerid { # Check to make sure 1 row was returned if (scalar @selected_rows == 0) { - notify($ERRORS{'OK'}, 0, "zero rows were returned from database select $computer_id"); + notify($ERRORS{'OK'}, 0, "$computer_id is not assigned to any reservations"); return (); } - my %returnhash; + my $computer_request_info; # It contains a hash - for (@selected_rows) { - my %reservation_row = %{$_}; - # Grab the reservation ID to make the code a little cleaner - my $reservation_id = $reservation_row{reservationid}; - $returnhash{$reservation_id}{"reservationid"} = $reservation_id; - $returnhash{$reservation_id}{"currentstate"} = $reservation_row{currentstate}; - $returnhash{$reservation_id}{"laststate"} = $reservation_row{laststate}; - $returnhash{$reservation_id}{"requestid"} = $reservation_row{requestid}; - $returnhash{$reservation_id}{"requeststart"} = $reservation_row{requeststart}; - } ## end for (@selected_rows) + for my $row (@selected_rows) { + my $request_id = $row->{request_id}; + my $reservation_id = $row->{reservation_id}; + + my %request_info = get_request_info($request_id); + if (!%request_info) { + notify($ERRORS{'CRITICAL'}, 0, "failed to retrieve request info, request ID: $request_id"); + return; + } + + my $data_structure; + eval {$data_structure = new VCL::DataStructure({request_data => \%request_info, reservation_id => $reservation_id});}; + if (my $exception = Exception::Class::Base->caught()) { + notify($ERRORS{'CRITICAL'}, 0, "unable to create DataStructure object" . 
$exception->message); + return; + } + + notify($ERRORS{'DEBUG'}, 0, "retrieved info and DataStructure object for $request_id:$reservation_id"); + $computer_request_info->{$request_id}{data} = $data_structure; + } - return %returnhash; -} ## end sub get_request_by_computerid + return $computer_request_info; +} #/////////////////////////////////////////////////////////////////////////////