Author: arkurth
Date: Fri Jan 30 17:21:21 2009
New Revision: 739338

URL: http://svn.apache.org/viewvc?rev=739338&view=rev
Log:
VCL-56
Changed 'use English;' to 'use English qw( -no_match_vars );' in vcld. 
Documentation for the English module states this improves performance.

VCL-71
Reworked REAPER and HUNTSMAN in vcld. HUNTSMAN was killing all child processes 
under some circumstances. If kill was issued without -9, any VCL process would
attempt to kill all of the child processes of the parent vcld process. I
removed this capability altogether for safety. Kill or kill -9 now only kills
the specified process. REAPER was modified to correctly handle the exit status 
of the process which was reaped. It had not been setting $? correctly if wait() 
reaped a process. I also changed the wait() command to waitpid() per 
recommendations in the Perl books.

VCL-72
Added die_handler and warning_handler subroutines. These catch die and warning 
signals which weren't caught at all before. A die signal could happen if any 
part of the code attempted to call an undefined subroutine. When this occurred, 
the process would exit without sending a notification via notify(). The 
die_handler subroutine causes a critical message to be sent. The 
warning_handler subroutine causes warning messages to be logged via notify()
when a warning signal occurs.

Made some other minor spelling corrections to vcld.

Modified:
    incubator/vcl/trunk/managementnode/bin/vcld

Modified: incubator/vcl/trunk/managementnode/bin/vcld
URL: 
http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/bin/vcld?rev=739338&r1=739337&r2=739338&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/bin/vcld (original)
+++ incubator/vcl/trunk/managementnode/bin/vcld Fri Jan 30 17:21:21 2009
@@ -56,7 +56,7 @@
 use Symbol;
 use POSIX;
 use Getopt::Long;
-use English;
+use English qw( -no_match_vars );
 
 use VCL::utils;
 use VCL::DataStructure;
@@ -86,6 +86,8 @@
 $SIG{QUIT} = \&HUNTSMAN;
 $SIG{HUP}  = \&HUNTSMAN;
 $SIG{TERM} = \&HUNTSMAN;
+$SIG{__WARN__} = \&warning_handler;
+$SIG{__DIE__} = \&die_handler;
 
 # Call main subroutine
 &main();
@@ -126,7 +128,7 @@
                notify($ERRORS{'OK'}, $LOGFILE, "retrieved management node 
information from database");
        }
        else {
-               notify($ERRORS{'CRITICAL'}, $LOGFILE, "unable to retieve 
management node information from database");
+               notify($ERRORS{'CRITICAL'}, $LOGFILE, "unable to retrieve 
management node information from database");
                exit;
        }
 
@@ -172,10 +174,10 @@
                # Get all the requests assigned to this management node
                # get_management_node_requests() gets a subset of the 
information available
                if ($info{request} = 
{get_management_node_requests($management_node_id)}) {
-                       #notify($ERRORS{'DEBUG'}, $LOGFILE, "retieved request 
information for management node $management_node_id");
+                       #notify($ERRORS{'DEBUG'}, $LOGFILE, "retrieved request 
information for management node $management_node_id");
                }
                else {
-                       notify($ERRORS{'CRITICAL'}, $LOGFILE, "could not 
retieve request information for management node $management_node_id");
+                       notify($ERRORS{'CRITICAL'}, $LOGFILE, "could not 
retrieve request information for management node $management_node_id");
                }
 
                # See if there's anything to do
@@ -268,17 +270,21 @@
                                }
                                
                                # Make sure reservation is not currently being 
processed
-                               if 
(reservation_being_processed($reservation_id)) {
+                               my $being_processed = 
reservation_being_processed($reservation_id);
+                               if ($being_processed && $request_state_name ne 
'deleted') {
                                        notify($ERRORS{'WARNING'}, $LOGFILE, 
"reservation $reservation_id is already being processed");
                                        next RESERVATION;
                                }
+                               elsif ($being_processed) {
+                                       notify($ERRORS{'DEBUG'}, $LOGFILE, 
"$request_state_name processing delayed, reservation $reservation_id is 
currently being processed");
+                               }
                                else {
                                        notify($ERRORS{'DEBUG'}, $LOGFILE, 
"reservation $reservation_id is NOT already being processed");
                                }
 
                                # Get the full set of database data for this 
request
                                if (%request_info = 
get_request_info($request_id)) {
-                                       notify($ERRORS{'DEBUG'}, $LOGFILE, 
"retieved request information from database");
+                                       notify($ERRORS{'DEBUG'}, $LOGFILE, 
"retrieved request information from database");
 
                                        # Set request variables that may have 
changed by other processes to their original values
                                        # They may change if this is a cluster 
reservation
@@ -287,7 +293,7 @@
                                        $request_info{preload}         = 
$request_preload;
                                }
                                else {
-                                       notify($ERRORS{'WARNING'}, $LOGFILE, 
"could not retieve request information from database");
+                                       notify($ERRORS{'WARNING'}, $LOGFILE, 
"could not retrieve request information from database");
                                        next RESERVATION;
                                }
 
@@ -566,8 +572,9 @@
                        sigprocmask(SIG_UNBLOCK, $sigset) or die "can't unblock 
SIGINT for fork: $!\n";
                        
                        # Parent process records the child's PID and returns
+                       # Store the reservation ID so REAPER can clean up the 
reservation when it dies
                        $child_count++;
-                       $child_pids{$pid} = 1;
+                       $child_pids{$pid} = $reservation_id;
                        notify($ERRORS{'OK'}, $LOGFILE, "current number of 
forked kids: $child_count");
                        return;
                }
@@ -621,23 +628,78 @@
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 warning_handler
+
+ Parameters  : None
+ Returns     : Nothing
+ Description : Handles __WARN__ signals. This signal is generated when warn()
+               is called. This may occur when the VCL code encounters an
+               error such as:
+               Use of uninitialized value in concatenation (.) or string at ...
+
+               If the signal isn't handled, the warning message is dumped
+               to STDERR and will appear in the log file. This handler
+               causes WARN signals to be logged by the notify() subroutine.
+
+=cut
+
+sub warning_handler {
+       # __WARN__ hook: log the warning through notify() at WARNING level; "@_" interpolates the handler's arguments into one string
+       notify($ERRORS{'WARNING'}, $LOGFILE, "@_");
+       
+       # Reinstall the signal handler in case of unreliable signals (points $SIG{__WARN__} back at this subroutine)
+       $SIG{__WARN__} = \&warning_handler;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
+=head2 die_handler
+
+ Parameters  : None
+ Returns     : Nothing
+ Description : Handles __DIE__ signals. This signal is generated when die()
+               is called. This may occur when the VCL code encounters an
+               error such as:
+               Uncaught exception from user code:
+               Undefined subroutine ... called at ...
+
+               If the signal isn't handled, the output is dumped to STDERR
+               and the process exits quietly.
+
+
+=cut
+
+sub die_handler {
+       # __DIE__ hook: report the fatal error text through notify() at CRITICAL
+       # level, then terminate this process. "@_" interpolates the handler's
+       # arguments (the die message) into a single string.
+       # NOTE(review): Perl also invokes a __DIE__ hook for die() called inside
+       # eval blocks - confirm that exiting in that case is intended.
+       notify($ERRORS{'CRITICAL'}, $LOGFILE, "@_");
+       
+       # Reinstall the die handler itself in case of unreliable signals, so any
+       # later die() is still reported as CRITICAL rather than as a warning
+       $SIG{__DIE__} = \&die_handler;
+       
+       exit;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 =head2 HUNTSMAN
 
- Parameters  : 
- Returns     : 
- Description :
+ Parameters  : None
+ Returns     : Nothing, process exits
+ Description : Signal handler for:
+               $SIG{INT}
+               $SIG{QUIT}
+               $SIG{HUP}
+               $SIG{TERM}
 
 =cut
 
-sub HUNTSMAN {   
-       # Temporarily override the the SIGCHLD signal handler
-       # Set SIGCHLD handler to IGNORE, meaning nothing happens when a child 
process exits
-       local ($SIG{CHLD}) = 'IGNORE';
+sub HUNTSMAN {
+       my $signal = shift;
        
-       # Send SIGINT to child processes
-       kill 'INT' => keys %child_pids;
+       local ($SIG{CHLD}) = 'IGNORE';
        
-       notify($ERRORS{'OK'}, $LOGFILE, "vcld process exiting, pid=$$");
+       # Display a message and exit
+       notify($ERRORS{'DEBUG'}, 0, "HUNTSMAN called: signal: $signal, pid: 
$PID, process exiting");
        exit;
 }
 
@@ -661,6 +723,11 @@
 =cut
 
 sub REAPER {
+       my $signal = shift;
+       
+       # Don't overwrite current error
+       local $!;
+
        # Save the information saved in $? before proceeding
        # This is done to save the exit status of the child process which died
        # If you don't save it, wait() will overwrite it
@@ -668,30 +735,39 @@
        my $child_exit_status = $? >> 8;
        my $signal_number = $? & 127;
        my $dumped_core = $? & 128;
-       #notify($ERRORS{'DEBUG'}, 0, "\$?: $?, signal: $signal_number, dumped 
core: $dumped_core, child exit status: $child_exit_status");
-       
-       # Configure the REAPER() subroutine to handle SIGCHLD signals
-       $SIG{CHLD} = \&REAPER;
-       
-       # Wait for a child process to terminate
-       # Should have already happened since this subroutine is only called 
when CHLD signals are sent
-       my $dead_pid = wait;
+       #notify($ERRORS{'DEBUG'}, 0, "REAPER called: signal: $signal, initial 
value of \$?: $status_save");
        
-       # Check if the child PID hash contains the pid of the process which 
just died
-       if (exists $child_pids{$dead_pid}) {
-               # Child which died was a VCL state process since its pid is in 
the hash
-               $child_count--;
-               delete $child_pids{$dead_pid};
-               notify($ERRORS{'OK'}, $LOGFILE, "VCL state process exited, 
pid=$dead_pid");
-       }
-       else {
-               # Child which died was some other process
-               #notify($ERRORS{'DEBUG'}, $LOGFILE, "child process exited, 
pid=$dead_pid");
+       # Wait for a child processes to die
+       my $dead_pid = -1;
+       my $wait_pid;
+       while (($wait_pid = waitpid(-1, WNOHANG)) > 0) {
+               $status_save = $?;
+               $child_exit_status = $? >> 8;
+               $signal_number = $? & 127;
+               $dumped_core = $? & 128;
+               $dead_pid = $wait_pid;
+               
+               # Assemble a string containing the dead process info
+               notify($ERRORS{'DEBUG'}, 0, "process reaped: pid: $dead_pid, 
\$?: $status_save, exit status: $child_exit_status");
+               
+               # Check if the child PID hash contains the pid of the process 
which just died
+               if (exists $child_pids{$dead_pid}) {
+                       my $dead_reservation_id = $child_pids{$dead_pid};
+                       notify($ERRORS{'DEBUG'}, 0, "VCL process exited for 
reservation $dead_reservation_id");
+                       
+                       # Child which died was a VCL state process since its 
pid is in the hash
+                       $child_count--;
+                       delete $child_pids{$dead_pid};
+               }
+               
+               # Reinstall the signal handler in case of unreliable signals
+               $SIG{CHLD} = \&REAPER;
        }
        
        # Set the special $? variable back to the exit status of the child 
which died
        # This is useful when utilities such as SSH are run in other places in 
the code
        # The code which called the utility can check the exit status to see if 
it was successful
+       notify($ERRORS{'DEBUG'}, 0, "setting \$? to $status_save, exit status: 
$child_exit_status");
        $? = $status_save;
        
        return;


Reply via email to