Hi,

This patch (already reviewed by Horms) implements a continuous logging
mechanism for real service checks in ldirectord. This is useful for
analyzing when and why real services were down or for statistical purposes.

A log file may be specified for each virtual section using a new
"monitorfile = <filename>" directive. Default is no logging. The log format
is as follows:

<unix timestamp>|<pid>|<real_service_id>|<status>|<received / message>

The "<received / message>" part is service-type specific and may contain the
contents received from the service or an error message. Currently, only
check_http() sets this field to something useful, default is "-". Adding
useful messages for other service checks is encouraged ;)

An example of a log line:
1206443690|16083|negotiate:http:tcp:172.xx.xx.xx
:8001:::1:gate:\/ok\.html:OK|up|OK

Best Regards,
Julius Volz

-- 
Google Switzerland GmbH
diff -r 17c0cf487322 ldirectord/ldirectord.in
--- a/ldirectord/ldirectord.in	Mon Mar 24 16:14:12 2008 +0100
+++ b/ldirectord/ldirectord.in	Wed Mar 26 10:31:32 2008 +0100
@@ -557,6 +557,16 @@ Default:
 
 =back 4
 
+B<monitorfile = ">I</path/to/monitorfile>B<">
+
+File to continuously log the real service checks to for this virtual
+service. This is useful for monitoring when and why real services were down
+or for statistics.
+
+The log format is:
+[timestamp|pid|real_service_id|status|message]
+
+Default: no separate logging of service checks.
 
 =head1 FILES
 
@@ -1305,6 +1315,12 @@ sub read_config
 					$vsrv{emailalertfreq} = $1;
 				} elsif  ($rcmd =~ /^emailalertstatus\s*=\s*(.*)/) {
 					$vsrv{emailalertstatus} = &parse_emailalertstatus($line, $1);
+				} elsif  ($rcmd =~ /^monitorfile\s*=\s*(.*)/) {
+					my $monitorfile = $1;
+					unless (open(MONITORFILE, ">>$monitorfile") and close(MONITORFILE)) {
+						&config_error($line, "unable to open monitorfile $monitorfile: $!");
+					}
+					$vsrv{monitorfile} = $monitorfile;
 				} else {
 					&config_error($line, "Unknown command \"$linedata\"");
 				}
@@ -2133,13 +2149,13 @@ sub ld_start
 			if (! defined($or->{$real_str}) or
 					$or->{$real_str}->{weight} == 0) {
 				$server_down->{$real_str} = [$nv, $nr];
-				#service_set($nv, $nr, "down", "force");
+				#service_set($nv, $nr, "down", {force => 1});
 			}
 			else {
 				if (defined $server_down->{$real_str}) {
 					delete($server_down->{$real_str});
 				}
-				service_set($nv, $nr, "up", "force");
+				service_set($nv, $nr, "up", {force => 1});
 			}
 			delete($or->{$real_str});
 		}
@@ -2156,7 +2172,7 @@ sub ld_start
 
 	for my $k (keys (%$server_down)) {
 	        my $v = $server_down->{$k};
-		service_set(@$v[0], @$v[1], "down", "force");
+		service_set(@$v[0], @$v[1], "down", {force => 1});
 		delete($server_down->{$k});
 		#sleep 5;
 	}
@@ -2291,7 +2307,7 @@ sub run_child
         my $virtual_id = get_virtual_id_str($v);
         $0 = $virtual_id;
         foreach my $r (@$real) {
-                service_set($v, $r, "down", "force");
+                service_set($v, $r, "down", {force => 1});
         }
         while (1) {
                 foreach my $r (@$real) {
@@ -2436,12 +2452,14 @@ sub check_http
 	my $recstr = $$r{receive};
 	if ($res->is_success && (!($recstr =~ /.+/) || 
 				$res->content =~ /$recstr/)) {
-		service_set($v, $r, "up");
+		service_set($v, $r, "up", {do_log => 1}, $res->content);
 		&ld_debug(2, "check_http: $$r{url} is up\n");
 		return $SERVICE_UP;
 	}
 
-	service_set($v, $r, "down");
+	my $log_message = $res->is_success ? $res->content : $res->status_line;
+	service_set($v, $r, "down", {do_log => 1}, $log_message);
+
 	&ld_debug(3, "Headers " .  $res->headers->as_string);
 	&ld_debug(2, "check_http: $$r{url} is down\n");
 	return $SERVICE_DOWN;
@@ -2460,10 +2478,10 @@ sub check_smtp
 			Timeout => $$v{negotiatetimeout});
 	if ($smtp) {
 		$smtp->quit;
-		service_set($v, $r, "up");
-		return $SERVICE_UP;
-	} else {
-		service_set($v, $r, "down");
+		service_set($v, $r, "up", {do_log => 1});
+		return $SERVICE_UP;
+	} else {
+		service_set($v, $r, "down", {do_log => 1});
 		return $SERVICE_DOWN;
 	}
 }
@@ -2485,7 +2503,7 @@ sub check_pop
                                         TIMEOUT => $$v{negotiatetimeout});
 
         if (!$pop) {
-                service_set($v, $r, "down");
+                service_set($v, $r, "down", {do_log => 1});
                 return $SERVICE_UP;
         }
 
@@ -2493,13 +2511,13 @@ sub check_pop
                 my $authres = $pop->login();
                 $pop->close();
                 if (!$authres) {
-                        service_set($v, $r, "down");
+                        service_set($v, $r, "down", {do_log => 1});
                         return $SERVICE_DOWN;
                 }
         }
 
         $pop->close();
-        service_set($v, $r, "up");
+        service_set($v, $r, "up", {do_log => 1});
         return $SERVICE_UP;
 }
 
@@ -2519,7 +2537,7 @@ sub check_pops
                                         DEBUG => 0,
                                         TIMEOUT => $$v{negotiatetimeout});
         if (!$pops) {
-                service_set($v, $r, "down");
+                service_set($v, $r, "down", {do_log => 1});
                 return $SERVICE_DOWN;
         }
 
@@ -2527,14 +2545,14 @@ sub check_pops
                 my $authres = $pops->login();
                 $pops->close();
                 if (!$authres) {
-                        service_set($v, $r, "down");
+                        service_set($v, $r, "down", {do_log => 1});
                         return $SERVICE_DOWN;
                 }
         }
 
 
         $pops->close();
-        service_set($v, $r, "up");
+        service_set($v, $r, "up", {do_log => 1});
         return $SERVICE_UP;
 }
 
@@ -2551,7 +2569,7 @@ sub check_imap
                                         timeout => $$v{negotiatetimeout});
         
         if (!$imap) {
-                service_set($v, $r, "down");
+                service_set($v, $r, "down", {do_log => 1});
                 return $SERVICE_DOWN;
         }
         
@@ -2559,13 +2577,13 @@ sub check_imap
                 my $authres = $imap->login($$v{login},$$v{passwd});
                 $imap->quit;
                 if (!$authres) {
-                        service_set($v, $r, "down");
+                        service_set($v, $r, "down", {do_log => 1});
                         return $SERVICE_DOWN;
                 }
         }
         
         $imap->quit();
-        service_set($v, $r, "up");
+        service_set($v, $r, "up", {do_log => 1});
         return $SERVICE_UP;
 }
 
@@ -2581,7 +2599,7 @@ sub check_imaps
                                         port => $port,
                                         timeout => $$v{negotiatetimeout});
         if (!$imaps) {
-                service_set($v, $r, "down");
+                service_set($v, $r, "down", {do_log => 1});
                 return $SERVICE_DOWN;
         }
 
@@ -2589,13 +2607,13 @@ sub check_imaps
                 my $authres = $imaps->login($$v{login},$$v{passwd});
                 $imaps->quit;
                 if (!$authres) {
-                        service_set($v, $r, "down");
+                        service_set($v, $r, "down", {do_log => 1});
                         return $SERVICE_DOWN;
                 }
         }
 
         $imaps->quit();
-        service_set($v, $r, "up");
+        service_set($v, $r, "up", {do_log => 1});
         return $SERVICE_UP;
 }
 
@@ -2611,7 +2629,7 @@ sub check_ldap
 	my $ldap = Net::LDAP->new("$$r{server}", port => $port,
 					timeout => $$v{negotiatetimeout});
 	if(!$ldap) {
-		service_set($v, $r, "down");
+		service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(4, "Connection failed");
 		return $SERVICE_DOWN;
 	}
@@ -2624,7 +2642,7 @@ sub check_ldap
 		$mesg = $ldap->bind ;
 	}
 	if ($mesg->is_error) {
-		service_set($v, $r, "down");
+		service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(4, "Bind failed");
 		return $SERVICE_DOWN;
 	}
@@ -2637,7 +2655,7 @@ sub check_ldap
 		);
 
 	if($result->count != 1) {
-		service_set($v, $r, "down");
+		service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(2, "Count failed : " . $result->count);
 		return $SERVICE_DOWN;
 	}
@@ -2645,10 +2663,10 @@ sub check_ldap
 	my $href = $result->as_struct;
 	my @arrayOfDNs  = keys %$href ;
 	if (!($recstr =~ /.+/) || $arrayOfDNs[0] =~ /$recstr/) {
-		service_set($v, $r, "up");
-		return $SERVICE_UP;
-	} else {
-		service_set($v, $r, "down");
+		service_set($v, $r, "up", {do_log => 1});
+		return $SERVICE_UP;
+	} else {
+		service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(4,"Message differs : " . ", " . $$r{receive} 
 				. ", " . $arrayOfDNs[0] . ".");
 		return $SERVICE_DOWN;
@@ -2672,20 +2690,20 @@ sub check_nntp
         unless ($sock = IO::Socket::INET->new(PeerAddr => $$r{server},
                 PeerPort => $port, Proto => 'tcp',
                 TimeOut => $$v{negotiatetimeout})) {
-                service_set($v, $r, "down");
+                service_set($v, $r, "down", {do_log => 1});
                 return $SERVICE_DOWN;
         }
         $s = IO::Select->new();
         $s->add($sock);
         if (scalar($s->can_read($$v{negotiatetimeout})) == 0) {
-                service_set($v, $r, "down");
+                service_set($v, $r, "down", {do_log => 1});
         } else {
                 sysread($sock, $buf, 64);
                 if ($buf =~ /^2/) {
-                        service_set($v, $r, "up");
+                        service_set($v, $r, "up", {do_log => 1});
 			$status = 0;
                 } else {
-                        service_set($v, $r, "down");
+                        service_set($v, $r, "down", {do_log => 1});
                 }
         }
         $s->remove($sock);
@@ -2724,12 +2742,12 @@ sub check_radius
                 alarm 0; # Cancel the alarm
         };
         if ($result eq "") {
-                &service_set($v, $r, "down");
+                &service_set($v, $r, "down", {do_log => 1});
                 &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
                 &ld_debug(3, "Radius Error: ".$radius->get_error);
                 return $SERVICE_DOWN;
         } else {
-                &service_set($v, $r, "up");
+                &service_set($v, $r, "up", {do_log => 1});
                 &ld_debug(3, "Activated service $$r{server}:$$r{port}");
                 return $SERVICE_UP;
         }
@@ -2841,7 +2859,7 @@ err_disconnect:
 err_disconnect:
 	$dbh->disconnect();
 err_down:
-	service_set($v, $r, $result == $SERVICE_UP ? "up" : "down");
+	service_set($v, $r, $result == $SERVICE_UP ? "up" : "down", {do_log => 1});
 	return $result;
 }
 
@@ -2867,11 +2885,11 @@ sub check_connect
 		alarm 0; # Cancel the alarm
 	};
 	if ($@) {
-		&service_set($v, $r, "down");
+		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
 		return $SERVICE_DOWN;
 	} else {
-		&service_set($v, $r, "up");
+		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return $SERVICE_UP;
 	}
@@ -2899,13 +2917,13 @@ sub check_external
 		$result >>= 8;
 	};
 	if ($@ or $result != 0) {
-		&service_set($v, $r, "down");
+		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: " .
 			  "$@ after calling $$v{checkcommand} with result " .
 			  "$result");
 		return 0;
 	} else {
-		&service_set($v, $r, "up");
+		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return 1;
 	}
@@ -2997,11 +3015,11 @@ sub check_sip
 	};
 
 	if ($@) {
-		&service_set($v, $r, "down");
+		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
 		return $SERVICE_DOWN;
 	} else {
-		&service_set($v, $r, "up");
+		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return $SERVICE_UP;
 	}
@@ -3065,11 +3083,11 @@ sub check_simpletcp
        };
 
        if ($@) {
-	       &service_set($v, $r, "down");
+	       &service_set($v, $r, "down", {do_log => 1});
 	       &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
 	       return $SERVICE_DOWN;
        } else {
-	       &service_set($v, $r, "up");
+	       &service_set($v, $r, "up", {do_log => 1});
 	       &ld_debug(3, "Activated service $$r{server}:$$r{port}");
 	       return $SERVICE_UP;
        }
@@ -3113,11 +3131,11 @@ sub check_ftp
 	close TMP;
 
 	if ($memory =~ /$$r{receive}/) {
-		service_set($v, $r, "up");
-		return $SERVICE_UP;
-	}
-
-	service_set($v, $r, "down");
+		service_set($v, $r, "up", {do_log => 1});
+		return $SERVICE_UP;
+	}
+
+	service_set($v, $r, "down", {do_log => 1});
 	return $SERVICE_DOWN;
 }
 
@@ -3160,19 +3178,19 @@ sub check_dns
 	};
  
 	if (@$ eq "timeout\n" or ! $query) {
-		 service_set($v,$r,"down");
+		 service_set($v, $r, "down", {do_log => 1});
 		 return $SERVICE_DOWN;
 	}
  
 	foreach $rr ($query->answer) {
 	        if (($rr->type eq "A" and $rr->address eq $$r{"receive"}) or
 	            ($rr->type eq "PTR" and $rr->ptrdname eq $$r{"receive"})) {
-	         	service_set($v,$r,"up");
+			service_set($v, $r, "up", {do_log => 1});
 			return $SERVICE_UP;
 		}
 	}
  
-	service_set($v,$r,"down");
+	service_set($v, $r, "down", {do_log => 1});
 	return $SERVICE_DOWN;
 }
 
@@ -3191,7 +3209,7 @@ sub check_ping
 	for (my $attempt = 0; $attempt < $$v{"checkcount"}; $attempt++) {
 		if ($p->ping($$r{server}, $$v{"checktimeout"})) {
     			&ld_debug(2, "pong from $$r{server}\n");
-			service_set($v,$r,"up");
+			service_set($v, $r, "up", {do_log => 1});
 			return $SERVICE_UP;
 		}
   		&ld_debug(2, "ping to $$r{server} timed out " .
@@ -3199,7 +3217,7 @@ sub check_ping
 					$$v{"checkcount"} . ")\n");
 	}
 
-	service_set($v,$r,"down");
+	service_set($v, $r, "down");
 	return $SERVICE_DOWN;
 }
 
@@ -3214,10 +3232,9 @@ sub check_none
 
 	&ld_debug(2, "Checking none");
 	
-	service_set($v, $r, "up");
+	service_set($v, $r, "up", {do_log => 1});
 	return $SERVICE_UP;
 }
-
 
 # service_set
 # Used to bring up and down real servers.
@@ -3232,15 +3249,30 @@ sub check_none
 #      state: up or down
 #             up to bring the real service up
 #             down to bring the real service up
+#      flags: hash with the following (optional) keys:
+#             force => 1  - force setting of the specified state
+#             do_log => 1 - log the state to the monitorfile
+#                           (when called as the result of a check)
 # post: The real server is brough up or down for each virtual service
 #       it belongs to.
 # return: none
 
 sub service_set()
 {
-	my ($v, $r, $state, $force) = @_;
-
-	my ($real, $virtual, $virt);
+	my ($v, $r, $state, $flags, $log_msg) = @_;
+
+	my ($real, $virtual, $virt, $now);
+	
+	if ($$flags{'do_log'}) {
+		$now = time();
+		
+		if (!defined($log_msg)) {
+			$log_msg = "-";
+		}
+
+		# URI-escape special log characters ('|' and newlines)
+		$log_msg =~ s/([%|\r\n])/sprintf("%%%.2x", ord($1))/eg;
+	}
 
         # Find the real server in @REAL
 	foreach $real (@REAL) {
@@ -3260,7 +3292,8 @@ sub service_set()
 	        my $found = 0;
 		my $tmp_id;
 		my $virtual_id = get_virtual_id_str($v);
-		my $log_str = "real server=" . get_real_id_str($r, $v) .
+		my $real_id = get_real_id_str($r, $v);
+		my $log_str = "real server=$real_id" .
 			      " (virtual=$virtual_id)";
 		foreach $tmp_id (@$virtual) {
 			if($virtual_id eq $tmp_id) {
@@ -3270,11 +3303,19 @@ sub service_set()
 		}
 		if ($found == 1) {
 			if ($state=~/up/i) {
-				_service_up($v, $r, $force);
+				_service_up($v, $r, $$flags{"force"});
 				&ld_debug(2, "Enabled  $log_str");
 			} elsif ($state=~/down/i) {
-				_service_down($v, $r, $force);
+				_service_down($v, $r, $$flags{"force"});
 				&ld_debug(2, "Disabled $log_str");
+			}
+			
+			if ($$v{"monitorfile"} and $$flags{"do_log"}) {
+				unless(
+					open(CHECKLOG, ">>$$v{monitorfile}") and
+					print CHECKLOG "$now|$$|$real_id|$state|$log_msg\n" and
+					close(CHECKLOG)
+				) { die("Error writing to monitorfile '$$v{monitorfile}': $!"); }
 			}
 		}
 	}
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to