Package: keepalived
Version: 1:1.1.20-1
Severity: important
Tags: patch

The option nb_get_retry is ignored when a timeout occurs. Only when a check 
fails due to digest or status code, it is retried.

The attached patch rewrites the method epilog() to uniform the handling of all 
check outcomes. A check is now retried independently of the kind of failure. 
However, if a real server is already down, failed checks are not retried.

-- System Information:
Debian Release: 6.0.1
  APT prefers stable
  APT policy: (500, 'stable')
Architecture: amd64 (x86_64)

Kernel: Linux 2.6.32-5-xen-amd64 (SMP w/2 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
diff -ur keepalived-1.1.20/keepalived/check/check_http.c keepalived-1.1.20.new/keepalived/check/check_http.c
--- keepalived-1.1.20/keepalived/check/check_http.c	2010-05-06 17:48:29.000000000 +0200
+++ keepalived-1.1.20.new/keepalived/check/check_http.c	2011-05-12 09:56:33.861014216 +0200
@@ -280,66 +280,82 @@
  * Simple epilog functions. Handling event timeout.
  * Finish the checker with memory managment or url rety check.
  *
- * c == 0 => reset to 0 retry_it counter
- * t == 0 => reset to 0 url_it counter
- * method == 1 => register a new checker thread
- * method == 2 => register a retry on url checker thread
+ * success == 0 => current check failed
+ * smtp_msg => message used for mail notification
+ *
+ * returns value of success (i.e. 0 if check failed)
  */
 int
-epilog(thread * thread_obj, int method, int t, int c)
+epilog(thread * thread_obj, int success, char *smtp_msg)
 {
 	checker *checker_obj = THREAD_ARG(thread_obj);
 	http_get_checker *http_get_check = CHECKER_ARG(checker_obj);
 	http_arg *http_arg_obj = HTTP_ARG(http_get_check);
 	REQ *req = HTTP_REQ(http_arg_obj);
-	uint16_t addr_port = get_service_port(checker_obj);
 	long delay = 0;
 
-	if (method) {
-		http_arg_obj->url_it += t ? t : -http_arg_obj->url_it;
-		http_arg_obj->retry_it += c ? c : -http_arg_obj->retry_it;
-	}
+	/* check succeeded */
+	if (success) {
+		/* reset retry counter */
+		http_arg_obj->retry_it = 0;
 
-	/*
-	 * The get retry implementation mean that we retry performing
-	 * a GET on the same url until the remote web server return 
-	 * html buffer. This is sometime needed with some applications
-	 * servers.
-	 */
-	if (http_arg_obj->retry_it > http_get_check->nb_get_retry-1) {
-		if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-			log_message(LOG_INFO, "Check on service [%s:%d] failed after %d retry."
-			       , inet_ntop2(CHECKER_RIP(checker_obj))
-			       , ntohs(addr_port), http_arg_obj->retry_it);
-			smtp_alert(checker_obj->rs, NULL, NULL,
-				   "DOWN",
-				   "=> CHECK failed on service"
-				   " : MD5 digest mismatch <=");
-			update_svr_checker_state(DOWN, checker_obj->id
-						     , checker_obj->vs
-						     , checker_obj->rs);
-		}
+		/* all checks succeeded */
+		if (!fetch_next_url(http_get_check)) {
 
-		/* Reset it counters */
-		http_arg_obj->url_it = 0;
-		http_arg_obj->retry_it = 0;
-	}
+			/* real server is currently down */
+			if (!svr_checker_up(checker_obj->id, checker_obj->rs)) {
+				/* send mail and add server to pool */
+				smtp_alert(checker_obj->rs, NULL, NULL, "UP",
+					   smtp_msg);
+				update_svr_checker_state(UP, checker_obj->id
+							   , checker_obj->vs
+							   , checker_obj->rs);
+			}
 
-	/* register next timer thread */
-	switch (method) {
-	case 1:
-		if (req)
+			/* schedule next round of checks */
 			delay = checker_obj->vs->delay_loop;
-		else
-			delay =
-			    http_get_check->delay_before_retry;
-		break;
-	case 2:
-		if (http_arg_obj->url_it == 0 && http_arg_obj->retry_it == 0)
+			http_arg_obj->url_it = 0;
+		/* there could be urls left to check */
+		} else {
+			/* check next url immediately (delay = 0) */
+			http_arg_obj->url_it += 1;
+		}
+
+	/* check failed */
+	} else {
+
+		/* real server is currently down */
+		if (!svr_checker_up(checker_obj->id, checker_obj->rs)) {
+			/* don't retry */
+			http_arg_obj->retry_it = http_get_check->nb_get_retry;
+		} else {
+			/* increment retry counter */
+			http_arg_obj->retry_it += 1;
+		}
+
+		/* no retries left */
+		if (http_arg_obj->retry_it > http_get_check->nb_get_retry-1) {
+
+			/* real server is currently up */
+			if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
+				/* send mail and remove server from pool */
+				smtp_alert(checker_obj->rs, NULL, NULL, "DOWN",
+					   smtp_msg);
+				update_svr_checker_state(DOWN, checker_obj->id
+							   , checker_obj->vs
+							   , checker_obj->rs);
+			}
+
+			/* schedule next round of checks */
 			delay = checker_obj->vs->delay_loop;
-		else
+			http_arg_obj->retry_it = 0;
+			http_arg_obj->url_it = 0;
+		/* retry check */
+		} else {
+			/* schedule retry according to config */
 			delay = http_get_check->delay_before_retry;
-		break;
+		}
+
 	}
 
 	/* If req == NULL, fd is not created */
@@ -355,9 +371,17 @@
 
 	/* Register next checker thread */
 	thread_add_timer(thread_obj->master, http_connect_thread, checker_obj, delay);
-	return 0;
+	return success;
 }
 
+/*
+ * Wrapper for epilog() to handle timeouts,
+ * logs info message and calls epilog()
+ * with success = 0
+ *
+ * smtp_msg => message used for mail notification
+ * debug_msg => message for logging
+ */
 int
 timeout_epilog(thread * thread_obj, char *smtp_msg, char *debug_msg)
 {
@@ -369,16 +393,7 @@
 	       , inet_ntop2(CHECKER_RIP(checker_obj))
 	       , ntohs(addr_port));
 
-	/* check if server is currently alive */
-	if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-		smtp_alert(checker_obj->rs, NULL, NULL,
-			   "DOWN", smtp_msg);
-		update_svr_checker_state(DOWN, checker_obj->id
-					     , checker_obj->vs
-					     , checker_obj->rs);
-	}
-
-	return epilog(thread_obj, 1, 0, 0);
+	return epilog(thread_obj, 0, smtp_msg);
 }
 
 /* return the url pointer of the current url iterator  */
@@ -413,34 +428,15 @@
 	/* Next check the HTTP status code */
 	if (fetched_url->status_code) {
 		if (req->status_code != fetched_url->status_code) {
-			/* check if server is currently alive */
-			if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-				log_message(LOG_INFO,
-				       "HTTP status code error to [%s:%d] url(%s)"
-				       ", status_code [%d].",
-				       inet_ntop2(CHECKER_RIP(checker_obj)),
-				       ntohs(addr_port), fetched_url->path,
-				       req->status_code);
-				smtp_alert(checker_obj->rs, NULL, NULL,
-					   "DOWN",
-					   "=> CHECK failed on service"
+			/* check failed, log message and call epilog */
+			log_message(LOG_INFO,
+			       "HTTP status code error to [%s:%d] url(%s)"
+			       ", status_code [%d].",
+			       inet_ntop2(CHECKER_RIP(checker_obj)),
+			       ntohs(addr_port), fetched_url->path,
+			       req->status_code);
+			return epilog(thread_obj, 0, "=> CHECK failed on service"
 					   " : HTTP status code mismatch <=");
-				update_svr_checker_state(DOWN, checker_obj->id
-							     , checker_obj->vs
-							     , checker_obj->rs);
-			} else {
-				DBG("HTTP Status_code to [%s:%d] url(%d) = [%d].",
-				    inet_ntop2(CHECKER_RIP(checker_obj))
-				    , ntohs(addr_port)
-				    , http_arg_obj->url_it + 1
-				    , req->status_code);
-				/*
-				 * We set retry iterator to max value to not retry
-				 * when service is already know as die.
-				 */
-				http_arg_obj->retry_it = http_get_check->nb_get_retry;
-			}
-			return epilog(thread_obj, 2, 0, 1);
 		} else {
 			if (!svr_checker_up(checker_obj->id, checker_obj->rs))
 				log_message(LOG_INFO,
@@ -448,7 +444,7 @@
 				       inet_ntop2(CHECKER_RIP(checker_obj))
 				       , ntohs(addr_port)
 				       , http_arg_obj->url_it + 1);
-			return epilog(thread_obj, 1, 1, 0) + 1;
+			return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
 		}
 	}
 
@@ -462,35 +458,16 @@
 		r = strcmp(fetched_url->digest, digest_tmp);
 
 		if (r) {
-			/* check if server is currently alive */
-			if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-				log_message(LOG_INFO,
-				       "MD5 digest error to [%s:%d] url[%s]"
-				       ", MD5SUM [%s].",
-				       inet_ntop2(CHECKER_RIP(checker_obj)),
-				       ntohs(addr_port), fetched_url->path,
-				       digest_tmp);
-				smtp_alert(checker_obj->rs, NULL, NULL,
-					   "DOWN",
-					   "=> CHECK failed on service"
-					   " : HTTP MD5SUM mismatch <=");
-				update_svr_checker_state(DOWN, checker_obj->id
-							     , checker_obj->vs
-							     , checker_obj->rs);
-			} else {
-				DBG("MD5SUM to [%s:%d] url(%d) = [%s].",
-				    inet_ntop2(CHECKER_RIP(checker_obj))
-				    , ntohs(addr_port)
-				    , http_arg_obj->url_it + 1
-				    , digest_tmp);
-				/*
-				 * We set retry iterator to max value to not retry
-				 * when service is already know as die.
-				 */
-				http_arg_obj->retry_it = http_get_check->nb_get_retry;
-			}
+			/* check failed, log message and call epilog */
+			log_message(LOG_INFO,
+			       "MD5 digest error to [%s:%d] url[%s]"
+			       ", MD5SUM [%s].",
+			       inet_ntop2(CHECKER_RIP(checker_obj)),
+			       ntohs(addr_port), fetched_url->path,
+			       digest_tmp);
 			FREE(digest_tmp);
-			return epilog(thread_obj, 2, 0, 1);
+			return epilog(thread_obj, 0, "=> CHECK failed on service"
+					   " : HTTP MD5SUM mismatch <=");
 		} else {
 			if (!svr_checker_up(checker_obj->id, checker_obj->rs))
 				log_message(LOG_INFO, "MD5 digest success to [%s:%d] url(%d).",
@@ -498,11 +475,11 @@
 				       , ntohs(addr_port)
 				       , http_arg_obj->url_it + 1);
 			FREE(digest_tmp);
-			return epilog(thread_obj, 1, 1, 0) + 1;
+			return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
 		}
 	}
 
-	return epilog(thread_obj, 1, 0, 0) + 1;
+	return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
 }
 
 /* Handle response stream performing MD5 updates */
@@ -577,21 +554,13 @@
 		MD5_Final(digest, &req->context);
 
 		if (r == -1) {
-			/* We have encourred a real read error */
-			if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-				log_message(LOG_INFO, "Read error with server [%s:%d]: %s",
-				       inet_ntop2(CHECKER_RIP(checker_obj))
-				       , ntohs(addr_port)
-				       , strerror(errno));
-				smtp_alert(checker_obj->rs, NULL, NULL,
-					   "DOWN",
-					   "=> HTTP CHECK failed on service"
+			/* check failed, log message and call epilog */
+			log_message(LOG_INFO, "Read error with server [%s:%d]: %s",
+			       inet_ntop2(CHECKER_RIP(checker_obj))
+			       , ntohs(addr_port)
+			       , strerror(errno));
+			return epilog(thread_obj, 0, "=> HTTP CHECK failed on service"
 					   " : cannot receive data <=");
-				update_svr_checker_state(DOWN, checker_obj->id
-							     , checker_obj->vs
-							     , checker_obj->rs);
-			}
-			return epilog(thread_obj, 1, 0, 0);
 		}
 
 		/* Handle response stream */
@@ -700,21 +669,12 @@
 	FREE(str_request);
 
 	if (!ret) {
+		/* check failed, log message and call epilog */
 		log_message(LOG_INFO, "Cannot send get request to [%s:%d].",
 		       inet_ntop2(CHECKER_RIP(checker_obj))
 		       , ntohs(addr_port));
-
-		/* check if server is currently alive */
-		if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-			smtp_alert(checker_obj->rs, NULL, NULL,
-				   "DOWN",
-				   "=> CHECK failed on service"
+		return epilog(thread_obj, 0, "=> CHECK failed on service"
 				   " : cannot send data <=");
-			update_svr_checker_state(DOWN, checker_obj->id
-						     , checker_obj->vs
-						     , checker_obj->rs);
-		}
-		return epilog(thread_obj, 1, 0, 0);
 	}
 
 	/* Register read timeouted thread */
@@ -744,20 +704,12 @@
 				  , addr_port, http_check_thread);
 	switch (status) {
 	case connect_error:
-		/* check if server is currently alive */
-		if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-			log_message(LOG_INFO, "Error connecting server [%s:%d].",
-			       inet_ntop2(CHECKER_RIP(checker_obj))
-			       , ntohs(addr_port));
-			smtp_alert(checker_obj->rs, NULL, NULL,
-				   "DOWN",
-				   "=> CHECK failed on service"
+		/* check failed, log message and call epilog */
+		log_message(LOG_INFO, "Error connecting server [%s:%d].",
+		       inet_ntop2(CHECKER_RIP(checker_obj))
+		       , ntohs(addr_port));
+		return epilog(thread_obj, 0, "=> CHECK failed on service"
 				   " : connection error <=");
-			update_svr_checker_state(DOWN, checker_obj->id
-						 , checker_obj->vs
-						 , checker_obj->rs);
-		}
-		return epilog(thread_obj, 1, 0, 0);
 		break;
 
 	case connect_timeout:
@@ -829,24 +781,20 @@
 					ssl_printerr(SSL_get_error
 						     (req->ssl, ret));
 #endif
-				if ((http_get_check->proto == PROTO_SSL) &&
-				    (svr_checker_up(checker_obj->id, checker_obj->rs))) {
+				if (http_get_check->proto == PROTO_SSL) {
+					/* check failed, log message and call epilog */
 					log_message(LOG_INFO, "SSL handshake/communication error"
 							 " connecting to server"
 							 " (openssl errno: %d) [%s:%d]."
 						       , SSL_get_error (http_arg_obj->req->ssl, ret)
 						       , inet_ntop2(CHECKER_RIP(checker_obj))
 						       , ntohs(addr_port));
-					smtp_alert(checker_obj->rs, NULL, NULL,
-						   "DOWN",
-						   "=> CHECK failed on service"
+					return epilog(thread_obj, 0, "=> CHECK failed on service"
 						   " : SSL connection error <=");
-					update_svr_checker_state(DOWN, checker_obj->id
-								 , checker_obj->vs
-								 , checker_obj->rs);
 				}
 
-				return epilog(thread_obj, 1, 0, 0);
+				return epilog(thread_obj, 0, "=> CHECK failed on service"
+						   " : connection error <=");
 			}
 		}
 		break;
@@ -888,14 +836,9 @@
 			log_message(LOG_INFO, "Remote Web server [%s:%d] succeed on service.",
 			       inet_ntop2(CHECKER_RIP(checker_obj))
 			       , ntohs(addr_port));
-			smtp_alert(checker_obj->rs, NULL, NULL, "UP",
-				   "=> CHECK succeed on service <=");
-			update_svr_checker_state(UP, checker_obj->id
-						   , checker_obj->vs
-						   , checker_obj->rs);
 		}
 		http_arg_obj->req = NULL;
-		return epilog(thread_obj, 1, 0, 0) + 1;
+		return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
 	}
 
 	/* Create the socket */
diff -ur keepalived-1.1.20/keepalived/check/check_ssl.c keepalived-1.1.20.new/keepalived/check/check_ssl.c
--- keepalived-1.1.20/keepalived/check/check_ssl.c	2010-05-06 17:48:58.000000000 +0200
+++ keepalived-1.1.20.new/keepalived/check/check_ssl.c	2011-05-12 09:56:31.557014358 +0200
@@ -244,6 +244,7 @@
 	http_get_checker *http_get_check = CHECKER_ARG(checker_obj);
 	http_arg *http_arg_obj = HTTP_ARG(http_get_check);
 	REQ *req = HTTP_REQ(http_arg_obj);
+	uint16_t addr_port = get_service_port(checker_obj);
 	unsigned char digest[16];
 	int r = 0;
 	int val;
@@ -290,17 +291,13 @@
 		     SSL_ERROR_ZERO_RETURN) ? SSL_shutdown(req->ssl) : 0;
 
 		if (r && !req->extracted) {
-			/* check if server is currently alive */
-			if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
-				smtp_alert(checker_obj->rs, NULL, NULL,
-					   "DOWN",
-					   "=> SSL CHECK failed on service"
+			/* check failed, log message and call epilog */
+			log_message(LOG_INFO, "Read error with server [%s:%d]: %s",
+			       inet_ntop2(CHECKER_RIP(checker_obj))
+			       , ntohs(addr_port)
+			       , strerror(errno));
+			return epilog(thread_obj, 0, "=> SSL CHECK failed on service"
 					   " : cannot receive data <=\n\n");
-				update_svr_checker_state(DOWN, checker_obj->id
-							     , checker_obj->vs
-							     , checker_obj->rs);
-			}
-			return epilog(thread_obj, 1, 0, 0);
 		}
 
 		/* Handle response stream */
diff -ur keepalived-1.1.20/keepalived/include/check_http.h keepalived-1.1.20.new/keepalived/include/check_http.h
--- keepalived-1.1.20/keepalived/include/check_http.h	2010-05-06 17:50:29.000000000 +0200
+++ keepalived-1.1.20.new/keepalived/include/check_http.h	2011-05-09 11:37:23.265015403 +0200
@@ -89,7 +89,7 @@
 
 /* Define prototypes */
 extern void install_http_check_keyword(void);
-extern int epilog(thread * thread_obj, int metod, int t, int c);
+extern int epilog(thread * thread_obj, int success, char *smtp_msg);
 extern int timeout_epilog(thread * thread_obj, char *smtp_msg, char *debug_msg);
 extern url *fetch_next_url(http_get_checker * http_get_check);
 extern int http_process_response(REQ * req, int r);

Reply via email to