Folks,

Thanks to all for the work on Spamassassin - I'm not really in the dev community here, and am probably committed a faux pa here by sending up patches that are against the stable 3.2.4 release, not the dev tree. Sorry if this is not standard practice.

However, we've been having some problems with spamd hanging, similar to what's been discussed on the mailing list. The result for us was that if a spamd server got hung up, each message sent to that server would be stalled for the timeout value (600s), then return unprocessed. We have 3 servers handling spam, and this meant that roughly 1/3 of our messages would pass unfiltered when one of our spamd servers would get hung up. To help work around this, I've added a filter-loop to spamc. It takes two new arguments: --filter-retries and --filter-retry-sleep. I did not want to complicate things with overloading connect-retries which only comes into play if there are tcp connection troubles, not filter timeouts. By setting filter-retries to a value other than 1 (the default is 1 to keep the default behavior the same), it will try to filter the message through spamd, but if the filter times out, it will try again up to filter-retries times. I also moved the call to randomize the host list into this loop so that it will be re-randomized each time the filter is attempted.

The patch for this is attached as 'filterloop.patch'.

I've also added a small patch called 'fixdest.patch' that fixes a minor command line issue. When specifying the hosts, -d works, but --dest does not. This patch fixes that.

I put these together out of our own needs here, so I'm sorry that I have not gone through the bug trackers to see if I'm re-inventing the wheel. Still, I hope that someone who is more intimate with the code than me can make use of these fixes and features.
Thanks again for such a GREAT tool!!!

-Ty!

--
-===========================-
 Ty! Boyack
 NREL Unix Network Manager
 [EMAIL PROTECTED]
 (970) 491-1186
-===========================-

diff -Naur Mail-SpamAssassin-3.2.4/spamc/libspamc.c Mail-SpamAssassin-3.2.4.loadbalance/spamc/libspamc.c
--- Mail-SpamAssassin-3.2.4/spamc/libspamc.c	2008-01-05 14:12:39.000000000 -0700
+++ Mail-SpamAssassin-3.2.4.loadbalance/spamc/libspamc.c	2008-04-01 13:07:22.000000000 -0600
@@ -1141,6 +1141,44 @@
     return EX_OK;
 }
 
+/*
+* randomize_hosts()
+*
+*	Given the transport object that contains one or more IP addresses
+*	in this "hosts" list, rotate it by a random number of shifts to
+*	randomize them - this is a kind of load balancing. It's possible
+*	that the random number will be 0, which says not to touch. We don't
+*	do anything unless 
+*/
+
+static void _randomize_hosts(struct transport *tp)
+{
+#ifdef SPAMC_HAS_ADDRINFO
+    struct addrinfo *tmp;
+#else
+    struct in_addr tmp;
+#endif
+    int i;
+    int rnum;
+
+    assert(tp != 0);
+
+    if (tp->nhosts <= 1)
+        return;
+
+    rnum = rand() % tp->nhosts;
+
+    while (rnum-- > 0) {
+        tmp = tp->hosts[0];
+
+        /* TODO: free using freeaddrinfo() */
+        for (i = 1; i < tp->nhosts; i++)
+            tp->hosts[i - 1] = tp->hosts[i];
+
+        tp->hosts[i - 1] = tmp;
+    }
+}
+
 int message_filter(struct transport *tp, const char *username,
                    int flags, struct message *m)
 {
@@ -1162,6 +1200,9 @@
     int zlib_bufsiz = 0;
     unsigned char *towrite_buf;
     int towrite_len;
+    int filter_retry_count;
+    int filter_retry_sleep;
+    int filter_retries;
 
     assert(tp != NULL);
     assert(m != NULL);
@@ -1196,106 +1237,149 @@
     m->is_spam = EX_TOOBIG;
     m->priv->alloced_size = m->max_len + EXPANSION_ALLOWANCE + 1;
     if ((m->outbuf = malloc(m->priv->alloced_size)) == NULL) {
-	failureval = EX_OSERR;
+        failureval = EX_OSERR;
 	goto failure;
     }
     m->out = m->outbuf;
     m->out_len = 0;
-
-    /* Build spamd protocol header */
-    if (flags & SPAMC_CHECK_ONLY)
-	strcpy(buf, "CHECK ");
-    else if (flags & SPAMC_REPORT_IFSPAM)
-	strcpy(buf, "REPORT_IFSPAM ");
-    else if (flags & SPAMC_REPORT)
-	strcpy(buf, "REPORT ");
-    else if (flags & SPAMC_SYMBOLS)
-	strcpy(buf, "SYMBOLS ");
-    else if (flags & SPAMC_PING)
-	strcpy(buf, "PING ");
-    else if (flags & SPAMC_HEADERS)
-	strcpy(buf, "HEADERS ");
-    else
-	strcpy(buf, "PROCESS ");
-
-    len = strlen(buf);
-    if (len + strlen(PROTOCOL_VERSION) + 2 >= bufsiz) {
-	_use_msg_for_out(m);
-	return EX_OSERR;
-    }
-
-    strcat(buf, PROTOCOL_VERSION);
-    strcat(buf, "\r\n");
-    len = strlen(buf);
-
-    towrite_buf = (unsigned char *) m->msg;
-    towrite_len = (int) m->msg_len;
-    if (zlib_on) {
-        if (_zlib_compress(m->msg, m->msg_len, &zlib_buf, &zlib_bufsiz, flags) != EX_OK)
-        {
-            return EX_OSERR;
-        }
-        towrite_buf = zlib_buf;
-        towrite_len = zlib_bufsiz;
-    }
-
-    if (!(flags & SPAMC_PING)) {
-      if (username != NULL) {
-	if (strlen(username) + 8 >= (bufsiz - len)) {
-          _use_msg_for_out(m);
-          return EX_OSERR;
+  
+    /* If the spamd filter takes too long and we timeout, then
+     * retry again.  This gets us around a hung child thread 
+     * in spamd or a problem on a spamd host in a multi-host
+     * setup.  The hostlist is re-randomized before each 
+     * filter attempt 
+     */
+
+    /* default values */
+    filter_retry_sleep = tp->filter_retry_sleep;
+    filter_retries = tp->filter_retries;
+    if (filter_retries == 0) {
+      filter_retries = 1;
+    }
+    if (filter_retry_sleep < 0) {
+      filter_retry_sleep = 1;
+    }
+
+    /* filterloop - Ensure that we run through this at least
+     * once, and again if there are errors 
+     */
+    filter_retry_count=0;
+    while ((filter_retry_count==0)||((filter_retry_count<tp->filter_retries)&&(failureval == EX_IOERR))) {
+      if (filter_retry_count != 0){
+        if (sock != -1) {
+	  closesocket(sock);
+	  sock=-1;
 	}
-	strcpy(buf + len, "User: ");
-	strcat(buf + len, username);
-	strcat(buf + len, "\r\n");
-	len += strlen(buf + len);
+        sleep(filter_retry_sleep);
+      }
+      filter_retry_count++;
+  
+      /* Build spamd protocol header */
+      if (flags & SPAMC_CHECK_ONLY)
+	  strcpy(buf, "CHECK ");
+      else if (flags & SPAMC_REPORT_IFSPAM)
+	  strcpy(buf, "REPORT_IFSPAM ");
+      else if (flags & SPAMC_REPORT)
+	  strcpy(buf, "REPORT ");
+      else if (flags & SPAMC_SYMBOLS)
+	  strcpy(buf, "SYMBOLS ");
+      else if (flags & SPAMC_PING)
+	  strcpy(buf, "PING ");
+      else if (flags & SPAMC_HEADERS)
+	  strcpy(buf, "HEADERS ");
+      else
+	  strcpy(buf, "PROCESS ");
+  
+      len = strlen(buf);
+      if (len + strlen(PROTOCOL_VERSION) + 2 >= bufsiz) {
+	  _use_msg_for_out(m);
+	  return EX_OSERR;
       }
+  
+      strcat(buf, PROTOCOL_VERSION);
+      strcat(buf, "\r\n");
+      len = strlen(buf);
+  
+      towrite_buf = (unsigned char *) m->msg;
+      towrite_len = (int) m->msg_len;
       if (zlib_on) {
-	len += snprintf(buf + len, 8192-len, "Compress: zlib\r\n");
+          if (_zlib_compress(m->msg, m->msg_len, &zlib_buf, &zlib_bufsiz, flags) != EX_OK)
+          {
+              return EX_OSERR;
+          }
+          towrite_buf = zlib_buf;
+          towrite_len = zlib_bufsiz;
       }
-      if ((m->msg_len > SPAMC_MAX_MESSAGE_LEN) || ((len + 27) >= (bufsiz - len))) {
-	_use_msg_for_out(m);
-	return EX_DATAERR;
+  
+      if (!(flags & SPAMC_PING)) {
+        if (username != NULL) {
+	  if (strlen(username) + 8 >= (bufsiz - len)) {
+            _use_msg_for_out(m);
+            return EX_OSERR;
+	  }
+	  strcpy(buf + len, "User: ");
+	  strcat(buf + len, username);
+	  strcat(buf + len, "\r\n");
+	  len += strlen(buf + len);
+        }
+        if (zlib_on) {
+	  len += snprintf(buf + len, 8192-len, "Compress: zlib\r\n");
+        }
+        if ((m->msg_len > SPAMC_MAX_MESSAGE_LEN) || ((len + 27) >= (bufsiz - len))) {
+	  _use_msg_for_out(m);
+	  return EX_DATAERR;
+        }
+        len += snprintf(buf + len, 8192-len, "Content-length: %d\r\n\r\n", (int) towrite_len);
       }
-      len += snprintf(buf + len, 8192-len, "Content-length: %d\r\n\r\n", (int) towrite_len);
-    }
+  
+      libspamc_timeout = m->timeout;
 
-    libspamc_timeout = m->timeout;
-
-    if (tp->socketpath)
-	rc = _try_to_connect_unix(tp, &sock);
-    else
-	rc = _try_to_connect_tcp(tp, &sock);
-
-    if (rc != EX_OK) {
-	_use_msg_for_out(m);
-	return rc;      /* use the error code try_to_connect_*() gave us. */
-    }
+      /* QUASI-LOAD-BALANCING
+       *
+       * If the user wants to do quasi load balancing, "rotate"
+       * the list by a random amount based on the current time.
+       * This may later be truncated to a single item. This is
+       * meaningful only if we have more than one host.
+       */
+      if ((flags & SPAMC_RANDOMIZE_HOSTS) && tp->nhosts > 1) {
+          _randomize_hosts(tp);
+      }
 
-    if (flags & SPAMC_USE_SSL) {
+      if (tp->socketpath)
+	  rc = _try_to_connect_unix(tp, &sock);
+      else
+	  rc = _try_to_connect_tcp(tp, &sock);
+  
+      if (rc != EX_OK) {
+	  _use_msg_for_out(m);
+	  return rc;      /* use the error code try_to_connect_*() gave us. */
+      }
+  
+      if (flags & SPAMC_USE_SSL) {
 #ifdef SPAMC_SSL
-	ssl = SSL_new(ctx);
-	SSL_set_fd(ssl, sock);
-	SSL_connect(ssl);
+	  ssl = SSL_new(ctx);
+	  SSL_set_fd(ssl, sock);
+	  SSL_connect(ssl);
 #endif
-    }
-
-    /* Send to spamd */
-    if (flags & SPAMC_USE_SSL) {
+      }
+  
+      /* Send to spamd */
+      if (flags & SPAMC_USE_SSL) {
 #ifdef SPAMC_SSL
-	SSL_write(ssl, buf, len);
-	SSL_write(ssl, towrite_buf, towrite_len);
+	  SSL_write(ssl, buf, len);
+	  SSL_write(ssl, towrite_buf, towrite_len);
 #endif
-    }
-    else {
-	full_write(sock, 0, buf, len);
-	full_write(sock, 0, towrite_buf, towrite_len);
-	shutdown(sock, SHUT_WR);
-    }
-
-    /* ok, now read and parse it.  SPAMD/1.2 line first... */
-    failureval =
-	_spamc_read_full_line(m, flags, ssl, sock, buf, &len, bufsiz);
+      }
+      else {
+	  full_write(sock, 0, buf, len);
+	  full_write(sock, 0, towrite_buf, towrite_len);
+	  shutdown(sock, SHUT_WR);
+      }
+  
+      /* ok, now read and parse it.  SPAMD/1.2 line first... */
+      failureval =
+	  _spamc_read_full_line(m, flags, ssl, sock, buf, &len, bufsiz);
+    } /* end of filterloop */
     if (failureval != EX_OK) {
 	goto failure;
     }
@@ -1737,44 +1821,6 @@
 }
 
 /*
-* randomize_hosts()
-*
-*	Given the transport object that contains one or more IP addresses
-*	in this "hosts" list, rotate it by a random number of shifts to
-*	randomize them - this is a kind of load balancing. It's possible
-*	that the random number will be 0, which says not to touch. We don't
-*	do anything unless 
-*/
-
-static void _randomize_hosts(struct transport *tp)
-{
-#ifdef SPAMC_HAS_ADDRINFO
-    struct addrinfo *tmp;
-#else
-    struct in_addr tmp;
-#endif
-    int i;
-    int rnum;
-
-    assert(tp != 0);
-
-    if (tp->nhosts <= 1)
-        return;
-
-    rnum = rand() % tp->nhosts;
-
-    while (rnum-- > 0) {
-        tmp = tp->hosts[0];
-
-        /* TODO: free using freeaddrinfo() */
-        for (i = 1; i < tp->nhosts; i++)
-            tp->hosts[i - 1] = tp->hosts[i];
-
-        tp->hosts[i - 1] = tmp;
-    }
-}
-
-/*
 * transport_setup()
 *
 *	Given a "transport" object that says how we're to connect to the
@@ -2002,10 +2048,15 @@
          * the list by a random amount based on the current time.
          * This may later be truncated to a single item. This is
          * meaningful only if we have more than one host.
-         */
+
+         * This has been moved to just before the connection attempt
+	 * to allow it to be re-randomized when looping through
+	 * filter retries.
+
         if ((flags & SPAMC_RANDOMIZE_HOSTS) && tp->nhosts > 1) {
             _randomize_hosts(tp);
         }
+         */
 
         /* If the user wants no fallback, simply truncate the host
          * list to just one - this pretends that this is the extent
diff -Naur Mail-SpamAssassin-3.2.4/spamc/libspamc.h Mail-SpamAssassin-3.2.4.loadbalance/spamc/libspamc.h
--- Mail-SpamAssassin-3.2.4/spamc/libspamc.h	2008-01-05 14:12:39.000000000 -0700
+++ Mail-SpamAssassin-3.2.4.loadbalance/spamc/libspamc.h	2008-04-01 12:43:44.000000000 -0600
@@ -233,6 +233,10 @@
     /* added in SpamAssassin 3.2.0 */
     int connect_retries;
     int retry_sleep;
+
+    /* Added for filterloop */
+    int filter_retries;
+    int filter_retry_sleep;
 };
 
 /* Initialise and setup transport-specific context for the connection
diff -Naur Mail-SpamAssassin-3.2.4/spamc/spamc.c Mail-SpamAssassin-3.2.4.loadbalance/spamc/spamc.c
--- Mail-SpamAssassin-3.2.4/spamc/spamc.c	2008-01-05 14:12:39.000000000 -0700
+++ Mail-SpamAssassin-3.2.4.loadbalance/spamc/spamc.c	2008-03-31 23:33:46.000000000 -0600
@@ -153,8 +153,14 @@
     usg("  -t, --timeout timeout\n"
         "                      Timeout in seconds for communications to\n"
         "                      spamd. [default: 600]\n");
+    usg("  --filter-retries retries\n"
+        "                      Retry filtering this many times if the spamd\n"
+        "                      process fails (usually times out) [default: 3]\n");
+    usg("  --filter-retry-sleep sleep\n"
+        "                      Sleep for this time between failed filter\n"
+        "                      attempts, in seconds [default: 1]\n");
     usg("  --connect-retries retries\n"
-        "                      Try connecting to spamd this many times\n"
+        "                      Try connecting to spamd tcp socket this many times\n"
         "                      [default: 3]\n");
     usg("  --retry-sleep sleep Sleep for this time between attempts to\n"
         "                      connect to spamd, in seconds [default: 1]\n");
@@ -226,7 +232,7 @@
     int longind = 1;
 
     static struct option longoptions[] = {
-       { "dest" , required_argument, 0, 'd' },
+       { "dest", required_argument, 0, 'd' },
        { "randomize", no_argument, 0, 'H' },
        { "port", required_argument, 0, 'p' },
        { "ssl", optional_argument, 0, 'S' },
@@ -235,6 +241,8 @@
        { "timeout", required_argument, 0, 't' },
        { "connect-retries", required_argument, 0, 0 },
        { "retry-sleep", required_argument, 0, 1 },
+       { "filter-retries", required_argument, 0, 3 },
+       { "filter-retry-sleep", required_argument, 0, 4 },
        { "max-size", required_argument, 0, 's' },
        { "username", required_argument, 0, 'u' },
        { "learntype", required_argument, 0, 'L' },
@@ -471,6 +479,16 @@
                 flags |= SPAMC_HEADERS;
                 break;
             }
+            case 3:
+            {
+                ptrn->filter_retries = atoi(spamc_optarg);
+                break;
+            }
+            case 4:
+            {
+                ptrn->filter_retry_sleep = atoi(spamc_optarg);
+                break;
+            }
         }
     }
 
diff -Naur Mail-SpamAssassin-3.2.4/spamc/getopt.c Mail-SpamAssassin-3.2.4.fixdest/spamc/getopt.c
--- Mail-SpamAssassin-3.2.4/spamc/getopt.c	2008-01-05 14:12:39.000000000 -0700
+++ Mail-SpamAssassin-3.2.4.fixdest/spamc/getopt.c	2008-04-01 12:03:39.000000000 -0600
@@ -247,7 +247,7 @@
          longoptlen -= strlen(bp);
       }
 
-      for(i=1; ; i++) {
+      for(i=0; ; i++) {
          if((longopts[i].name == NULL) || (longopts[i].name == 0))
             return(longoptiserr(argc, argv, spamc_optind-1, OPTERRNF));
          if((memcmp(longopt+2, longopts[i].name, longoptlen)) == 0) {

Reply via email to