Index: src/native/mod_jk/common/jk_lb_worker.c
===================================================================
RCS file: /home/cvspublic/jakarta-tomcat/src/native/mod_jk/common/jk_lb_worker.c,v
retrieving revision 1.4
diff -c -r1.4 jk_lb_worker.c
*** src/native/mod_jk/common/jk_lb_worker.c	11 Jan 2001 03:14:45 -0000	1.4
--- src/native/mod_jk/common/jk_lb_worker.c	15 Apr 2002 22:30:22 -0000
***************
*** 84,89 ****
--- 84,90 ----
      double  lb_value;
      int     in_error_state;
      int     in_recovering;
+     int     retry_count;
      time_t  error_time;
      jk_worker_t *w;
  };
***************
*** 272,277 ****
--- 273,279 ----
                      
                      p->lb_workers[i].in_recovering  = JK_TRUE;
                      p->lb_workers[i].error_time     = now;
+                     p->lb_workers[i].retry_count++;
                      rc = &(p->lb_workers[i]);
  
                      break;
***************
*** 285,290 ****
--- 287,328 ----
          }            
      }
  
+     if ( !rc ) {
+       /* No worker was found (rc is NULL): try as hard as possible to get one anyway, preferring the errored worker with the oldest (smallest) error_time. */
+       for(i = 0 ; i < p->num_of_workers ; i++) {
+         if(p->lb_workers[i].in_error_state) {
+ 	  if(!p->lb_workers[i].in_recovering) {
+ 
+ 	    /* Only consider workers whose retry count is still zero, i.e. workers that have not been
+ 	    *  picked for a retry since the counts were last reset; this ensures that a failed worker is not retried over and over again.
+ 	    */
+ 	    if ( p->lb_workers[i].retry_count == 0 ) {
+ 	      if ( rc ) {
+ 		if ( p->lb_workers[i].error_time < rc->error_time ) {
+ 		  rc = &(p->lb_workers[i]);
+ 		}
+ 	      } else 
+ 		rc = &(p->lb_workers[i]);
+ 	    }
+ 	  }
+         } else {
+ 	  if(p->lb_workers[i].lb_value < lb_min || !rc) {
+ 	    lb_min = p->lb_workers[i].lb_value;
+ 	    rc = &(p->lb_workers[i]);
+ 	    break;
+ 	  }
+         }            
+       }
+ 
+       if ( rc  && rc->in_error_state ) {
+ 	time_t now = time(0);
+ 	rc->in_recovering  = JK_TRUE;
+ 	rc->error_time     = now;
+ 	rc->retry_count++;
+       }
+       
+     }
+ 
      if(rc) {
          rc->lb_value += rc->lb_factor;                
      }
***************
*** 307,312 ****
--- 345,355 ----
          /* you can not recover on another load balancer */
          *is_recoverable_error = JK_FALSE;
  
+ 	/* reset all the retry counts to 0 */
+ 	for(i = 0 ; i < p->worker->num_of_workers ; i++) {
+ 	  p->worker->lb_workers[i].retry_count = 0;
+ 	}
+ 
  
          while(1) {
              worker_record_t *rec = get_most_suitable_worker(p->worker, s);
***************
*** 327,332 ****
--- 370,376 ----
                          }
                          rec->in_error_state = JK_FALSE;
                          rec->in_recovering  = JK_FALSE;
+                         rec->retry_count = 0;;
                          rec->error_time     = 0;                        
                          return JK_TRUE;
                      } 
***************
*** 429,434 ****
--- 473,479 ----
                  p->lb_workers[i].lb_value = 0.0;
                  p->lb_workers[i].in_error_state = JK_FALSE;
                  p->lb_workers[i].in_recovering  = JK_FALSE;
+                 p->lb_workers[i].retry_count = 0;
                  if(!wc_create_worker(p->lb_workers[i].name, 
                                       props, 
                                       &(p->lb_workers[i].w), 

