Unfortunately I have "still no cigar". But here is some more info that
might help you:

* I patched afs_BlackListOnce() and its invocation a little bit more
  because I think that, as is, it always returns 1. See attached patch.

* If I turn on fstrace, I cannot reproduce the timeout any more;
  probably the client is then too slow to trigger the bug.

Other guess: How dangerous is the new goto out in afs_Analyze() as it
bypasses a lot of code?

Back to logfile debugging after lunch,
Harald.

--- openafs-1.4.8pre2/src/afs/afs_analyze.c.orig        2008-08-22 20:53:16.000000000 +0200
+++ openafs-1.4.8pre2/src/afs/afs_analyze.c     2008-10-15 10:19:08.000000000 +0200
@@ -455,7 +455,7 @@
  *              blacklist. 
  *
  * Returns:
- *     Non-zero value if further servers are available to try,
+ *     The number of further servers available to try,
  *     zero otherwise.
  *
  * Environment:
@@ -485,7 +485,7 @@
                    areq->skipserver[i] = 1;
                }
                if (tvp->serverHost[i] &&
-                   !(tvp->serverHost[i]->addr->sa_flags & 
+                   (tvp->serverHost[i]->addr->sa_flags & 
                      SRVR_ISDOWN)) {
                    areq->skipserver[i] = 1;
                }
@@ -494,9 +494,8 @@
        }
     }
     for (i = 0; i < MAXHOSTS; i++) {
-       if (areq->skipserver[i] == 0) {
-           serversleft = 1;
-           break;
+       if (tvp->serverHost[i] && areq->skipserver[i] == 0) {
+           serversleft++;
        }
     }
     return serversleft;
@@ -545,7 +544,6 @@
     struct server *tsp;
     struct volume *tvp;
     afs_int32 shouldRetry = 0;
-    afs_int32 serversleft = 1;
     struct afs_stats_RPCErrors *aerrP;
     afs_int32 markeddown;
 
@@ -672,9 +670,8 @@
 #endif /* AFS_64BIT_CLIENT */
     if ((acode < 0) && (acode != VRESTARTING)) {
        if (acode == RX_CALL_TIMEOUT) {
-           serversleft = afs_BlackListOnce(areq, afid, tsp);
            areq->idleError++;
-           if (serversleft) {
+           if (afs_BlackListOnce(areq, afid, tsp)) {
                shouldRetry = 1;
            } else {
                shouldRetry = 0;
@@ -746,10 +743,8 @@
                    ("afs: Tokens for user of AFS id %d for cell %s have expired\n",
                     tu->vid, aconn->srvr->server->cell->cellName);
            } else {
-               serversleft = afs_BlackListOnce(areq, afid, tsp);
                areq->tokenError++;
-
-               if (serversleft) {
+               if (afs_BlackListOnce(areq, afid, tsp)) {
                    afs_warnuser
                        ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d\n",
                         tu->vid, aconn->srvr->server->cell->cellName, acode);

Reply via email to