Rainer Toebbicke wrote:
Something is wrong with OpenAFS 1.2.13: on several busy servers we see an ever-increasing number of host/client connections.
The attached patch (against 1.2.13, but, modulo line numbers, also applicable to 1.3.73 and later) fixes an h_Hold table leak in GetSomeSpace_r:
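lih_r now leaves the current lih_host held, and calls h_Release_r on any previously chosen host about which it changes its mind during h_Enumerate_r. It also closes the window that ClearHostCallbacks_r opens, by taking a private copy of lih_host_held before H_LOCK is given up.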
The problem was that, because of the leak in the h_Hold table, rx connections and host and client structures stopped being garbage-collected once the fileserver had gone through GetSomeSpace_r. This is only relevant for "busy" servers; many servers would never even invoke this routine. The problem did not appear before 1.2.11 because the delta that caused it was not yet in.
Tested on two servers up to now.
-- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Rainer Toebbicke European Laboratory for Particle Physics(CERN) - Geneva, Switzerland Phone: +41 22 767 8985 Fax: +41 22 767 7155
*** openafs/src/viced/callback.c.orig 2004-11-10 11:31:37.000000000 +0100
--- openafs/src/viced/callback.c 2005-01-05 16:42:05.000000000 +0100
***************
*** 1394,1415 ****
static struct host *lih_host;
! static int lih_host_held = 0;
static int lih_r(host, held, hostp)
register struct host *host, *hostp;
register int held;
{
- lih_host_held = 0;
if (host->cblist
&& ((hostp && host != hostp) || (!held && !h_OtherHolds_r(host)))
&& (!lih_host || host->ActiveCall < lih_host->ActiveCall) ) {
lih_host = host;
! }
! if (!held) {
! held = 1;
! lih_host_held = 1;
}
return held;
--- 1394,1415 ----
static struct host *lih_host;
! static int lih_host_held;
static int lih_r(host, held, hostp)
register struct host *host, *hostp;
register int held;
{
if (host->cblist
&& ((hostp && host != hostp) || (!held && !h_OtherHolds_r(host)))
&& (!lih_host || host->ActiveCall < lih_host->ActiveCall) ) {
+ if (lih_host != NULL && lih_host_held) {
+ h_Release_r(lih_host);
+ }
lih_host = host;
! lih_host_held = !held;
! held = 1;
}
return held;
***************
*** 1438,1450 ****
h_Enumerate_r(lih_r, hp2, (char *)hp1);
hp = lih_host;
if (hp) {
cbstuff.GSS4++;
if (!ClearHostCallbacks_r(hp, 0 /* not locked or held */ )) {
! if (lih_host_held)
h_Release_r(hp);
return 0;
}
! if (lih_host_held)
h_Release_r(hp);
hp2 = hp->next;
} else {
--- 1438,1451 ----
h_Enumerate_r(lih_r, hp2, (char *)hp1);
hp = lih_host;
if (hp) {
+ int lih_host_held2=lih_host_held; /* set in lih_r! private copy
before giving up H_LOCK */
cbstuff.GSS4++;
if (!ClearHostCallbacks_r(hp, 0 /* not locked or held */ )) {
! if (lih_host_held2)
h_Release_r(hp);
return 0;
}
! if (lih_host_held2)
h_Release_r(hp);
hp2 = hp->next;
} else {
