One of our recently installed 1.4.1b fileservers dumped core in host.c
yesterday (6/12/2006) and again today, about half an hour ago.
Both cores point to h_gethostcps_r. I must be missing something, because
it looks like the call to h_gethostcps_r is correct, but the pointer to
the host structure is magically null (I can't figure out where that
happens :) when the function starts to execute.
Kim Kimball
dhk at ccre.com
bash-2.05$ dbx fileserver
core_jplis-fil-afs32_fileserver_0_0_1150115729_2944
For information about new features see `help changes'
To remove this message, put `dbxenv suppress_startup_message 7.4' in
your .dbxrc
Reading fileserver
core file header read successfully
Reading ld.so.1
Reading libpthread.so.1
Reading libsocket.so.1
Reading libresolv.so.2
Reading libnsl.so.1
Reading libintl.so.1
Reading libdl.so.1
Reading libc.so.1
Reading libmp.so.2
Reading libc_psr.so.1
Reading libthread.so.1
Reading nss_files.so.1
WARNING!!
A loadobject was found with an unexpected checksum value.
See `help core mismatch' for details, and run `proc -map'
to see what checksum values were expected and found.
dbx: warning: Some symbolic information might be incorrect.
[EMAIL PROTECTED] ([EMAIL PROTECTED]) terminated by signal SEGV (no mapping at
the fault address)
Current function is h_gethostcps_r
490 host->hcpsfailed = 0;
(dbx) where
current thread: [EMAIL PROTECTED]
=>[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54),
line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34,
ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds =
0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109
in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp =
0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) dump
slept = 0
code = 0
host = (nil)
now = 1150115729
(dbx) list
490 host->hcpsfailed = 0;
491
492 host->hostFlags &= ~HCPS_INPROGRESS;
493 /* signal all who are waiting */
494 if (host->hostFlags & HCPS_WAITING) { /* somebody is
waiting */
495 host->hostFlags &= ~HCPS_WAITING;
496 #ifdef AFS_PTHREAD_ENV
497 assert(pthread_cond_broadcast(&host->cond) == 0);
498 #else /* AFS_PTHREAD_ENV */
499 if ((code = LWP_NoYieldSignal(&(host->hostFlags))) !=
LWP_SUCCESS)
(dbx) print host
host = (nil)
(dbx) print *host
dbx: reference through nil pointer
(dbx) # The faulting statement assigns through a null pointer to the host structure
(dbx) up
Current function is h_Lookup_r
638 h_gethostcps_r(host, now);
(dbx) dump
index = 98
haddr = 2303680610U
hport = 7001U
heldp = 0xfe2fba54
host = 0x113e628
now = 1150115729
chain = 0x11b37b0
(dbx) list
638 h_gethostcps_r(host, now);
639 }
640 break;
641 }
642 host = NULL;
643 }
644 return host;
645
646 } /*h_Lookup */
647
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
=>[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp =
0xfe2fba54), line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34,
ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds =
0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109
in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp =
0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) up
Current function is h_GetHost_r
1118 host = h_Lookup_r(haddr, hport, &held);
(dbx) dump
interfValid = 0
hoststr = "¼þ/¹\030"
haddr = 2303680610U
interf = RECORD
hoststr2 = "LSþ/¸¸ÿ\031\037äþ/¹Ôþ/"
oldHost = 0x1
identP = (nil)
caps = RECORD
hport = 7001U
cb_conn = (nil)
code = 0
host = (nil)
oheld = 98
held = 0
tcon = 0x11f3688
(dbx) list
1118 host = h_Lookup_r(haddr, hport, &held);
1119 identP = (struct Identity *)rx_GetSpecific(tcon,
rxcon_ident_key);
1120 if (host && !identP && !(host->Console & 1)) {
1121 /* This is a new connection, and we already have a host
1122 * structure for this address. Verify that the identity
1123 * of the caller matches the identity in the host structure.
1124 */
1125 h_Lock_r(host);
1126 if (!(host->hostFlags & ALTADDR)) {
1127 /* Another thread is doing initialization */
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54),
line 638 in "host.c"
=>[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34,
ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds =
0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109
in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp =
0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) up
Current function is h_FindClient_r
1676 host = h_GetHost_r(tcon); /* Returns it h_Held */
(dbx) dump
tinst = ""
client = (nil)
expTime = 2147483647
created = 0
code = 1
host = 0x543878
fail = 0
tname = ""
tcell = ""
authClass = 0
tcon = 0x11f3688
viceid = 32766
oldClient = (nil)
uname = ""
(dbx) list
1676 host = h_GetHost_r(tcon); /* Returns it h_Held */
1677
1678 retryfirstclient:
1679 /* First try to find the client structure */
1680 for (client = host->FirstClient; client; client =
client->next) {
1681 if (!client->deleted && (client->sid == rxr_CidOf(tcon))
1682 && (client->VenusEpoch == rxr_GetEpoch(tcon))) {
1683 if (client->tcon && (client->tcon != tcon)) {
1684 ViceLog(0,
1685 ("*** Vid=%d, sid=%x, tcon=%x,
Tcon=%x ***\n",
(dbx) up
Current function is CallPreamble
317 tclient = h_FindClient_r(*tconn);
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54),
line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
=>[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn =
0xfe2fbd34, ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds =
0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109
in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp =
0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) dump
hoststr = "þ/½\034"
tconn = 0xfe2fbd34
hoststr2 = ""
thost = (nil)
code = 0
retry_flag = 1
activecall = 0
ahostp = 0xfe2fbd30
acall = 0x11a2340
tclient = 0xc31ea8
(dbx) list
317 tclient = h_FindClient_r(*tconn);
318 thost = tclient->host;
319 if (tclient->prfail == 1) { /* couldn't get the CPS */
320 if (!retry_flag) {
321 h_ReleaseClient_r(tclient);
322 h_Release_r(thost);
323 ViceLog(0, ("CallPreamble: Couldn't get CPS. Fail\n"));
324 H_UNLOCK;
325 return -1001;
326 }
(dbx) up
Current function is SRXAFS_GetTime
6793 if ((code = CallPreamble(acall, NOTACTIVECALL, &tcon, &thost)))
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54),
line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34,
ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
=>[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds =
0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109
in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp =
0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) dump
elapsedTime = RECORD
opStartTime = RECORD
opP = 0x167aa4
thost = 0x10cae80
code = 0
tpl = RECORD
USeconds = 0xfe2fbdb0
tcon = 0x11f3688
opStopTime = RECORD
acall = 0x11a2340
Seconds = 0xfe2fbdb4
(dbx) list
6793 if ((code = CallPreamble(acall, NOTACTIVECALL, &tcon, &thost)))
6794 goto Bad_GetTime;
6795
6796 FS_LOCK;
6797 AFSCallStats.GetTime++, AFSCallStats.TotalCalls++;
6798 FS_UNLOCK;
6799 TM_GetTimeOfDay(&tpl, 0);
6800 *Seconds = tpl.tv_sec;
6801 *USeconds = tpl.tv_usec;
6802
(dbx)
And here is today's core:
bash-2.05$ dbx fileserver
core_jplis-fil-afs32_fileserver_0_0_1150225984_3201
For information about new features see `help changes'
To remove this message, put `dbxenv suppress_startup_message 7.4' in
your .dbxrc
Reading fileserver
core file header read successfully
Reading ld.so.1
Reading libpthread.so.1
Reading libsocket.so.1
Reading libresolv.so.2
Reading libnsl.so.1
Reading libintl.so.1
Reading libdl.so.1
Reading libc.so.1
Reading libmp.so.2
Reading libc_psr.so.1
Reading libthread.so.1
Reading nss_files.so.1
WARNING!!
A loadobject was found with an unexpected checksum value.
See `help core mismatch' for details, and run `proc -map'
to see what checksum values were expected and found.
dbx: warning: Some symbolic information might be incorrect.
[EMAIL PROTECTED] ([EMAIL PROTECTED]) terminated by signal SEGV (no mapping at
the fault address)
Current function is h_gethostcps_r
490 host->hcpsfailed = 0;
(dbx) where
current thread: [EMAIL PROTECTED]
=>[1] h_gethostcps_r(host = (nil), now = 1150225984), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2313442094U, hport = 7001U, heldp =
0xf89fba54), line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x137fef0), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x137fef0), line 1676 in "host.c"
[5] CallPreamble(acall = 0x1267388, activecall = 0, tconn =
0xf89fbd34, ahostp = 0xf89fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x1267388, Seconds = 0xf89fbdb4, USeconds =
0xf89fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x1267388, z_xdrs = 0xf89fbe38), line 1109
in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x1267388), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 50, newcall = (nil), socketp =
0xf89fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) up
Current function is h_Lookup_r
638 h_gethostcps_r(host, now);
(dbx) print host
host = 0x11f35d0
(dbx) print *host
*host = {
next = 0x12da4a8
prev = 0x12e63a0
callback_rxcon = 0x1301108
holds = (0, 262144, 0, 134217728, 0)
host = 2313442094U
port = 7001U
Console = '\0'
hostFlags = 69U
InSameNetwork = '\0'
dummy = ""
hcpsfailed = '\0'
hcps = {
prlist_len = 6U
prlist_val = 0x11c67a8
}
LastCall = 1150225384U
ActiveCall = 1150214360U
FirstClient = 0x12c2ff0
cpsCall = 1150225984U
interface = 0x12991a0
cblist = 401U
index = 962U
lock = {
wait_states = '\0'
excl_locked = '\0'
readers_reading = '\0'
num_waiting = '\0'
mutex = {
__pthread_mutex_flags = {
__pthread_mutex_flag1 = 4U
__pthread_mutex_flag2 = '\0'
__pthread_mutex_ceiling = '\0'
__pthread_mutex_type = 0
__pthread_mutex_magic = 19800U
}
__pthread_mutex_lock = {
__pthread_mutex_lock64 = {
__pthread_mutex_pad = ""
}
__pthread_mutex_lock32 = {
__pthread_ownerpid = 0
__pthread_lockword = 0
}
__pthread_mutex_owner64 = 0
}
__pthread_mutex_data = 0
}
read_cv = {
__pthread_cond_flags = {
__pthread_cond_flag = ""
__pthread_cond_type = 0
__pthread_cond_magic = 17238U
}
__pthread_cond_data = 0
}
write_cv = {
__pthread_cond_flags = {
__pthread_cond_flag = ""
__pthread_cond_type = 0
__pthread_cond_magic = 17238U
}
__pthread_cond_data = 0
}
}
cond = {
__pthread_cond_flags = {
__pthread_cond_flag = ""
__pthread_cond_type = 0
__pthread_cond_magic = 17238U
}
__pthread_cond_data = 0
}
}
(dbx) down
Current function is h_gethostcps_r
490 host->hcpsfailed = 0;
(dbx) print host
host = (nil)
(dbx) print *host
dbx: reference through nil pointer
(dbx)
_______________________________________________
OpenAFS-devel mailing list
[email protected]
https://lists.openafs.org/mailman/listinfo/openafs-devel