One of our recently installed 1.4.1b fileservers dumped core in host.c yesterday (6/12/2006) and again today about half an hour ago.

Both cores point to h_gethostcps_r; I must be missing something, because it looks like the call to h_gethostcps_r is correct, but the pointer to the host structure is magically (since I can't figure out where it happens :) null when the function starts to execute.

Kim Kimball

dhk at ccre.com

bash-2.05$ dbx fileserver core_jplis-fil-afs32_fileserver_0_0_1150115729_2944
For information about new features see `help changes'
To remove this message, put `dbxenv suppress_startup_message 7.4' in your .dbxrc
Reading fileserver
core file header read successfully
Reading ld.so.1
Reading libpthread.so.1
Reading libsocket.so.1
Reading libresolv.so.2
Reading libnsl.so.1
Reading libintl.so.1
Reading libdl.so.1
Reading libc.so.1
Reading libmp.so.2
Reading libc_psr.so.1
Reading libthread.so.1
Reading nss_files.so.1
WARNING!!
A loadobject was found with an unexpected checksum value.
See `help core mismatch' for details, and run `proc -map'
to see what checksum values were expected and found.
dbx: warning: Some symbolic information might be incorrect.
[EMAIL PROTECTED] ([EMAIL PROTECTED]) terminated by signal SEGV (no mapping at the fault address)
Current function is h_gethostcps_r
490           host->hcpsfailed = 0;
(dbx) where
current thread: [EMAIL PROTECTED]
=>[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54), line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34, ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds = 0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109 in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp = 0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) dump
slept = 0
code = 0
host = (nil)
now = 1150115729
(dbx) list
490           host->hcpsfailed = 0;
491
492       host->hostFlags &= ~HCPS_INPROGRESS;
493       /* signal all who are waiting */
494       if (host->hostFlags & HCPS_WAITING) { /* somebody is waiting */
495           host->hostFlags &= ~HCPS_WAITING;
496   #ifdef AFS_PTHREAD_ENV
497           assert(pthread_cond_broadcast(&host->cond) == 0);
498   #else /* AFS_PTHREAD_ENV */
499           if ((code = LWP_NoYieldSignal(&(host->hostFlags))) != LWP_SUCCESS)
(dbx) print host
host = (nil)
(dbx) print *host
dbx: reference through nil pointer
(dbx) # Assigned to null pointer to structure
(dbx) up
Current function is h_Lookup_r
638                   h_gethostcps_r(host, now);
(dbx) dump
index = 98
haddr = 2303680610U
hport = 7001U
heldp = 0xfe2fba54
host = 0x113e628
now = 1150115729
chain = 0x11b37b0
(dbx) list
638                   h_gethostcps_r(host, now);
639               }
640               break;
641           }
642           host = NULL;
643       }
644       return host;
645
646   }                               /*h_Lookup */
647
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
=>[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54), line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34, ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds = 0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109 in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp = 0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) up
Current function is h_GetHost_r
1118       host = h_Lookup_r(haddr, hport, &held);
(dbx) dump
interfValid = 0
hoststr = "¼þ/¹\030"
haddr = 2303680610U
interf = RECORD
hoststr2 = "LSþ/¸¸ÿ\031\037äþ/¹Ôþ/"
oldHost = 0x1
identP = (nil)
caps = RECORD
hport = 7001U
cb_conn = (nil)
code = 0
host = (nil)
oheld = 98
held = 0
tcon = 0x11f3688
(dbx) list
1118       host = h_Lookup_r(haddr, hport, &held);
1119       identP = (struct Identity *)rx_GetSpecific(tcon, rxcon_ident_key);
1120       if (host && !identP && !(host->Console & 1)) {
1121           /* This is a new connection, and we already have a host
1122            * structure for this address. Verify that the identity
1123            * of the caller matches the identity in the host structure.
1124            */
1125           h_Lock_r(host);
1126           if (!(host->hostFlags & ALTADDR)) {
1127               /* Another thread is doing initialization */
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54), line 638 in "host.c"
=>[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34, ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds = 0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109 in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp = 0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) up
Current function is h_FindClient_r
1676           host = h_GetHost_r(tcon);       /* Returns it h_Held */
(dbx) dump
tinst = ""
client = (nil)
expTime = 2147483647
created = 0
code = 1
host = 0x543878
fail = 0
tname = ""
tcell = ""
authClass = 0
tcon = 0x11f3688
viceid = 32766
oldClient = (nil)
uname = ""
(dbx) list
1676           host = h_GetHost_r(tcon);       /* Returns it h_Held */
1677
1678       retryfirstclient:
1679           /* First try to find the client structure */
1680           for (client = host->FirstClient; client; client = client->next) {
1681               if (!client->deleted && (client->sid == rxr_CidOf(tcon))
1682                   && (client->VenusEpoch == rxr_GetEpoch(tcon))) {
1683                   if (client->tcon && (client->tcon != tcon)) {
1684                       ViceLog(0,
1685 ("*** Vid=%d, sid=%x, tcon=%x, Tcon=%x ***\n",
(dbx) up
Current function is CallPreamble
317       tclient = h_FindClient_r(*tconn);
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54), line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
=>[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34, ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds = 0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109 in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp = 0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) dump
hoststr = "þ/½\034"
tconn = 0xfe2fbd34
hoststr2 = ""
thost = (nil)
code = 0
retry_flag = 1
activecall = 0
ahostp = 0xfe2fbd30
acall = 0x11a2340
tclient = 0xc31ea8
(dbx) list
317       tclient = h_FindClient_r(*tconn);
318       thost = tclient->host;
319       if (tclient->prfail == 1) { /* couldn't get the CPS */
320           if (!retry_flag) {
321               h_ReleaseClient_r(tclient);
322               h_Release_r(thost);
323               ViceLog(0, ("CallPreamble: Couldn't get CPS. Fail\n"));
324               H_UNLOCK;
325               return -1001;
326           }
(dbx) up
Current function is SRXAFS_GetTime
6793       if ((code = CallPreamble(acall, NOTACTIVECALL, &tcon, &thost)))
(dbx) where
current thread: [EMAIL PROTECTED]
[1] h_gethostcps_r(host = (nil), now = 1150115729), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2303680610U, hport = 7001U, heldp = 0xfe2fba54), line 638 in "host.c"
[3] h_GetHost_r(tcon = 0x11f3688), line 1118 in "host.c"
[4] h_FindClient_r(tcon = 0x11f3688), line 1676 in "host.c"
[5] CallPreamble(acall = 0x11a2340, activecall = 0, tconn = 0xfe2fbd34, ahostp = 0xfe2fbd30), line 317 in "afsfileprocs.c"
=>[6] SRXAFS_GetTime(acall = 0x11a2340, Seconds = 0xfe2fbdb4, USeconds = 0xfe2fbdb0), line 6793 in "afsfileprocs.c"
[7] _RXAFS_GetTime(z_call = 0x11a2340, z_xdrs = 0xfe2fbe38), line 1109 in "afsint.ss.c"
[8] RXAFS_ExecuteRequest(z_call = 0x11a2340), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 98, newcall = (nil), socketp = 0xfe2fbf34), line 1407 in "rx.c"
[10] rx_ServerProc(), line 300 in "rx_pthread.c"
[11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) dump
elapsedTime = RECORD
opStartTime = RECORD
opP = 0x167aa4
thost = 0x10cae80
code = 0
tpl = RECORD
USeconds = 0xfe2fbdb0
tcon = 0x11f3688
opStopTime = RECORD
acall = 0x11a2340
Seconds = 0xfe2fbdb4
(dbx) list
6793       if ((code = CallPreamble(acall, NOTACTIVECALL, &tcon, &thost)))
6794           goto Bad_GetTime;
6795
6796       FS_LOCK;
6797       AFSCallStats.GetTime++, AFSCallStats.TotalCalls++;
6798       FS_UNLOCK;
6799       TM_GetTimeOfDay(&tpl, 0);
6800       *Seconds = tpl.tv_sec;
6801       *USeconds = tpl.tv_usec;
6802
(dbx)


And today

bash-2.05$ dbx fileserver core_jplis-fil-afs32_fileserver_0_0_1150225984_3201
For information about new features see `help changes'
To remove this message, put `dbxenv suppress_startup_message 7.4' in your .dbxrc
Reading fileserver
core file header read successfully
Reading ld.so.1
Reading libpthread.so.1
Reading libsocket.so.1
Reading libresolv.so.2
Reading libnsl.so.1
Reading libintl.so.1
Reading libdl.so.1
Reading libc.so.1
Reading libmp.so.2
Reading libc_psr.so.1
Reading libthread.so.1
Reading nss_files.so.1
WARNING!!
A loadobject was found with an unexpected checksum value.
See `help core mismatch' for details, and run `proc -map'
to see what checksum values were expected and found.
dbx: warning: Some symbolic information might be incorrect.
[EMAIL PROTECTED] ([EMAIL PROTECTED]) terminated by signal SEGV (no mapping at the fault address)
Current function is h_gethostcps_r
 490           host->hcpsfailed = 0;
(dbx) where
current thread: [EMAIL PROTECTED]
=>[1] h_gethostcps_r(host = (nil), now = 1150225984), line 490 in "host.c"
[2] h_Lookup_r(haddr = 2313442094U, hport = 7001U, heldp = 0xf89fba54), line 638 in "host.c"
 [3] h_GetHost_r(tcon = 0x137fef0), line 1118 in "host.c"
 [4] h_FindClient_r(tcon = 0x137fef0), line 1676 in "host.c"
 [5] CallPreamble(acall = 0x1267388, activecall = 0, tconn = 0xf89fbd34, ahostp = 0xf89fbd30), line 317 in "afsfileprocs.c"
 [6] SRXAFS_GetTime(acall = 0x1267388, Seconds = 0xf89fbdb4, USeconds = 0xf89fbdb0), line 6793 in "afsfileprocs.c"
 [7] _RXAFS_GetTime(z_call = 0x1267388, z_xdrs = 0xf89fbe38), line 1109 in "afsint.ss.c"
 [8] RXAFS_ExecuteRequest(z_call = 0x1267388), line 1941 in "afsint.ss.c"
[9] rxi_ServerProc(threadID = 50, newcall = (nil), socketp = 0xf89fbf34), line 1407 in "rx.c"
 [10] rx_ServerProc(), line 300 in "rx_pthread.c"
 [11] server_entry(argp = 0xc84a0), line 98 in "rx_pthread.c"
(dbx) up
Current function is h_Lookup_r
 638                   h_gethostcps_r(host, now);
(dbx) print host
host = 0x11f35d0
(dbx) print *host
*host = {
   next           = 0x12da4a8
   prev           = 0x12e63a0
   callback_rxcon = 0x1301108
   holds          = (0, 262144, 0, 134217728, 0)
   host           = 2313442094U
   port           = 7001U
   Console        = '\0'
   hostFlags      = 69U
   InSameNetwork  = '\0'
   dummy          = ""
   hcpsfailed     = '\0'
   hcps           = {
       prlist_len = 6U
       prlist_val = 0x11c67a8
   }
   LastCall       = 1150225384U
   ActiveCall     = 1150214360U
   FirstClient    = 0x12c2ff0
   cpsCall        = 1150225984U
   interface      = 0x12991a0
   cblist         = 401U
   index          = 962U
   lock           = {
       wait_states     = '\0'
       excl_locked     = '\0'
       readers_reading = '\0'
       num_waiting     = '\0'
       mutex           = {
           __pthread_mutex_flags = {
               __pthread_mutex_flag1   = 4U
               __pthread_mutex_flag2   = '\0'
               __pthread_mutex_ceiling = '\0'
               __pthread_mutex_type    = 0
               __pthread_mutex_magic   = 19800U
           }
           __pthread_mutex_lock  = {
               __pthread_mutex_lock64  = {
                   __pthread_mutex_pad = ""
               }
               __pthread_mutex_lock32  = {
                   __pthread_ownerpid = 0
                   __pthread_lockword = 0
               }
               __pthread_mutex_owner64 = 0
           }
           __pthread_mutex_data  = 0
       }
       read_cv         = {
           __pthread_cond_flags = {
               __pthread_cond_flag  = ""
               __pthread_cond_type  = 0
               __pthread_cond_magic = 17238U
           }
           __pthread_cond_data  = 0
       }
       write_cv        = {
           __pthread_cond_flags = {
               __pthread_cond_flag  = ""
               __pthread_cond_type  = 0
               __pthread_cond_magic = 17238U
           }
           __pthread_cond_data  = 0
       }
   }
   cond           = {
       __pthread_cond_flags = {
           __pthread_cond_flag  = ""
           __pthread_cond_type  = 0
           __pthread_cond_magic = 17238U
       }
       __pthread_cond_data  = 0
   }
}
(dbx) down
Current function is h_gethostcps_r
 490           host->hcpsfailed = 0;
(dbx) print host
host = (nil)
(dbx) print *host
dbx: reference through nil pointer
(dbx)

_______________________________________________
OpenAFS-devel mailing list
[email protected]
https://lists.openafs.org/mailman/listinfo/openafs-devel

Reply via email to