Thanks for the patch. It fixed the signal 10 error, but replaced it with a signal 11 error! Here's the scoop:
Contents of /vice/srv/SrvLog:
Partition /vicepa: inodes in use: 0, total: 2097152.
14:29:49 Partition /vicepa: 63091568K available (minfree=4%), 51727984K free.
14:29:49 The server (pid 8910) can be controlled using volutil commands
14:29:49 "volutil -help" will give you a list of these commands
14:29:49 If desperate,
"kill -SIGWINCH 8910" will increase debugging level
14:29:49 "kill -SIGUSR2 8910" will set debugging level to zero
14:29:49 "kill -9 8910" will kill a runaway server
14:29:49 ****** FILE SERVER INTERRUPTED BY SIGNAL 11 ******
14:29:49 ****** Aborting outstanding transactions, stand by...
14:29:49 Uncommitted transactions: 0
14:29:49 Uncommitted transactions: 0
14:29:49 Committing suicide now ........
Contents of /vice/srv/SrvErr
Assertion failed: 0, file "srv.cc", line 302
EXITING! Bye!
Running it through gdb with flags -d 1 and backtracing shows:
blossom: {240} gdb /usr/local/sbin/codasrv
(gdb) run -d 1
Starting program: /usr/local/sbin/codasrv -d 1
Setting debuglevel to 1
Program received signal SIGSEGV, Segmentation fault.
0x40403364 in bcopy () from /usr/local/lib/libc.so.12
(gdb) bt
#0 0x40403364 in bcopy () from /usr/local/lib/libc.so.12
#1 0x00088bc0 in readints (f=0xffffffff, a=0xffffc568, b=0xffffc564, pos=2659)
at rwcdb_pack.h:73
#2 0x00087d4c in rwcdb_find (c=0x129000,
k=0xe86e0 "NAMESystem:Administrators", klen=25) at rwcdb.c:272
#3 0x0008731c in PDB_db_read (h=0x129000, id=0,
name=0xffffffff <Error reading address 0xffffffff: Invalid argument>,
data="" size=0xffffc650) at pdbdb.c:288
#4 0x00086230 in PDB_readProfile_byname (h=0x129000,
name=0x94710 "System:Administrators", r=0xffffc6c0) at pdbprofile.c:107
#5 0x000858e0 in PDB_lookupByName (name=0x94710 "System:Administrators",
id=0xcf860) at pdb.c:385
#6 0x000848e4 in AL_NameToId (
Name=0xffffffff <Error reading address 0xffffffff: Invalid argument>,
Id=0xcf860) at alprocs.c:429
#7 0x0001401c in main (argc=-1, argv=0xcf860) at srv.cc:483
#8 0x00013840 in ___start ()
(gdb) up
#1 0x00088bc0 in readints (f=0xffffffff, a=0xffffc568, b=0xffffc564, pos=2659)
at rwcdb_pack.h:73
73 bcopy(&t, buf, sizeof(struct rwcdb_tuple));
Current language: auto; currently c
(gdb) up
#2 0x00087d4c in rwcdb_find (c=0x129000,
k=0xe86e0 "NAMESystem:Administrators", klen=25) at rwcdb.c:272
272 if (readints(&c->rf, &hash2, &pos, cur_pos))
(gdb) up
#3 0x0008731c in PDB_db_read (h=0x129000, id=0,
name=0xffffffff <Error reading address 0xffffffff: Invalid argument>,
data="" size=0xffffc650) at pdbdb.c:288
288 rc = rwcdb_find(&h->main, namekey, strlen(namekey));
(gdb) up
#4 0x00086230 in PDB_readProfile_byname (h=0x129000,
name=0x94710 "System:Administrators", r=0xffffc6c0) at pdbprofile.c:107
107 PDB_db_read(h, 0, name, &data, &size);
(gdb) up
#5 0x000858e0 in PDB_lookupByName (name=0x94710 "System:Administrators",
id=0xcf860) at pdb.c:385
385 PDB_readProfile_byname(h, name, &r);
(gdb) up
#6 0x000848e4 in AL_NameToId (
Name=0xffffffff <Error reading address 0xffffffff: Invalid argument>,
Id=0xcf860) at alprocs.c:429
429 PDB_lookupByName(Name, (int32_t *) Id);
(gdb) up
#7 0x0001401c in main (argc=-1, argv=0xcf860) at srv.cc:483
483 if (AL_NameToId(PRS_ADMINGROUP, &SystemId) ||
Current language: auto; currently c++
(gdb) up
#8 0x00013840 in ___start ()
(gdb) i frame
Stack level 8, frame at 0xffffc990:
pc = 0x13840 in ___start; saved pc 0x13794
caller of frame at 0xffffc928
Arglist at 0xffffc990, args:
Locals at 0xffffc990, Previous frame's sp in sp
(gdb) list
478 DIR_Init(DIR_DATA_IN_VM);
479
480 stat(CODADB, &buff);
481 pdbtime = (int)buff.st_mtime;
482 CODA_ASSERT(AL_Initialize(AL_VERSION) == 0);
483 if (AL_NameToId(PRS_ADMINGROUP, &SystemId) ||
484 AL_NameToId(PRS_ANYUSERGROUP, &AnyUserId)) {
485 SLog(0, "Failed to find '" PRS_ADMINGROUP "' or '" PRS_ANYUSERGROUP
486 "' in the pdb database.");
487 CODA_ASSERT(0 && "check pdb database");
(gdb)
Any thoughts?
Thanks, Sean
[EMAIL PROTECTED]
On 4/25/06, Greg Troxel <[EMAIL PROTECTED]> wrote:
The code in rwcdb_pack.h doesn't check for alignment.
Try this:
--- rwcdb_pack.h.~1.4.~ 2005-06-20 08:45:54.000000000 -0400
+++ rwcdb_pack.h 2006-04-25 13:24:23.000000000 -0400
@@ -59,6 +59,7 @@
static __inline__ void packints(char *buf, const u_int32_t a, const u_int32_t b)
{
struct rwcdb_tuple *p = (struct rwcdb_tuple *)buf;
+ /* XXX alignment */
p->a = SWAP_OUT(a);
p->b = SWAP_OUT(b);
}
@@ -66,8 +67,16 @@
static __inline__ void unpackints(char *buf, u_int32_t *a, u_int32_t *b)
{
struct rwcdb_tuple *p = (struct rwcdb_tuple *)buf;
- *a = SWAP_IN(p->a);
- *b = SWAP_IN(p->b);
+ /* XXX cast is not right */
+ if ((long) p & 0x3) {
+ struct rwcdb_tuple t;
+ bcopy(&t, buf, sizeof(struct rwcdb_tuple));
+ *a = SWAP_IN(t.a);
+ *b = SWAP_IN(t.b);
+ } else {
+ *a = SWAP_IN(p->a);
+ *b = SWAP_IN(p->b);
+ }
}
#endif /* _RWCDB_PACK_H_ */
--
Greg Troxel <[EMAIL PROTECTED]>