Aloha -

OK, I seem to have gotten a handle on this thing now.

First, there's a missing mutex unlock in mach-defpager.  I'm attaching two
patches.  One fixes the debug printfs in mach-defpager/default_pager.c,
which obviously haven't been compiled for a while.  Use %p and %lx instead
of %x to silence compiler warnings, and access pthread_mutex_t's internal
structure member __held instead of held when printing mutex state.  The
second patch actually fixes the problem.

Second, the sysvinit scripts are killing mach-defpager during the shutdown
sequence, and this wreaks havoc.  The big culprit is /sbin/killall5, a C
program in the sysvinit-utils package.  Its readproc() function operates
by reading each process's stat file and parsing its startcode and endcode
values (Linux man page proc(5) - the address range of the program text),
and flagging the PID as a 'kernel' process, not to be killed, if these
values are both zero.  Obviously, this doesn't work on the Hurd.

I've tinkered with several band-aids - strcmp on the program name, not
killing PIDs below 100, but obviously none of this is suitable to submit as
a patch.  killall5's internal logic is just too Linux specific, IMHO.
What's the Hurdish way to do it?  I'm thinking killall5 should check that
'important' flag on the process and skip processes for which that flag is
set.  Yet, I don't understand what that flag is really intended for.  Does
this make sense?

I think this means changing killall5 so it accesses the Hurd process server
directly, instead of walking /proc.  Incidentally, the program currently
works by mounting /proc if it isn't mounted already - odd behavior for a
program that's supposed to be shutting things down, not starting them up!
Might have problems getting such a Hurd specific patch into the upstream
code base; who knows?

Also, what should the kernel do if it has problems with the default pager?
After I fixed the mutex bug, I started getting a bunch of
memory_object_data_request failed messages on console.  Still mysterious,
but I guess that's better than nothing!  The error code prints in hex, and
when I looked it up it was MACH_SEND_INVALID_DEST.  Is that what you get
when you send to a dead port?

Yet when the mutex locked up, the result was a silent, locked system.  A
timeout of some kind, accompanied by complaints on console, would be
better, I think, but I don't understand the vm code enough to attempt such
a change right now.

Also, there's this proxy-defpager.  Is that the actual default pager,
acting as front end to mach-defpager?  Yet killall5 seems to be able to
kill proxy-defpager without consequence.  I don't understand.

For me, though, I now have a qemu VM that can cleanly start up, use swap,
and shutdown, so I have a real sense of accomplishment!

    agape
    brent
--- mach-defpager/default_pager.c.dist.almost	2016-08-16 13:11:00.000000000 -1000
+++ mach-defpager/default_pager.c	2016-08-16 13:11:31.000000000 -1000
@@ -581,7 +581,7 @@
 	/* be paranoid */
 	if (no_partition(pindex))
 	    panic("%sdealloc_page",my_name);
-ddprintf ("pager_dealloc_page(%d,%x,%d)\n",pindex,page,lock_it);
+ddprintf ("pager_dealloc_page(%d,%lx,%d)\n",pindex,page,lock_it);
 	part = partition_of(pindex);
 
 	if (page >= part->total_size)
@@ -1092,7 +1092,7 @@
 #endif
 	if (f_page >= pager->size)
 	  {
-	    ddprintf ("%spager_read_offset pager %x: bad page %d >= size %d",
+	    ddprintf ("%spager_read_offset pager %p: bad page %ld >= size %d",
 		    my_name, pager, f_page, pager->size);
 	    pthread_mutex_unlock(&pager->lock);
 	    return (union dp_map) (union dp_map *) NO_BLOCK;
@@ -1360,7 +1360,7 @@
 	}
 
 	while (f_page >= pager->size) {
-	  ddprintf ("pager_write_offset: extending: %x %x\n", f_page, pager->size);
+	  ddprintf ("pager_write_offset: extending: %lx %x\n", f_page, pager->size);
 
 	    /*
 	     * Paging object must be extended.
@@ -1380,7 +1380,7 @@
 #if	DEBUG_READER_CONFLICTS
 	    pager->readers++;
 #endif
-	    ddprintf ("pager_write_offset: done extending: %x %x\n", f_page, pager->size);
+	    ddprintf ("pager_write_offset: done extending: %lx %x\n", f_page, pager->size);
 	}
 
 	if (INDIRECT_PAGEMAP(pager->size)) {
@@ -1429,7 +1429,7 @@
 	}
 
 	block = mapptr[f_page];
-	ddprintf ("pager_write_offset: block starts as %x[%x] %x\n", mapptr, f_page, block);
+	ddprintf ("pager_write_offset: block starts as %p[%lx] %p\n", mapptr, f_page, block.indirect);
 	if (no_block(block)) {
 	    vm_offset_t	off;
 
@@ -1656,7 +1656,7 @@
 	 * Read it, trying for the entire page.
 	 */
 	offset = ptoa(block.block.p_offset);
-ddprintf ("default_read(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index);
+ddprintf ("default_read(%lx,%x,%lx,%d)\n",addr,size,offset,block.block.p_index);
 	part   = partition_of(block.block.p_index);
 	first_time = TRUE;
 	*out_addr = addr;
@@ -1723,7 +1723,7 @@
 	vm_size_t		wsize;
 	int		rc;
 
-	ddprintf ("default_write: pager offset %x\n", offset);
+	ddprintf ("default_write: pager offset %lx\n", offset);
 
 	/*
 	 * Find block in paging partition
@@ -1744,7 +1744,7 @@
 	}
 #endif	 /* CHECKSUM */
 	offset = ptoa(block.block.p_offset);
-ddprintf ("default_write(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index);
+ddprintf ("default_write(%lx,%x,%lx,%d)\n",addr,size,offset,block.block.p_index);
 	part   = partition_of(block.block.p_index);
 
 	/*
@@ -1760,7 +1760,7 @@
 					&wsize);
 	    if (rc != 0) {
 		dprintf("*** PAGER ERROR: default_write: ");
-		dprintf("ds=0x%x addr=0x%x size=0x%x offset=0x%x resid=0x%x\n",
+		dprintf("ds=0x%p addr=0x%lx size=0x%x offset=0x%lx resid=0x%x\n",
 			ds, addr, size, offset, wsize);
 		return (PAGER_ERROR);
 	    }
@@ -1865,7 +1865,7 @@
 	 */
 all_over_again:
 #if debug
-dprintf("Partition x%x (id x%x) for %s, all_ok %d\n", part, id, name, all_ok);
+dprintf("Partition x%p (id x%x) for %s, all_ok %d\n", part, id, name, all_ok);
 #endif
 	all_ok = TRUE;
 	pthread_mutex_lock(&part->p_lock);
@@ -2343,7 +2343,7 @@
 	if (ds == DEFAULT_PAGER_NULL)
 		panic(here, my_name);
 ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
-	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
+	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.__held, seqno);
 	pager_port_lock(ds, seqno);
 
 	/*
@@ -2374,7 +2374,7 @@
 	ds->pager_name = MACH_PORT_NULL;
 	ds->name_refs = 0;
 ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
-	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
+	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.__held);
 	pager_port_unlock(ds);
 
 	/*
@@ -2486,7 +2486,7 @@
 	if (ds == DEFAULT_PAGER_NULL)
 	    panic(here,my_name);
 ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
-	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
+	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.__held, seqno);
 	pager_port_lock(ds, seqno);
 	pager_port_check_request(ds, reply_to);
 	pager_port_wait_for_writers(ds);
@@ -2498,7 +2498,7 @@
 	errors = ds->errors;
 
 ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
-	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
+	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.__held);
 	pager_port_unlock(ds);
 
 	if (errors) {
@@ -2588,12 +2588,12 @@
 	if (ds == DEFAULT_PAGER_NULL)
 	    panic(here,my_name);
 ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
-	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
+	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.__held, seqno);
 	pager_port_lock(ds, seqno);
 	pager_port_check_request(ds, pager_request);
 	pager_port_start_write(ds);
 ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
-	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
+	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.__held);
 	pager_port_unlock(ds);
 
 	for (amount_sent = 0;

Attachment: default_pager.patch2
Description: Binary data

Reply via email to