> Meanwhile another process, waiting for the grower to finish, is
> spinning forever at 100% doing the mutex_exit/mutex_enter/ERESTART
> thing on the same pool. It looks to me like the grower never actually
> gets scheduled to run.

The attached diff works around the problem by not releasing the lock
during allocation in the PR_NOWAIT case. I don't know whether doing it
that way could have any negative side effects.
With the diff applied, the machine has survived for 30+ minutes, whereas
it previously hung after just 20 seconds.

Kind regards,
-Tobias
Index: subr_pool.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_pool.c,v
retrieving revision 1.219
diff -u -r1.219 subr_pool.c
--- subr_pool.c 16 Dec 2017 03:13:29 -0000      1.219
+++ subr_pool.c 29 Dec 2017 14:33:40 -0000
@@ -1091,7 +1091,9 @@
        if ((flags & PR_WAITOK) == 0)
                pp->pr_flags |= PR_GROWINGNOWAIT;
 
-       mutex_exit(&pp->pr_lock);
+       if (flags & PR_WAITOK)
+               mutex_exit(&pp->pr_lock);
+
        char *cp = pool_allocator_alloc(pp, flags);
        if (__predict_false(cp == NULL))
                goto out;
@@ -1102,7 +1104,8 @@
                goto out;
        }
 
-       mutex_enter(&pp->pr_lock);
+       if (flags & PR_WAITOK)
+               mutex_enter(&pp->pr_lock);
        pool_prime_page(pp, cp, ph);
        pp->pr_npagealloc++;
        KASSERT(pp->pr_flags & PR_GROWING);
@@ -1115,8 +1118,9 @@
        return 0;
 out:
        KASSERT(pp->pr_flags & PR_GROWING);
+       if (flags & PR_WAITOK)
+               mutex_enter(&pp->pr_lock);
        pp->pr_flags &= ~(PR_GROWING|PR_GROWINGNOWAIT);
-       mutex_enter(&pp->pr_lock);
        return ENOMEM;
 }
 

Reply via email to