On Mon, Oct 01, 2018 at 01:50:59PM +0200, Hrvoje Popovski wrote:
> Hi all,
>
> while testing sasha's "pfsync: avoid a recursion on PF_LOCK" diff i
> manage to get panic. first i thought that this panic have something to
> do with sasha@ work but i can easily reproduce it on clean -current.
>
> while firewall is under stress and forwarding traffic and i'm doing this
> in loop
>
> ifconfig pfsync0 destroy && sleep 2 && sh netstart pfsync0 && sleep 2
>
> i'm getting this panic:
>
>
> uvm_fault(0xffffffff81d51fe8, 0x8, 0, 2) -> e
> kernel: page fault trap, code=0
> Stopped at softclock_thread+0xef: movq %rdx,0x8(%rcx)
> ddb{0}>
pfsync_clone_destroy() lacks proper locking and its timeout cancellation
is not robust. Please try the patch below.
Index: net/if_pfsync.c
===================================================================
RCS file: src/sys/net/if_pfsync.c,v
retrieving revision 1.259
diff -u -p -r1.259 if_pfsync.c
--- net/if_pfsync.c 11 Sep 2018 07:53:38 -0000 1.259
+++ net/if_pfsync.c 1 Oct 2018 12:03:16 -0000
@@ -333,9 +333,9 @@ pfsync_clone_create(struct if_clone *ifc
ifp->if_hdrlen = sizeof(struct pfsync_header);
ifp->if_mtu = ETHERMTU;
ifp->if_xflags = IFXF_CLONED;
- timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
- timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
- timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
+ timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL);
+ timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL);
+ timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL);
if_attach(ifp);
if_alloc_sadl(ifp);
@@ -359,9 +359,8 @@ pfsync_clone_destroy(struct ifnet *ifp)
struct pfsync_softc *sc = ifp->if_softc;
struct pfsync_deferral *pd;
- timeout_del(&sc->sc_bulkfail_tmo);
- timeout_del(&sc->sc_bulk_tmo);
- timeout_del(&sc->sc_tmo);
+ NET_LOCK();
+
#if NCARP > 0
if (!pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
@@ -375,7 +374,11 @@ pfsync_clone_destroy(struct ifnet *ifp)
hook_disestablish(sc->sc_sync_if->if_detachhooks,
sc->sc_dhcookie);
}
+
+ /* XXXSMP breaks atomicity */
+ NET_UNLOCK();
if_detach(ifp);
+ NET_LOCK();
pfsync_drop(sc);
@@ -385,12 +388,17 @@ pfsync_clone_destroy(struct ifnet *ifp)
pfsync_undefer(pd, 0);
}
+ pfsyncif = NULL;
+ timeout_del(&sc->sc_bulkfail_tmo);
+ timeout_del(&sc->sc_bulk_tmo);
+ timeout_del(&sc->sc_tmo);
+
+ NET_UNLOCK();
+
pool_destroy(&sc->sc_pool);
free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
free(sc, M_DEVBUF, sizeof(*sc));
- pfsyncif = NULL;
-
return (0);
}
@@ -1752,6 +1760,9 @@ pfsync_undefer(struct pfsync_deferral *p
NET_ASSERT_LOCKED();
+ if (sc == NULL)
+ return;
+
TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
sc->sc_deferred--;
@@ -2252,11 +2263,14 @@ pfsync_bulk_start(void)
void
pfsync_bulk_update(void *arg)
{
- struct pfsync_softc *sc = arg;
+ struct pfsync_softc *sc;
struct pf_state *st;
int i = 0;
NET_LOCK();
+ sc = pfsyncif;
+ if (sc == NULL)
+ goto out;
st = sc->sc_bulk_next;
for (;;) {
@@ -2287,6 +2301,7 @@ pfsync_bulk_update(void *arg)
break;
}
}
+ out:
NET_UNLOCK();
}
@@ -2316,9 +2331,12 @@ pfsync_bulk_status(u_int8_t status)
void
pfsync_bulk_fail(void *arg)
{
- struct pfsync_softc *sc = arg;
+ struct pfsync_softc *sc;
NET_LOCK();
+ sc = pfsyncif;
+ if (sc == NULL)
+ goto out;
if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
/* Try again */
timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
@@ -2343,6 +2361,7 @@ pfsync_bulk_fail(void *arg)
sc->sc_link_demoted = 0;
DPFPRINTF(LOG_ERR, "failed to receive bulk update");
}
+ out:
NET_UNLOCK();
}