hello,

diff below adds a mutex to pf_state. It fixes a NULL pointer dereference panic
reported by Hrvoje sometime ago [1].  Besides adding a mutex to state the diff
addresses a race between pfsync and state purge thread. What happened in this
particular case was that state expired and its state keys got detached while it
was waiting to be processed by pfsync. Once pfsync got to it found state
keys detached and tripped on null pointer dereference. This is the
race change below fixes.

I'm not too much worried about contention on newly introduced mutex.
the thing is it is not a global mutex it is a per state mutex (per object
mutex). I don't expect to see two cpu's will be updating same state
very often.

thanks and
regards
sashan

[1] https://marc.info/?l=openbsd-bugs&m=166006758231954&w=2

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c
index d279ede9cd6..5f92ae6ec45 100644
--- a/sys/net/if_pfsync.c
+++ b/sys/net/if_pfsync.c
@@ -157,16 +157,16 @@ const struct {
 };
 
 struct pfsync_q {
-       void            (*write)(struct pf_state *, void *);
+       int             (*write)(struct pf_state *, void *);
        size_t          len;
        u_int8_t        action;
 };
 
 /* we have one of these for every PFSYNC_S_ */
-void   pfsync_out_state(struct pf_state *, void *);
-void   pfsync_out_iack(struct pf_state *, void *);
-void   pfsync_out_upd_c(struct pf_state *, void *);
-void   pfsync_out_del(struct pf_state *, void *);
+int    pfsync_out_state(struct pf_state *, void *);
+int    pfsync_out_iack(struct pf_state *, void *);
+int    pfsync_out_upd_c(struct pf_state *, void *);
+int    pfsync_out_del(struct pf_state *, void *);
 
 struct pfsync_q pfsync_qs[] = {
        { pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
@@ -1301,24 +1301,26 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
        return (0);
 }
 
-void
+int
 pfsync_out_state(struct pf_state *st, void *buf)
 {
        struct pfsync_state *sp = buf;
 
        pf_state_export(sp, st);
+       return (0);
 }
 
-void
+int
 pfsync_out_iack(struct pf_state *st, void *buf)
 {
        struct pfsync_ins_ack *iack = buf;
 
        iack->id = st->id;
        iack->creatorid = st->creatorid;
+       return (0);
 }
 
-void
+int
 pfsync_out_upd_c(struct pf_state *st, void *buf)
 {
        struct pfsync_upd_c *up = buf;
@@ -1329,9 +1331,10 @@ pfsync_out_upd_c(struct pf_state *st, void *buf)
        pf_state_peer_hton(&st->dst, &up->dst);
        up->creatorid = st->creatorid;
        up->timeout = st->timeout;
+       return (0);
 }
 
-void
+int
 pfsync_out_del(struct pf_state *st, void *buf)
 {
        struct pfsync_del_c *dp = buf;
@@ -1340,6 +1343,7 @@ pfsync_out_del(struct pf_state *st, void *buf)
        dp->creatorid = st->creatorid;
 
        SET(st->state_flags, PFSTATE_NOSYNC);
+       return (0);
 }
 
 void
@@ -1664,8 +1668,8 @@ pfsync_sendout(void)
                        KASSERT(st->snapped == 1);
                        st->sync_state = PFSYNC_S_NONE;
                        st->snapped = 0;
-                       pfsync_qs[q].write(st, m->m_data + offset);
-                       offset += pfsync_qs[q].len;
+                       if (pfsync_qs[q].write(st, m->m_data + offset) == 0)
+                               offset += pfsync_qs[q].len;
 
                        pf_state_unref(st);
                        count++;
diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h
index 5447b829d74..30c1df0de9c 100644
--- a/sys/net/if_pfsync.h
+++ b/sys/net/if_pfsync.h
@@ -326,7 +326,7 @@ int                 pfsync_sysctl(int *, u_int,  void *, 
size_t *,
 #define        PFSYNC_SI_CKSUM         0x02
 #define        PFSYNC_SI_ACK           0x04
 int                    pfsync_state_import(struct pfsync_state *, int);
-void                   pfsync_state_export(struct pfsync_state *,
+int                    pfsync_state_export(struct pfsync_state *,
                            struct pf_state *);
 
 void                   pfsync_insert_state(struct pf_state *);
diff --git a/sys/net/pf.c b/sys/net/pf.c
index c42f76dbc67..1083ee95b9a 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -185,7 +185,8 @@ int                  pf_translate_icmp_af(struct pf_pdesc*, 
int, void *);
 void                    pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
                            sa_family_t, struct pf_rule *, u_int);
 void                    pf_detach_state(struct pf_state *);
-void                    pf_state_key_detach(struct pf_state *, int);
+void                    pf_state_key_detach(struct pf_state *,
+                           struct pf_state_key *);
 u_int32_t               pf_tcp_iss(struct pf_pdesc *);
 void                    pf_rule_to_actions(struct pf_rule *,
                            struct pf_rule_actions *);
@@ -260,6 +261,9 @@ void                         
pf_state_key_unlink_inpcb(struct pf_state_key *);
 void                    pf_inpcb_unlink_state_key(struct inpcb *);
 void                    pf_pktenqueue_delayed(void *);
 int32_t                         pf_state_expires(const struct pf_state *, 
uint8_t);
+void                    pf_state_keys_take(struct pf_state *,
+                           struct pf_state_key **);
+void                    pf_state_keys_rele(struct pf_state_key **);
 
 #if NPFLOG > 0
 void                    pf_log_matches(struct pf_pdesc *, struct pf_rule *,
@@ -779,7 +783,8 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
pf_state *s, int idx)
                s->key[idx] = sk;
 
        if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
-               pf_state_key_detach(s, idx);
+               pf_state_key_detach(s, s->key[idx]);
+               s->key[idx] = NULL;
                return (-1);
        }
        si->s = s;
@@ -799,42 +804,50 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
pf_state *s, int idx)
 void
 pf_detach_state(struct pf_state *s)
 {
-       if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
-               s->key[PF_SK_WIRE] = NULL;
+       struct pf_state_key *key[2];
+
+       mtx_enter(&s->mtx);
+       key[PF_SK_WIRE] = s->key[PF_SK_WIRE];
+       key[PF_SK_STACK] = s->key[PF_SK_STACK];
+       s->key[PF_SK_WIRE] = NULL;
+       s->key[PF_SK_STACK] = NULL;
+       mtx_leave(&s->mtx);
+
+       if (key[PF_SK_WIRE] == key[PF_SK_STACK])
+               key[PF_SK_WIRE] = NULL;
 
-       if (s->key[PF_SK_STACK] != NULL)
-               pf_state_key_detach(s, PF_SK_STACK);
+       if (key[PF_SK_STACK] != NULL)
+               pf_state_key_detach(s, key[PF_SK_STACK]);
 
-       if (s->key[PF_SK_WIRE] != NULL)
-               pf_state_key_detach(s, PF_SK_WIRE);
+       if (key[PF_SK_WIRE] != NULL)
+               pf_state_key_detach(s, key[PF_SK_WIRE]);
 }
 
 void
-pf_state_key_detach(struct pf_state *s, int idx)
+pf_state_key_detach(struct pf_state *s, struct pf_state_key *key)
 {
        struct pf_state_item    *si;
-       struct pf_state_key     *sk;
 
-       if (s->key[idx] == NULL)
+       PF_STATE_ASSERT_LOCKED();
+
+       if (key == NULL)
                return;
 
-       si = TAILQ_FIRST(&s->key[idx]->states);
+       si = TAILQ_FIRST(&key->states);
        while (si && si->s != s)
            si = TAILQ_NEXT(si, entry);
 
        if (si) {
-               TAILQ_REMOVE(&s->key[idx]->states, si, entry);
+               TAILQ_REMOVE(&key->states, si, entry);
                pool_put(&pf_state_item_pl, si);
        }
 
-       sk = s->key[idx];
-       s->key[idx] = NULL;
-       if (TAILQ_EMPTY(&sk->states)) {
-               RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
-               sk->removed = 1;
-               pf_state_key_unlink_reverse(sk);
-               pf_state_key_unlink_inpcb(sk);
-               pf_state_key_unref(sk);
+       if (TAILQ_EMPTY(&key->states)) {
+               RB_REMOVE(pf_state_tree, &pf_statetbl, key);
+               key->removed = 1;
+               pf_state_key_unlink_reverse(key);
+               pf_state_key_unlink_inpcb(key);
+               pf_state_key_unref(key);
        }
 }
 
@@ -997,7 +1010,9 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key 
**skw,
                }
                *skw = s->key[PF_SK_WIRE];
                if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
-                       pf_state_key_detach(s, PF_SK_WIRE);
+                       pf_state_key_detach(s, s->key[PF_SK_WIRE]);
+                       s->key[PF_SK_WIRE] = NULL;
+                       *skw = NULL;
                        PF_STATE_EXIT_WRITE();
                        return (-1);
                }
@@ -1189,30 +1204,35 @@ pf_find_state_all(struct pf_state_key_cmp *key, u_int 
dir, int *more)
        return (ret ? ret->s : NULL);
 }
 
-void
+int
 pf_state_export(struct pfsync_state *sp, struct pf_state *st)
 {
        int32_t expire;
+       struct pf_state_key *key[2] = { NULL, NULL };
 
        memset(sp, 0, sizeof(struct pfsync_state));
 
        /* copy from state key */
-       sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
-       sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
-       sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
-       sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
-       sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
-       sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
-       sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
-       sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
-       sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
-       sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
-       sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
-       sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
+       pf_state_keys_take(st, key);
+       if ((key[PF_SK_WIRE] == NULL) || (key[PF_SK_STACK] == NULL))
+               return (-1);
+
+       sp->key[PF_SK_WIRE].addr[0] = key[PF_SK_WIRE]->addr[0];
+       sp->key[PF_SK_WIRE].addr[1] = key[PF_SK_WIRE]->addr[1];
+       sp->key[PF_SK_WIRE].port[0] = key[PF_SK_WIRE]->port[0];
+       sp->key[PF_SK_WIRE].port[1] = key[PF_SK_WIRE]->port[1];
+       sp->key[PF_SK_WIRE].rdomain = htons(key[PF_SK_WIRE]->rdomain);
+       sp->key[PF_SK_WIRE].af = key[PF_SK_WIRE]->af;
+       sp->key[PF_SK_STACK].addr[0] = key[PF_SK_STACK]->addr[0];
+       sp->key[PF_SK_STACK].addr[1] = key[PF_SK_STACK]->addr[1];
+       sp->key[PF_SK_STACK].port[0] = key[PF_SK_STACK]->port[0];
+       sp->key[PF_SK_STACK].port[1] = key[PF_SK_STACK]->port[1];
+       sp->key[PF_SK_STACK].rdomain = htons(key[PF_SK_STACK]->rdomain);
+       sp->key[PF_SK_STACK].af = key[PF_SK_STACK]->af;
        sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
        sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
-       sp->proto = st->key[PF_SK_WIRE]->proto;
-       sp->af = st->key[PF_SK_WIRE]->af;
+       sp->proto = key[PF_SK_WIRE]->proto;
+       sp->af = key[PF_SK_WIRE]->af;
 
        /* copy from state */
        strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
@@ -1259,6 +1279,10 @@ pf_state_export(struct pfsync_state *sp, struct pf_state 
*st)
        sp->set_tos = st->set_tos;
        sp->set_prio[0] = st->set_prio[0];
        sp->set_prio[1] = st->set_prio[1];
+
+       pf_state_keys_rele(key);
+
+       return (0);
 }
 
 int
@@ -4348,6 +4372,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, 
struct pf_rule *a,
         * pf_state_inserts() grabs reference for pfsync!
         */
        refcnt_init(&s->refcnt);
+       mtx_init(&s->mtx, IPL_NET);
 
        switch (pd->proto) {
        case IPPROTO_TCP:
@@ -8019,3 +8044,19 @@ pf_pktenqueue_delayed(void *arg)
 
        pool_put(&pf_pktdelay_pl, pdy);
 }
+
+void
+pf_state_keys_take(struct pf_state *st, struct pf_state_key *keys[])
+{
+       mtx_enter(&st->mtx);
+       keys[PF_SK_WIRE] = pf_state_key_ref(st->key[PF_SK_WIRE]);
+       keys[PF_SK_STACK] = pf_state_key_ref(st->key[PF_SK_STACK]);
+       mtx_leave(&st->mtx);
+}
+
+void
+pf_state_keys_rele(struct pf_state_key *keys[])
+{
+       pf_state_key_unref(keys[PF_SK_WIRE]);
+       pf_state_key_unref(keys[PF_SK_STACK]);
+}
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index cc6c9f3dedc..e99f5993783 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -764,6 +764,7 @@ struct pf_state {
        struct pf_sn_head        src_nodes;
        struct pf_state_key     *key[2];        /* addresses stack and wire  */
        struct pfi_kif          *kif;
+       struct mutex             mtx;
        u_int64_t                packets[2];
        u_int64_t                bytes[2];
        int32_t                  creation;
@@ -1739,7 +1740,7 @@ void                               
pf_state_rm_src_node(struct pf_state *,
 extern struct pf_state         *pf_find_state_byid(struct pf_state_cmp *);
 extern struct pf_state         *pf_find_state_all(struct pf_state_key_cmp *,
                                    u_int, int *);
-extern void                     pf_state_export(struct pfsync_state *,
+extern int                      pf_state_export(struct pfsync_state *,
                                    struct pf_state *);
 int                             pf_state_import(const struct pfsync_state *,
                                     int);

Reply via email to