On Sat, Dec 25, 2021 at 09:24:07AM +0100, Hrvoje Popovski wrote:
> On 24.12.2021. 0:55, Alexander Bluhm wrote:
> > I think we can remove the ipsec_in_use workaround now.  The IPsec
> > path is protected with the kernel lock.
> > 
> > There are some issues left:
> > - npppd l2pt ipsecflowinfo is not MP safe
> > - the acquire SA feature is not MP safe
> > - Hrvoje has seen a panic with sasync
> > 
> > If you use one of these, the diff below should trigger crashes faster.
> > If you use only regular IPsec or forwarding, I hope it is stable.
> 
> Hi,
> 
> after hitting sasyncd setup with this diff for some time i've run
> ipsecctl -sa just to see what's going on and box panic

According to ddb output ipsecctl is running in userland and the
panic is triggered by a kernel timeout.  Could it be coincidence?
When I run with 4 softnet threads, I see userland starvation.  Maybe
both timeout and ipsecctl wait for the exclusive netlock and are
executed shortly after each other.

> r620-1# ipsecctl -sa
> uvm_fault(0xffffffff82200c18, 0x417, 0, 2) -> e
> kernel: page fault trap, code=0
> Stopped at      pfsync_delete_tdb+0x84: movq    %rcx,0x8(%rsi)
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>  290490  40316      0         0x3          0    3  ipsecctl
>   10869  22801     68        0x10          0    5  sasyncd
>  504041  13202     68        0x10       0x80    1  isakmpd
>  476980   6400      0    0x100000          0    2  ntpd
>  224100  72648      0     0x14000      0x200    4  reaper
> * 75659  10211      0     0x14000 0x40000200    0K softclock
> pfsync_delete_tdb(ffff8000012e8008) at pfsync_delete_tdb+0x84
> tdb_free(ffff8000012e8008) at tdb_free+0x67
> tdb_timeout(ffff8000012e8008) at tdb_timeout+0x7e
> softclock_thread(ffff8000fffff260) at softclock_thread+0x13b
> end trace frame: 0x0, count: 11
> https://www.openbsd.org/ddb.html describes the minimum info required in
> bug reports.  Insufficient info makes it difficult to find and fix bugs.
> ddb{0}>

/home/bluhm/openbsd/cvs/src/sys/net/if_pfsync.c:2519
    5f30:       49 8b 86 10 04 00 00    mov    0x410(%r14),%rax
    5f37:       49 8b 8e 18 04 00 00    mov    0x418(%r14),%rcx
    5f3e:       49 8d 94 24 e8 09 00    lea    0x9e8(%r12),%rdx
    5f45:       00 
    5f46:       48 8d b0 10 04 00 00    lea    0x410(%rax),%rsi
    5f4d:       48 85 c0                test   %rax,%rax
*   5f50:       48 0f 44 f2             cmove  %rdx,%rsi
    5f54:       48 89 4e 08             mov    %rcx,0x8(%rsi)
    5f58:       49 8b 86 10 04 00 00    mov    0x410(%r14),%rax
    5f5f:       49 8b 8e 18 04 00 00    mov    0x418(%r14),%rcx
    5f66:       48 89 01                mov    %rax,(%rcx)
    5f69:       49 c7 86 18 04 00 00    movq   $0xffffffffffffffff,0x418(%r14)
    5f70:       ff ff ff ff 
    5f74:       49 c7 86 10 04 00 00    movq   $0xffffffffffffffff,0x410(%r14)
    5f7b:       ff ff ff ff 
/home/bluhm/openbsd/cvs/src/sys/net/if_pfsync.c:2520

  2508  void
  2509  pfsync_delete_tdb(struct tdb *t)
  2510  {
  2511          struct pfsync_softc *sc = pfsyncif;
  2512          size_t nlen;
  2513  
  2514          if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
  2515                  return;
  2516  
  2517          mtx_enter(&sc->sc_tdb_mtx);
  2518  
* 2519          TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
  2520          CLR(t->tdb_flags, TDBF_PFSYNC);
  2521  
  2522          nlen = sizeof(struct pfsync_tdb);
  2523          if (TAILQ_EMPTY(&sc->sc_tdb_q))
  2524                  nlen += sizeof(struct pfsync_subheader);
  2525          atomic_sub_long(&sc->sc_len, nlen);
  2526  
  2527          mtx_leave(&sc->sc_tdb_mtx);
  2528  }

Most sc_tdb_q are protected by sc_tdb_mtx.  But pfsync_drop_snapshot()
and pfsync_sendout() do not have it.

> rsi                            0x40f
> rcx               0xffffffffffffffff
> rax               0xffffffffffffffff

tqe_next and tqe_prev are -1.  Looks like a double remove.  Can
pfsync_delete_tdb() be called twice?  The tdb_refcnt should enforce
that tdb_free() is only called once per TDB.

>              flags: d1040<DELETED,TUNNELING,USEDTUNNEL,PFSYNC,PFSYNC_RPL>

Flags look reasonable.  But a problem could be that they are not
protected in if_pfsync.  Then set or cleared bits may be lost.

Fix both missing mutexes .  Also put the tdb_sync_entry
modification and TDBF_PFSYNC flag in the same sc_tdb_mtx.

I still have no pf sync setup.  Hrvoje, can you try this?

ok?

bluhm

Index: net/if_pfsync.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.299
diff -u -p -r1.299 if_pfsync.c
--- net/if_pfsync.c     25 Nov 2021 13:46:02 -0000      1.299
+++ net/if_pfsync.c     25 Dec 2021 17:46:06 -0000
@@ -295,7 +295,7 @@ void        pfsync_bulk_update(void *);
 void   pfsync_bulk_fail(void *);
 
 void   pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
-void   pfsync_drop_snapshot(struct pfsync_snapshot *);
+void   pfsync_drop_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
 
 void   pfsync_send_dispatch(void *);
 void   pfsync_send_pkt(struct mbuf *);
@@ -1618,7 +1618,7 @@ pfsync_grab_snapshot(struct pfsync_snaps
 }
 
 void
-pfsync_drop_snapshot(struct pfsync_snapshot *sn)
+pfsync_drop_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc * sc)
 {
        struct pf_state *st;
        struct pfsync_upd_req_item *ur;
@@ -1645,10 +1645,14 @@ pfsync_drop_snapshot(struct pfsync_snaps
                pool_put(&sn->sn_sc->sc_pool, ur);
        }
 
+       mtx_enter(&sc->sc_tdb_mtx);
        while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) {
                TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry);
+               mtx_enter(&t->tdb_mtx);
                CLR(t->tdb_flags, TDBF_PFSYNC);
+               mtx_leave(&t->tdb_mtx);
        }
+       mtx_leave(&sc->sc_tdb_mtx);
 }
 
 int
@@ -1675,7 +1679,7 @@ pfsync_drop(struct pfsync_softc *sc)
        struct pfsync_snapshot  sn;
 
        pfsync_grab_snapshot(&sn, sc);
-       pfsync_drop_snapshot(&sn);
+       pfsync_drop_snapshot(&sn, sc);
 }
 
 void
@@ -1767,7 +1771,7 @@ pfsync_sendout(void)
        if (m == NULL) {
                sc->sc_if.if_oerrors++;
                pfsyncstat_inc(pfsyncs_onomem);
-               pfsync_drop_snapshot(&sn);
+               pfsync_drop_snapshot(&sn, sc);
                return;
        }
 
@@ -1777,7 +1781,7 @@ pfsync_sendout(void)
                        m_free(m);
                        sc->sc_if.if_oerrors++;
                        pfsyncstat_inc(pfsyncs_onomem);
-                       pfsync_drop_snapshot(&sn);
+                       pfsync_drop_snapshot(&sn, sc);
                        return;
                }
        }
@@ -1835,14 +1839,18 @@ pfsync_sendout(void)
                subh = (struct pfsync_subheader *)(m->m_data + offset);
                offset += sizeof(*subh);
 
+               mtx_enter(&sc->sc_tdb_mtx);
                count = 0;
                while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
                        TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry);
                        pfsync_out_tdb(t, m->m_data + offset);
                        offset += sizeof(struct pfsync_tdb);
+                       mtx_enter(&t->tdb_mtx);
                        CLR(t->tdb_flags, TDBF_PFSYNC);
+                       mtx_leave(&t->tdb_mtx);
                        count++;
                }
+               mtx_leave(&sc->sc_tdb_mtx);
 
                bzero(subh, sizeof(*subh));
                subh->action = PFSYNC_ACT_TDB;
@@ -2489,9 +2497,11 @@ pfsync_update_tdb(struct tdb *t, int out
                        }
 
                        TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
-                       mtx_leave(&sc->sc_tdb_mtx);
-
+                       mtx_enter(&t->tdb_mtx);
                        SET(t->tdb_flags, TDBF_PFSYNC);
+                       mtx_leave(&t->tdb_mtx);
+
+                       mtx_leave(&sc->sc_tdb_mtx);
                        t->tdb_updates = 0;
                } while (0);
        } else {
@@ -2499,10 +2509,12 @@ pfsync_update_tdb(struct tdb *t, int out
                        schednetisr(NETISR_PFSYNC);
        }
 
+       mtx_enter(&t->tdb_mtx);
        if (output)
                SET(t->tdb_flags, TDBF_PFSYNC_RPL);
        else
                CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
+       mtx_leave(&t->tdb_mtx);
 }
 
 void
@@ -2517,7 +2529,9 @@ pfsync_delete_tdb(struct tdb *t)
        mtx_enter(&sc->sc_tdb_mtx);
 
        TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
+       mtx_enter(&t->tdb_mtx);
        CLR(t->tdb_flags, TDBF_PFSYNC);
+       mtx_leave(&t->tdb_mtx);
 
        nlen = sizeof(struct pfsync_tdb);
        if (TAILQ_EMPTY(&sc->sc_tdb_q))

Reply via email to