Hrvoje, managed to trigger diagnostic panics with diff [1] sent by bluhm@ some time ago. The panic Hrvoje sees comes from ether_input() here:
414 415 /* 416 * Third phase: bridge processing. 417 * 418 * Give the packet to a bridge interface, ie, bridge(4), 419 * switch(4), or tpmr(4), if it is configured. A bridge 420 * may take the packet and forward it to another port, or it 421 * may return it here to ether_input() to support local 422 * delivery to this port. 423 */ 424 425 ac = (struct arpcom *)ifp; 426 427 smr_read_enter(); 428 eb = SMR_PTR_GET(&ac->ac_brport); 429 if (eb != NULL) { 430 m = (*eb->eb_input)(ifp, m, dst, eb->eb_port); 431 if (m == NULL) { 432 smr_read_leave(); 433 return; 434 } 435 } 436 smr_read_leave(); 437 in current tree the ether_input() is protected by NET_LOCK(), which is grabbed by caller as a writer. bluhm's diff changes NET_LOCK() readlock, so ether_input() can run concurrently. Switching NET_LOCK() to r-lock has implications on smr read section above. The ting is the call to eb->eb_input() can sleep now. This is something what needs to be avoided within smr section. diff below introduces a reference counting to bridge port using atomic ops. the section above is changed to: 415 /* 416 * Third phase: bridge processing. 417 * 418 * Give the packet to a bridge interface, ie, bridge(4), 419 * switch(4), or tpmr(4), if it is configured. A bridge 420 * may take the packet and forward it to another port, or it 421 * may return it here to ether_input() to support local 422 * delivery to this port. 423 */ 424 425 ac = (struct arpcom *)ifp; 426 427 smr_read_enter(); 428 eb = SMR_PTR_GET(&ac->ac_brport); 429 if (eb != NULL) 430 eb->eb_port_take(eb->eb_port); 431 smr_read_leave(); 432 if (eb != NULL) { 433 m = (*eb->eb_input)(ifp, m, dst, eb->eb_port); 434 eb->eb_port_rele(eb->eb_port); 435 if (m == NULL) { 436 return; 437 } 438 } 439 each bridge module must provide its own implementation for eb_port_take()/eb_port_rele(). Not sure if it is a way to go. According to testing done by Hrvoje the diagnostic panic disappears. So we can say diff solves the problem, but there might be better ways to solve it. there are other bugs to kill as pointed out here [2]. thanks and regards sashan [1] https://marc.info/?l=openbsd-tech&m=161903387904923&w=2 [2] https://marc.info/?l=openbsd-tech&m=162099270106067&w=2 --------8<---------------8<---------------8<------------------8<-------- diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 7dbcf4a2ab0..2afcbb296a0 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -138,6 +138,8 @@ int bridge_ipsec(struct ifnet *, struct ether_header *, int, struct llc *, #endif int bridge_clone_create(struct if_clone *, int); int bridge_clone_destroy(struct ifnet *); +void bridge_take(void *); +void bridge_rele(void *); #define ETHERADDR_IS_IP_MCAST(a) \ /* struct etheraddr *a; */ \ @@ -152,6 +154,8 @@ struct if_clone bridge_cloner = const struct ether_brport bridge_brport = { bridge_input, + bridge_take, + bridge_rele, NULL, }; @@ -2049,3 +2053,15 @@ bridge_send_icmp_err(struct ifnet *ifp, dropit: m_freem(n); } + +void +bridge_take(void *unused) +{ + return; +} + +void +bridge_rele(void *unused) +{ + return; +} diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index c9493b7f857..4b975e2d4a1 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -426,14 +426,16 @@ ether_input(struct ifnet *ifp, struct mbuf *m) smr_read_enter(); eb = SMR_PTR_GET(&ac->ac_brport); + if (eb != NULL) + eb->eb_port_take(eb->eb_port); + smr_read_leave(); if (eb != NULL) { m = (*eb->eb_input)(ifp, m, dst, eb->eb_port); + eb->eb_port_rele(eb->eb_port); if (m == NULL) { - smr_read_leave(); return; } } - smr_read_leave(); /* * Fourth phase: drop service delimited packets. diff --git a/sys/net/if_switch.c b/sys/net/if_switch.c index 22aecdc6b75..ca24597d1f7 100644 --- a/sys/net/if_switch.c +++ b/sys/net/if_switch.c @@ -110,6 +110,8 @@ struct mbuf void switch_flow_classifier_dump(struct switch_softc *, struct switch_flow_classify *); void switchattach(int); +void switch_take(void *); +void switch_rele(void *); struct if_clone switch_cloner = IF_CLONE_INITIALIZER("switch", switch_clone_create, switch_clone_destroy); @@ -122,6 +124,8 @@ struct pool swfcl_pool; const struct ether_brport switch_brport = { switch_input, + switch_take, + switch_rele, NULL, }; @@ -1571,3 +1575,15 @@ ofp_split_mbuf(struct mbuf *m, struct mbuf **mtail) return (0); } + +void +switch_take(void *unused) +{ + return; +} + +void +switch_rele(void *unused) +{ + return; +} diff --git a/sys/net/if_tpmr.c b/sys/net/if_tpmr.c index 981a7d5a09b..f14a8553485 100644 --- a/sys/net/if_tpmr.c +++ b/sys/net/if_tpmr.c @@ -83,6 +83,8 @@ struct tpmr_port { struct tpmr_softc *p_tpmr; unsigned int p_slot; + unsigned int p_refcnt; + struct ether_brport p_brport; }; @@ -125,6 +127,8 @@ static int tpmr_add_port(struct tpmr_softc *, static int tpmr_del_port(struct tpmr_softc *, const struct ifbreq *); static int tpmr_port_list(struct tpmr_softc *, struct ifbifconf *); +static void tpmr_p_take(void *); +static void tpmr_p_rele(void *); static struct if_clone tpmr_cloner = IF_CLONE_INITIALIZER("tpmr", tpmr_clone_create, tpmr_clone_destroy); @@ -497,6 +501,8 @@ tpmr_add_port(struct tpmr_softc *sc, const struct ifbreq *req) if_detachhook_add(ifp0, &p->p_dtask); p->p_brport.eb_input = tpmr_input; + p->p_brport.eb_port_take = tpmr_p_take; + p->p_brport.eb_port_rele = tpmr_p_rele; p->p_brport.eb_port = p; /* commit */ @@ -512,6 +518,7 @@ tpmr_add_port(struct tpmr_softc *sc, const struct ifbreq *req) p->p_slot = i; + tpmr_p_take(p); ether_brport_set(ifp0, &p->p_brport); ifp0->if_ioctl = tpmr_p_ioctl; ifp0->if_output = tpmr_p_output; @@ -665,6 +672,26 @@ tpmr_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst, return ((*p_output)(ifp0, m, dst, rt)); } +static void +tpmr_p_take(void *p) +{ + struct tpmr_port *port = p; + + atomic_inc_int((int *)(&port->p_refcnt)); +} + +static void +tpmr_p_rele(void *p) +{ + struct tpmr_port *port = p; + struct ifnet *ifp0 = port->p_ifp0; + + if (atomic_dec_int_nv((int *)(&port->p_refcnt)) == 0) { + if_put(ifp0); + free(port, M_DEVBUF, sizeof(*port)); + } +} + static void tpmr_p_dtor(struct tpmr_softc *sc, struct tpmr_port *p, const char *op) { @@ -692,8 +719,7 @@ tpmr_p_dtor(struct tpmr_softc *sc, struct tpmr_port *p, const char *op) smr_barrier(); - if_put(ifp0); - free(p, M_DEVBUF, sizeof(*p)); + tpmr_p_rele(p); if (ifp->if_link_state != LINK_STATE_DOWN) { ifp->if_link_state = LINK_STATE_DOWN; diff --git a/sys/net/if_veb.c b/sys/net/if_veb.c index 6b141bfb5cf..843ded1a48a 100644 --- a/sys/net/if_veb.c +++ b/sys/net/if_veb.c @@ -209,6 +209,9 @@ static void veb_eb_port_rele(void *, void *); static size_t veb_eb_port_ifname(void *, char *, size_t, void *); static void veb_eb_port_sa(void *, struct sockaddr_storage *, void *); +static void veb_eb_brport_take(void *); +static void veb_eb_brport_rele(void *); + static const struct etherbridge_ops veb_etherbridge_ops = { veb_eb_port_cmp, veb_eb_port_take, @@ -1256,6 +1259,9 @@ veb_add_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span) p->p_brport.eb_input = veb_port_input; } + p->p_brport.eb_port_take = veb_eb_brport_take; + p->p_brport.eb_port_rele = veb_eb_brport_rele; + /* this might have changed if we slept for malloc or ifpromisc */ error = ether_brport_isset(ifp0); if (error != 0) @@ -1273,6 +1279,7 @@ veb_add_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span) SMR_TAILQ_INSERT_TAIL_LOCKED(&port_list->l_list, p, p_entry); port_list->l_count++; + veb_eb_port_take(NULL, p); ether_brport_set(ifp0, &p->p_brport); if (ifp0->if_enqueue != vport_enqueue) { /* vport is special */ ifp0->if_ioctl = veb_p_ioctl; @@ -1880,8 +1887,7 @@ veb_p_dtor(struct veb_softc *sc, struct veb_port *p, const char *op) veb_rule_list_free(TAILQ_FIRST(&p->p_vrl)); - if_put(ifp0); - free(p, M_DEVBUF, sizeof(*p)); + veb_eb_port_rele(NULL, p); } static void @@ -1977,8 +1983,25 @@ static void veb_eb_port_rele(void *arg, void *port) { struct veb_port *p = port; + struct ifnet *ifp0 = p->p_ifp0; - refcnt_rele_wake(&p->p_refs); + if (refcnt_rele(&p->p_refs)) { + if_put(ifp0); + wakeup_one(&p->p_refs); + free(p, M_DEVBUF, sizeof(*p)); + } +} + +static void +veb_eb_brport_take(void *port) +{ + veb_eb_port_take(NULL, port); +} + +static void +veb_eb_brport_rele(void *port) +{ + veb_eb_port_rele(NULL, port); } static size_t @@ -2146,6 +2169,9 @@ vport_enqueue(struct ifnet *ifp, struct mbuf *m) smr_read_enter(); eb = SMR_PTR_GET(&ac->ac_brport); + if (eb != NULL) + eb->eb_port_take(eb->eb_port); + smr_read_leave(); if (eb != NULL) { struct ether_header *eh; uint64_t dst; @@ -2164,8 +2190,9 @@ vport_enqueue(struct ifnet *ifp, struct mbuf *m) m = (*eb->eb_input)(ifp, m, dst, eb->eb_port); error = 0; + + eb->eb_port_rele(eb->eb_port); } - smr_read_leave(); m_freem(m); diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h index 56e6384f39b..611e0fa704b 100644 --- a/sys/netinet/if_ether.h +++ b/sys/netinet/if_ether.h @@ -220,6 +220,8 @@ do { \ struct ether_brport { struct mbuf *(*eb_input)(struct ifnet *, struct mbuf *, uint64_t, void *); + void (*eb_port_take)(void *); + void (*eb_port_rele)(void *); void *eb_port; };