On 1/27/22, Gleb Smirnoff <[email protected]> wrote:
> The branch main has been updated by glebius:
>
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=e1882428dcbbafd2814d7e17b977a8f686784b39
>
> commit e1882428dcbbafd2814d7e17b977a8f686784b39
> Author:     Gleb Smirnoff <[email protected]>
> AuthorDate: 2022-01-27 05:58:50 +0000
> Commit:     Gleb Smirnoff <[email protected]>
> CommitDate: 2022-01-27 05:58:50 +0000
>
>     ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif
>
>     Supplement ifindex table with generation count and use it to
>     serialize & restore an ifnet pointer.
>

This commit or something from the series reliably causes my box to
crash on boot:

Fatal trap 12: page fault while in kernel mode
cpuid = 7; apic id = 07
fault virtual address   = 0x54
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff80616d98
stack pointer           = 0x28:0xfffffe06462889e0
frame pointer           = 0x28:0xfffffe06462889e0
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 1855 (route)
trap number             = 12
panic: page fault
cpuid = 7
time = 1643302734
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe06462887a0
vpanic() at vpanic+0x17f/frame 0xfffffe06462887f0
panic() at panic+0x43/frame 0xfffffe0646288850
trap_fatal() at trap_fatal+0x385/frame 0xfffffe06462888b0
trap_pfault() at trap_pfault+0x4f/frame 0xfffffe0646288910
calltrap() at calltrap+0x8/frame 0xfffffe0646288910
--- trap 0xc, rip = 0xffffffff80616d98, rsp = 0xfffffe06462889e0, rbp
= 0xfffffe06462889e0 ---
m_rcvif_serialize() at m_rcvif_serialize+0x8/frame 0xfffffe06462889e0
netisr_queue_src() at netisr_queue_src+0x15c/frame 0xfffffe0646288a30
route_output() at route_output+0x890/frame 0xfffffe0646288c70
sosend_generic() at sosend_generic+0x456/frame 0xfffffe0646288d10
soo_write() at soo_write+0x43/frame 0xfffffe0646288d40
dofilewrite() at dofilewrite+0x81/frame 0xfffffe0646288d90
sys_write() at sys_write+0xbc/frame 0xfffffe0646288e00
amd64_syscall() at amd64_syscall+0x107/frame 0xfffffe0646288f30
fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe0646288f30
--- syscall (4, FreeBSD ELF64, sys_write), rip = 0x3b000ba6703a, rsp =
0x3b0009378e48, rbp = 0x3b00093796f0 ---
KDB: enter: panic
[ thread pid 1855 tid 102265 ]
Stopped at      kdb_enter+0x37: movq    $0,0x9cc17e(%rip)


>     Reviewed by:            kp
>     Differential revision:  https://reviews.freebsd.org/D33266
>     Fun note:               git show e6abef09187a
> ---
>  sys/kern/kern_mbuf.c | 22 ++++++++++++++++++++++
>  sys/net/if.c         | 49
> ++++++++++++++++++++++++++++++++++++-------------
>  sys/net/if_var.h     |  9 ++++++++-
>  sys/sys/mbuf.h       |  6 ++++++
>  4 files changed, 72 insertions(+), 14 deletions(-)
>
> diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
> index f1e76ef00c65..5c69f663c0e2 100644
> --- a/sys/kern/kern_mbuf.c
> +++ b/sys/kern/kern_mbuf.c
> @@ -1635,6 +1635,28 @@ m_snd_tag_destroy(struct m_snd_tag *mst)
>       counter_u64_add(snd_tag_count, -1);
>  }
>
> +void
> +m_rcvif_serialize(struct mbuf *m)
> +{
> +     u_short idx, gen;
> +
> +     M_ASSERTPKTHDR(m);
> +     idx = m->m_pkthdr.rcvif->if_index;
> +     gen = m->m_pkthdr.rcvif->if_idxgen;
> +     m->m_pkthdr.rcvidx = idx;
> +     m->m_pkthdr.rcvgen = gen;
> +}
> +
> +struct ifnet *
> +m_rcvif_restore(struct mbuf *m)
> +{
> +
> +     M_ASSERTPKTHDR(m);
> +
> +     return ((m->m_pkthdr.rcvif = ifnet_byindexgen(m->m_pkthdr.rcvidx,
> +         m->m_pkthdr.rcvgen)));
> +}
> +
>  /*
>   * Allocate an mbuf with anonymous external pages.
>   */
> diff --git a/sys/net/if.c b/sys/net/if.c
> index f148ae8c9c6d..e8d65e64518a 100644
> --- a/sys/net/if.c
> +++ b/sys/net/if.c
> @@ -313,7 +313,10 @@ VNET_DEFINE(struct ifgrouphead, ifg_head);
>  /* Table of ifnet by index. */
>  static int if_index;
>  static int if_indexlim = 8;
> -static struct ifnet **ifindex_table;
> +static struct ifindex_entry {
> +     struct ifnet    *ife_ifnet;
> +     uint16_t        ife_gencnt;
> +} *ifindex_table;
>
>  SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
>      CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
> @@ -325,8 +328,8 @@ sysctl_ifcount(SYSCTL_HANDLER_ARGS)
>
>       IFNET_RLOCK();
>       for (int i = 1; i <= if_index; i++)
> -             if (ifindex_table[i] != NULL &&
> -                 ifindex_table[i]->if_vnet == curvnet)
> +             if (ifindex_table[i].ife_ifnet != NULL &&
> +                 ifindex_table[i].ife_ifnet->if_vnet == curvnet)
>                       rv = i;
>       IFNET_RUNLOCK();
>
> @@ -370,7 +373,7 @@ ifnet_byindex(u_int idx)
>       if (__predict_false(idx > if_index))
>               return (NULL);
>
> -     ifp = ck_pr_load_ptr(&ifindex_table[idx]);
> +     ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
>
>       if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet)
>               ifp = NULL;
> @@ -391,6 +394,24 @@ ifnet_byindex_ref(u_int idx)
>       return (ifp);
>  }
>
> +struct ifnet *
> +ifnet_byindexgen(uint16_t idx, uint16_t gen)
> +{
> +     struct ifnet *ifp;
> +
> +     NET_EPOCH_ASSERT();
> +
> +     if (__predict_false(idx > if_index))
> +             return (NULL);
> +
> +     ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
> +
> +     if (ifindex_table[idx].ife_gencnt == gen)
> +             return (ifp);
> +     else
> +             return (NULL);
> +}
> +
>  struct ifaddr *
>  ifaddr_byindex(u_short idx)
>  {
> @@ -571,13 +592,13 @@ if_alloc_domain(u_char type, int numa_domain)
>        * next slot.
>        */
>       for (idx = 1; idx <= if_index; idx++) {
> -             if (ifindex_table[idx] == NULL)
> +             if (ifindex_table[idx].ife_ifnet == NULL)
>                       break;
>       }
>
>       /* Catch if_index overflow. */
>       if (idx >= if_indexlim) {
> -             struct ifnet **new, **old;
> +             struct ifindex_entry *new, *old;
>               int newlim;
>
>               newlim = if_indexlim * 2;
> @@ -593,7 +614,8 @@ if_alloc_domain(u_char type, int numa_domain)
>               if_index = idx;
>
>       ifp->if_index = idx;
> -     ck_pr_store_ptr(&ifindex_table[idx], ifp);
> +     ifp->if_idxgen = ifindex_table[idx].ife_gencnt;
> +     ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp);
>       IFNET_WUNLOCK();
>
>       return (ifp);
> @@ -668,9 +690,10 @@ if_free(struct ifnet *ifp)
>        * virtualized and interface would outlive the vnet.
>        */
>       IFNET_WLOCK();
> -     MPASS(ifindex_table[ifp->if_index] == ifp);
> -     ck_pr_store_ptr(&ifindex_table[ifp->if_index], NULL);
> -     while (if_index > 0 && ifindex_table[if_index] == NULL)
> +     MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
> +     ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL);
> +     ifindex_table[ifp->if_index].ife_gencnt++;
> +     while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL)
>               if_index--;
>       IFNET_WUNLOCK();
>
> @@ -819,7 +842,7 @@ if_attach_internal(struct ifnet *ifp, bool vmove)
>       struct sockaddr_dl *sdl;
>       struct ifaddr *ifa;
>
> -     MPASS(ifindex_table[ifp->if_index] == ifp);
> +     MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
>
>  #ifdef VIMAGE
>       ifp->if_vnet = curvnet;
> @@ -4508,8 +4531,8 @@ if_show_ifnet(struct ifnet *ifp)
>       IF_DB_PRINTF("%d", if_dunit);
>       IF_DB_PRINTF("%s", if_description);
>       IF_DB_PRINTF("%u", if_index);
> +     IF_DB_PRINTF("%d", if_idxgen);
>       IF_DB_PRINTF("%u", if_refcount);
> -     IF_DB_PRINTF("%d", if_index_reserved);
>       IF_DB_PRINTF("%p", if_softc);
>       IF_DB_PRINTF("%p", if_l2com);
>       IF_DB_PRINTF("%p", if_llsoftc);
> @@ -4564,7 +4587,7 @@ DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets)
>       u_short idx;
>
>       for (idx = 1; idx <= if_index; idx++) {
> -             ifp = ifindex_table[idx];
> +             ifp = ifindex_table[idx].ife_ifnet;
>               if (ifp == NULL)
>                       continue;
>               db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
> diff --git a/sys/net/if_var.h b/sys/net/if_var.h
> index dedc73718125..21b3687f62c1 100644
> --- a/sys/net/if_var.h
> +++ b/sys/net/if_var.h
> @@ -334,7 +334,7 @@ struct ifnet {
>       const char *if_dname;           /* driver name */
>       int     if_dunit;               /* unit or IF_DUNIT_NONE */
>       u_short if_index;               /* numeric abbreviation for this if  */
> -     short   if_index_reserved;      /* spare space to grow if_index */
> +     u_short if_idxgen;              /* ... and its generation count */
>       char    if_xname[IFNAMSIZ];     /* external name (name + unit) */
>       char    *if_description;        /* interface description */
>
> @@ -644,6 +644,13 @@ extern   struct sx ifnet_sxlock;
>  struct ifnet *ifnet_byindex(u_int);
>  struct ifnet *ifnet_byindex_ref(u_int);
>
> +/*
> + * ifnet_byindexgen() looks up ifnet by index and generation count,
> + * attempting to restore a weak pointer that had been stored across
> + * the epoch.
> + */
> +struct ifnet   *ifnet_byindexgen(uint16_t idx, uint16_t gen);
> +
>  /*
>   * Given the index, ifaddr_byindex() returns the one and only
>   * link-level ifaddr for the interface. You are not supposed to use
> diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
> index 77364f428b12..ebe8ef205055 100644
> --- a/sys/sys/mbuf.h
> +++ b/sys/sys/mbuf.h
> @@ -159,6 +159,10 @@ struct pkthdr {
>       union {
>               struct m_snd_tag *snd_tag;      /* send tag, if any */
>               struct ifnet    *rcvif;         /* rcv interface */
> +             struct {
> +                     uint16_t rcvidx;        /* rcv interface index ... */
> +                     uint16_t rcvgen;        /* ... and generation count */
> +             };
>       };
>       SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
>       int32_t          len;           /* total packet length */
> @@ -862,6 +866,8 @@ int                m_snd_tag_alloc(struct ifnet *,
>  void          m_snd_tag_init(struct m_snd_tag *, struct ifnet *,
>                   const struct if_snd_tag_sw *);
>  void          m_snd_tag_destroy(struct m_snd_tag *);
> +void          m_rcvif_serialize(struct mbuf *);
> +struct ifnet *m_rcvif_restore(struct mbuf *);
>
>  static __inline int
>  m_gettype(int size)
>


-- 
Mateusz Guzik <mjguzik gmail.com>

Reply via email to