On Mon, Jun 09, 2025 at 04:42:55AM +0000, Paul Vixie wrote: > ok so i've learned a lot by trying to get fibnum2 upstreamed. needed ipv6, > needed to be more minimalistic about unrelated changes, needed to keep in > synch with the freebsd-current tree -- the works. i also scrambled my git > tree > somehow in a way that i found unrecoverable, thus i ended up closing that PR > after fixing everything i learned about other than man page changes.
I've made a number of inline comments. > before i try again i'd like to go back to where it all started -- this e-mail > list. visual review would be great, testing would be even better. for those > who didn't keep state from last time, the intent is to let every interface > have its own default route, and ensure inbound/outbound path symmetry. i use > different interfaces for serving http-et-al than for doing ssh for operations/ > management, and i've never been comfortable with a single default route. > > diff is attached. "patch -p1" is your friend. > > -- > Paul Vixie > diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c > index 6c9eb7139cd1..518ee7ee3e75 100644 > --- a/sys/kern/uipc_socket.c > +++ b/sys/kern/uipc_socket.c > @@ -1003,13 +1003,13 @@ SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, > sooverinterval, CTLFLAG_RW, > * accept(2), the protocol has two options: > * 1) Call legacy sonewconn() function, which would call protocol attach > * method, same as used for socket(2). > - * 2) Call solisten_clone(), do attach that is specific to a cloned > connection, > - * and then call solisten_enqueue(). > + * 2) Call solisten_clone(), do an attach that is specific to a cloned > + * connection, and then call solisten_enqueue(). > * > * Note: the ref count on the socket is 0 on return. > */ > struct socket * > -solisten_clone(struct socket *head) > +solisten_clone(struct socket *head, int fibnum) > { > struct sbuf descrsb; > struct socket *so; > @@ -1180,7 +1180,8 @@ solisten_clone(struct socket *head) > SO_DONTROUTE | SO_LINGER | SO_OOBINLINE | SO_NOSIGPIPE); > so->so_linger = head->so_linger; > so->so_state = head->so_state; > - so->so_fibnum = head->so_fibnum; > + if ((so->so_fibnum = head->so_fibnum) == 0) > + so->so_fibnum = fibnum; The double assignment looks odd to me. It'd be neater to write: so->so_fibnum == head->so_fibnum != 0 ? head->so_fibnum : fibnum; > so->so_proto = head->so_proto; > so->so_cred = crhold(head->so_cred); > #ifdef SOCKET_HHOOK > @@ -1226,7 +1227,7 @@ sonewconn(struct socket *head, int connstatus) > { > struct socket *so; > > - if ((so = solisten_clone(head)) == NULL) > + if ((so = solisten_clone(head, head->so_fibnum)) == NULL) > return (NULL); > > if (so->so_proto->pr_attach(so, 0, NULL) != 0) { > diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c > index 95d857b2625d..29b6d2abb090 100644 > --- a/sys/kern/uipc_usrreq.c > +++ b/sys/kern/uipc_usrreq.c > @@ -2916,7 +2916,7 @@ unp_connectat(int fd, struct socket *so, struct > sockaddr *nam, > } > if (connreq) { > if (SOLISTENING(so2)) > - so2 = solisten_clone(so2); > + so2 = solisten_clone(so2, 0); > else > so2 = NULL; > if (so2 == NULL) { > diff --git a/sys/net/if.c b/sys/net/if.c > index 0255c27a3136..0eb8d028cba7 100644 > --- a/sys/net/if.c > +++ b/sys/net/if.c > @@ -1805,11 +1805,11 @@ sa_dl_equal(const struct sockaddr *a, const struct > sockaddr *b) > } > > /* > - * Locate an interface based on a complete address. > + * Locate an interface based on a complete address, > + * and optionally retrieve its FIB number. > */ > -/*ARGSUSED*/ > struct ifaddr * > -ifa_ifwithaddr(const struct sockaddr *addr) > +ifa_ifwithaddr_getfib(const struct sockaddr *addr, uint16_t *fibnum) > { > struct ifnet *ifp; > struct ifaddr *ifa; > @@ -1834,17 +1834,23 @@ ifa_ifwithaddr(const struct sockaddr *addr) > } > ifa = NULL; > done: > + if (fibnum != NULL && ifp != NULL) > + *fibnum = ifp->if_fib; Rather than complicating this function to add an additional return value, why not let the caller get the fib number directly? They can just get ifa->ifa_ifp->if_fib. > return (ifa); > } > > +/* > + * Test for existence of an interface having this complete address, > + * optionally retrieving its FIB number. > + */ > int > -ifa_ifwithaddr_check(const struct sockaddr *addr) > +ifa_ifwithaddr_check_getfib(const struct sockaddr *addr, uint16_t *fibnum) > { > struct epoch_tracker et; > int rc; > > NET_EPOCH_ENTER(et); > - rc = (ifa_ifwithaddr(addr) != NULL); > + rc = (ifa_ifwithaddr_getfib(addr, fibnum) != NULL); > NET_EPOCH_EXIT(et); > return (rc); > } Similarly, I think adding this extra helper is unnecessary. Callers which want the FIB number can use ifa_ifwithaddr() and extract it directly. That's how in6_pcbbind_avail() handles fetching ifaddr flags, for instance. > diff --git a/sys/net/if_var.h b/sys/net/if_var.h > index 83d33330987e..e825be3180a3 100644 > --- a/sys/net/if_var.h > +++ b/sys/net/if_var.h > @@ -545,8 +545,9 @@ int ifa_add_loopback_route(struct ifaddr *, struct > sockaddr *); > int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); > int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); > > -struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); > -int ifa_ifwithaddr_check(const struct sockaddr *); > +struct ifaddr *ifa_ifwithaddr_getfib(const struct sockaddr *, uint16_t > *); > +int ifa_ifwithaddr_check_getfib(const struct sockaddr *, > + uint16_t *); > struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); > struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); > struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); > @@ -555,6 +556,18 @@ struct ifaddr *ifa_ifwithroute(int, const struct > sockaddr *, > struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, if_t); > int ifa_preferred(struct ifaddr *, struct ifaddr *); > > +static inline struct ifaddr * > +ifa_ifwithaddr(const struct sockaddr *sa) > +{ > + return (ifa_ifwithaddr_getfib(sa, NULL)); > +} > + > +static inline int > +ifa_ifwithaddr_check(const struct sockaddr *sa) > +{ > + return (ifa_ifwithaddr_check_getfib(sa, NULL)); > +} > + > int if_simloop(if_t ifp, struct mbuf *m, int af, int hlen); > > typedef void *if_com_alloc_t(u_char type, if_t ifp); > diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c > index bccd4b84561a..e469c3e1275a 100644 > --- a/sys/netinet/in_pcb.c > +++ b/sys/netinet/in_pcb.c > @@ -646,7 +646,8 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) > inp->inp_pcbinfo = pcbinfo; > inp->inp_socket = so; > inp->inp_cred = crhold(so->so_cred); > - inp->inp_inc.inc_fibnum = so->so_fibnum; > + /* The FIB number is in both inp_inc and inp_socket; synchronize. */ > + inp->inp_inc.inc_fibnum = inp->inp_socket->so_fibnum; so == inp->inp_socket, so this part of the change is redundant. > #ifdef MAC > error = mac_inpcb_init(inp, M_NOWAIT); > if (error != 0) > @@ -729,7 +730,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, > int flags, > return (EINVAL); > anonport = sin == NULL || sin->sin_port == 0; > error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr, > - &inp->inp_lport, flags, cred); > + &inp->inp_lport, flags, > + &inp->inp_inc.inc_fibnum, cred); > if (error) > return (error); > if (__predict_false((error = in_pcbinshash(inp)) != 0)) { > @@ -741,6 +743,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, > int flags, > } > if (anonport) > inp->inp_flags |= INP_ANONPORT; > + /* The FIB number is in both inp_inc and inp_socket; synchronize. */ > + sosetfib(inp->inp_socket, inp->inp_inc.inc_fibnum); Why is there no such call in the IPv6 case? > return (0); > } > #endif > @@ -910,8 +914,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, > u_short *lportp, > */ > static int > in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr, > - const u_short lport, const int fib, int sooptions, int lookupflags, > - struct ucred *cred) > + const u_short lport, const int fib, int sooptions, > + int lookupflags, uint16_t *fibnum, struct ucred *cred) > { > int reuseport, reuseport_lb; > > @@ -951,7 +955,8 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr > laddr, > * to any endpoint address, local or not. > */ > if ((inp->inp_flags & INP_BINDANY) == 0 && > - ifa_ifwithaddr_check((const struct sockaddr *)&sin) == 0) > + ifa_ifwithaddr_check_getfib((const struct sockaddr *)&sin, > + fibnum) == 0) This excludes the IN_MULTICAST() case--is that intentional? > return (EADDRNOTAVAIL); > } > > @@ -1005,11 +1010,12 @@ in_pcbbind_avail(struct inpcb *inp, const struct > in_addr laddr, > * calling in_pcbinshash(), or they can just use the resulting > * port and address to authorise the sending of a once-off packet. > * > - * On error, the values of *laddrp and *lportp are not changed. > + * On error, the values of *laddrp, *lportp and *fibnum are not changed. > */ > int > in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t > *laddrp, > - u_short *lportp, int flags, struct ucred *cred) > + u_short *lportp, int flags, uint16_t *fibnum, > + struct ucred *cred) > { > struct socket *so = inp->inp_socket; > struct in_addr laddr; > @@ -1055,7 +1061,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in > *sin, in_addr_t *laddrp, > > /* See if this address/port combo is available. */ > error = in_pcbbind_avail(inp, laddr, lport, fib, sooptions, > - lookupflags, cred); > + lookupflags, fibnum, cred); Indentation on continuing lines should be by four spaces. > if (error != 0) > return (error); > } > diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h > index 5fe12c4f1e76..2bb36b41a60b 100644 > --- a/sys/netinet/in_pcb.h > +++ b/sys/netinet/in_pcb.h > @@ -638,7 +638,7 @@ int in_pcballoc(struct socket *, struct inpcbinfo > *); > #define INPBIND_FIB 0x0001 /* bind to the PCB's FIB only */ > int in_pcbbind(struct inpcb *, struct sockaddr_in *, int, struct ucred *); > int in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *, > - u_short *, int, struct ucred *); > + u_short *, int, uint16_t *, struct ucred *); > int in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *); > void in_pcbdisconnect(struct inpcb *); > void in_pcbdrop(struct inpcb *); > diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c > index c00a102e8520..4c3d26c58dc4 100644 > --- a/sys/netinet/tcp_input.c > +++ b/sys/netinet/tcp_input.c > @@ -1047,7 +1047,8 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int > proto, uint16_t port) > } > inc.inc_fport = th->th_sport; > inc.inc_lport = th->th_dport; > - inc.inc_fibnum = so->so_fibnum; > + if ((inc.inc_fibnum = so->so_fibnum) == 0) > + inc.inc_fibnum = m->m_pkthdr.fibnum; Same comment about double assignment. > > /* > * Check for an existing connection attempt in syncache if > diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c > index 606d808676e1..f0b64edcdacc 100644 > --- a/sys/netinet/tcp_syncache.c > +++ b/sys/netinet/tcp_syncache.c > @@ -794,7 +794,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, > struct mbuf *m) > * as they would have been set up if we had created the > * connection when the SYN arrived. > */ > - if ((so = solisten_clone(lso)) == NULL) > + if ((so = solisten_clone(lso, sc->sc_inc.inc_fibnum)) == NULL) > goto allocfail; > #ifdef MAC > mac_socketpeer_set_from_mbuf(m, so); > diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c > index dafbaf6dc672..8ffc1015a125 100644 > --- a/sys/netinet/udp_usrreq.c > +++ b/sys/netinet/udp_usrreq.c > @@ -1261,7 +1261,7 @@ udp_send(struct socket *so, int flags, struct mbuf *m, > struct sockaddr *addr, > } > INP_HASH_WLOCK(pcbinfo); > error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport, > - V_udp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); > + V_udp_bind_all_fibs ? 0 : INPBIND_FIB, NULL, td->td_ucred); > INP_HASH_WUNLOCK(pcbinfo); > if ((flags & PRUS_IPV6) != 0) > inp->inp_vflag = vflagsav; > diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c > index dfda0c60c0ba..90862c79b13a 100644 > --- a/sys/netinet6/in6_pcb.c > +++ b/sys/netinet6/in6_pcb.c > @@ -164,8 +164,9 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, > struct ucred *cred) > * Determine whether the inpcb can be bound to the specified address/port > tuple. > */ > static int > -in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6, int > fib, > - int sooptions, int lookupflags, struct ucred *cred) > +in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6, > + int fib, int sooptions, int lookupflags, > + uint16_t *fibnum, struct ucred *cred) Indentation of continuation lines should be four spaces. > { > const struct in6_addr *laddr; > int reuseport, reuseport_lb; > @@ -207,8 +208,9 @@ in6_pcbbind_avail(struct inpcb *inp, const struct > sockaddr_in6 *sin6, int fib, > sin6.sin6_addr = *laddr; > > NET_EPOCH_ENTER(et); > - if ((ifa = ifa_ifwithaddr((const struct sockaddr *)&sin6)) == > - NULL && (inp->inp_flags & INP_BINDANY) == 0) { > + ifa = ifa_ifwithaddr_getfib((const struct sockaddr *)&sin6, > + fibnum); > + if ((inp->inp_flags & INP_BINDANY) == 0 && ifa == NULL) { > NET_EPOCH_EXIT(et); > return (EADDRNOTAVAIL); > } > @@ -337,8 +339,9 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr_in6 *sin6, > int flags, > RT_ALL_FIBS; > > /* See if this address/port combo is available. */ > - error = in6_pcbbind_avail(inp, sin6, fib, sooptions, > lookupflags, > - cred); > + error = in6_pcbbind_avail(inp, sin6, fib, sooptions, > + lookupflags, > + &inp->inp_inc.inc_fibnum, cred); > if (error != 0) > return (error); > > diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h > index 6512a2d69fd5..c0243d799c5a 100644 > --- a/sys/sys/socketvar.h > +++ b/sys/sys/socketvar.h > @@ -521,7 +521,7 @@ int solisten_proto_check(struct socket *so); > bool solisten_enqueue(struct socket *, int); > int solisten_dequeue(struct socket *, struct socket **, int); > struct socket * > - solisten_clone(struct socket *); > + solisten_clone(struct socket *, int fibnum); > struct socket * > sonewconn(struct socket *head, int connstatus); > struct socket *