On Mon, Jun 09, 2025 at 04:42:55AM +0000, Paul Vixie wrote:
> ok so i've learned a lot by trying to get fibnum2 upstreamed. needed ipv6, 
> needed to be more minimalistic about unrelated changes, needed to keep in 
> synch with the freebsd-current tree -- the works. i also scrambled my git 
> tree 
> somehow in a way that i found unrecoverable, thus i ended up closing that PR 
> after fixing everything i learned about other than man page changes.

I've made a number of inline comments.

> before i try again i'd like to go back to where it all started -- this e-mail 
> list. visual review would be great, testing would be even better. for those 
> who didn't keep state from last time, the intent is to let every interface 
> have its own default route, and ensure inbound/outbound path symmetry. i use 
> different interfaces for serving http-et-al than for doing ssh for operations/
> management, and i've never been comfortable with a single default route.
> 
> diff is attached. "patch -p1" is your friend.
> 
> -- 
> Paul Vixie

> diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
> index 6c9eb7139cd1..518ee7ee3e75 100644
> --- a/sys/kern/uipc_socket.c
> +++ b/sys/kern/uipc_socket.c
> @@ -1003,13 +1003,13 @@ SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, 
> sooverinterval, CTLFLAG_RW,
>   * accept(2), the protocol has two options:
>   * 1) Call legacy sonewconn() function, which would call protocol attach
>   *    method, same as used for socket(2).
> - * 2) Call solisten_clone(), do attach that is specific to a cloned 
> connection,
> - *    and then call solisten_enqueue().
> + * 2) Call solisten_clone(), do an attach that is specific to a cloned
> + *    connection, and then call solisten_enqueue().
>   *
>   * Note: the ref count on the socket is 0 on return.
>   */
>  struct socket *
> -solisten_clone(struct socket *head)
> +solisten_clone(struct socket *head, int fibnum)
>  {
>       struct sbuf descrsb;
>       struct socket *so;
> @@ -1180,7 +1180,8 @@ solisten_clone(struct socket *head)
>           SO_DONTROUTE | SO_LINGER | SO_OOBINLINE | SO_NOSIGPIPE);
>       so->so_linger = head->so_linger;
>       so->so_state = head->so_state;
> -     so->so_fibnum = head->so_fibnum;
> +     if ((so->so_fibnum = head->so_fibnum) == 0)
> +             so->so_fibnum = fibnum;

The double assignment looks odd to me.  It'd be neater to write:

    so->so_fibnum == head->so_fibnum != 0 ? head->so_fibnum : fibnum;

>       so->so_proto = head->so_proto;
>       so->so_cred = crhold(head->so_cred);
>  #ifdef SOCKET_HHOOK
> @@ -1226,7 +1227,7 @@ sonewconn(struct socket *head, int connstatus)
>  {
>       struct socket *so;
>  
> -     if ((so = solisten_clone(head)) == NULL)
> +     if ((so = solisten_clone(head, head->so_fibnum)) == NULL)
>               return (NULL);
>  
>       if (so->so_proto->pr_attach(so, 0, NULL) != 0) {
> diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
> index 95d857b2625d..29b6d2abb090 100644
> --- a/sys/kern/uipc_usrreq.c
> +++ b/sys/kern/uipc_usrreq.c
> @@ -2916,7 +2916,7 @@ unp_connectat(int fd, struct socket *so, struct 
> sockaddr *nam,
>       }
>       if (connreq) {
>               if (SOLISTENING(so2))
> -                     so2 = solisten_clone(so2);
> +                     so2 = solisten_clone(so2, 0);
>               else
>                       so2 = NULL;
>               if (so2 == NULL) {
> diff --git a/sys/net/if.c b/sys/net/if.c
> index 0255c27a3136..0eb8d028cba7 100644
> --- a/sys/net/if.c
> +++ b/sys/net/if.c
> @@ -1805,11 +1805,11 @@ sa_dl_equal(const struct sockaddr *a, const struct 
> sockaddr *b)
>  }
>  
>  /*
> - * Locate an interface based on a complete address.
> + * Locate an interface based on a complete address,
> + * and optionally retrieve its FIB number.
>   */
> -/*ARGSUSED*/
>  struct ifaddr *
> -ifa_ifwithaddr(const struct sockaddr *addr)
> +ifa_ifwithaddr_getfib(const struct sockaddr *addr, uint16_t *fibnum)
>  {
>       struct ifnet *ifp;
>       struct ifaddr *ifa;
> @@ -1834,17 +1834,23 @@ ifa_ifwithaddr(const struct sockaddr *addr)
>       }
>       ifa = NULL;
>  done:
> +     if (fibnum != NULL && ifp != NULL)
> +             *fibnum = ifp->if_fib;

Rather than complicating this function to add an additional return
value, why not let the caller get the fib number directly?  They can
just get ifa->ifa_ifp->if_fib.

>       return (ifa);
>  }
>  
> +/*
> + * Test for existence of an interface having this complete address,
> + * optionally retrieving its FIB number.
> + */
>  int
> -ifa_ifwithaddr_check(const struct sockaddr *addr)
> +ifa_ifwithaddr_check_getfib(const struct sockaddr *addr, uint16_t *fibnum)
>  {
>       struct epoch_tracker et;
>       int rc;
>  
>       NET_EPOCH_ENTER(et);
> -     rc = (ifa_ifwithaddr(addr) != NULL);
> +     rc = (ifa_ifwithaddr_getfib(addr, fibnum) != NULL);
>       NET_EPOCH_EXIT(et);
>       return (rc);
>  }

Similarly, I think adding this extra helper is unnecessary.  Callers
which want the FIB number can use ifa_ifwithaddr() and extract it
directly.  That's how in6_pcbbind_avail() handles fetching ifaddr flags,
for instance.

> diff --git a/sys/net/if_var.h b/sys/net/if_var.h
> index 83d33330987e..e825be3180a3 100644
> --- a/sys/net/if_var.h
> +++ b/sys/net/if_var.h
> @@ -545,8 +545,9 @@ int       ifa_add_loopback_route(struct ifaddr *, struct 
> sockaddr *);
>  int  ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
>  int  ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *);
>  
> -struct       ifaddr *ifa_ifwithaddr(const struct sockaddr *);
> -int          ifa_ifwithaddr_check(const struct sockaddr *);
> +struct       ifaddr *ifa_ifwithaddr_getfib(const struct sockaddr *, uint16_t 
> *);
> +int          ifa_ifwithaddr_check_getfib(const struct sockaddr *,
> +                                         uint16_t *);
>  struct       ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int);
>  struct       ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int);
>  struct       ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int);
> @@ -555,6 +556,18 @@ struct   ifaddr *ifa_ifwithroute(int, const struct 
> sockaddr *,
>  struct       ifaddr *ifaof_ifpforaddr(const struct sockaddr *, if_t);
>  int  ifa_preferred(struct ifaddr *, struct ifaddr *);
>  
> +static inline struct ifaddr *
> +ifa_ifwithaddr(const struct sockaddr *sa)
> +{
> +     return (ifa_ifwithaddr_getfib(sa, NULL));
> +}
> +
> +static inline int
> +ifa_ifwithaddr_check(const struct sockaddr *sa)
> +{
> +     return (ifa_ifwithaddr_check_getfib(sa, NULL));
> +}
> +
>  int  if_simloop(if_t ifp, struct mbuf *m, int af, int hlen);
>  
>  typedef      void *if_com_alloc_t(u_char type, if_t ifp);
> diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
> index bccd4b84561a..e469c3e1275a 100644
> --- a/sys/netinet/in_pcb.c
> +++ b/sys/netinet/in_pcb.c
> @@ -646,7 +646,8 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
>       inp->inp_pcbinfo = pcbinfo;
>       inp->inp_socket = so;
>       inp->inp_cred = crhold(so->so_cred);
> -     inp->inp_inc.inc_fibnum = so->so_fibnum;
> +     /* The FIB number is in both inp_inc and inp_socket; synchronize. */
> +     inp->inp_inc.inc_fibnum = inp->inp_socket->so_fibnum;

so == inp->inp_socket, so this part of the change is redundant.

>  #ifdef MAC
>       error = mac_inpcb_init(inp, M_NOWAIT);
>       if (error != 0)
> @@ -729,7 +730,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, 
> int flags,
>               return (EINVAL);
>       anonport = sin == NULL || sin->sin_port == 0;
>       error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr,
> -         &inp->inp_lport, flags, cred);
> +                              &inp->inp_lport, flags,
> +                              &inp->inp_inc.inc_fibnum, cred);
>       if (error)
>               return (error);
>       if (__predict_false((error = in_pcbinshash(inp)) != 0)) {
> @@ -741,6 +743,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, 
> int flags,
>       }
>       if (anonport)
>               inp->inp_flags |= INP_ANONPORT;
> +     /* The FIB number is in both inp_inc and inp_socket; synchronize. */
> +     sosetfib(inp->inp_socket, inp->inp_inc.inc_fibnum);

Why is there no such call in the IPv6 case?

>       return (0);
>  }
>  #endif
> @@ -910,8 +914,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, 
> u_short *lportp,
>   */
>  static int
>  in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
> -    const u_short lport, const int fib, int sooptions, int lookupflags,
> -    struct ucred *cred)
> +              const u_short lport, const int fib, int sooptions,
> +              int lookupflags, uint16_t *fibnum, struct ucred *cred)
>  {
>       int reuseport, reuseport_lb;
>  
> @@ -951,7 +955,8 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr 
> laddr,
>                * to any endpoint address, local or not.
>                */
>               if ((inp->inp_flags & INP_BINDANY) == 0 &&
> -                 ifa_ifwithaddr_check((const struct sockaddr *)&sin) == 0)
> +                 ifa_ifwithaddr_check_getfib((const struct sockaddr *)&sin,
> +                                             fibnum) == 0)

This excludes the IN_MULTICAST() case--is that intentional?

>                       return (EADDRNOTAVAIL);
>       }
>  
> @@ -1005,11 +1010,12 @@ in_pcbbind_avail(struct inpcb *inp, const struct 
> in_addr laddr,
>   * calling in_pcbinshash(), or they can just use the resulting
>   * port and address to authorise the sending of a once-off packet.
>   *
> - * On error, the values of *laddrp and *lportp are not changed.
> + * On error, the values of *laddrp, *lportp and *fibnum are not changed.
>   */
>  int
>  in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t 
> *laddrp,
> -    u_short *lportp, int flags, struct ucred *cred)
> +              u_short *lportp, int flags, uint16_t *fibnum,
> +              struct ucred *cred)
>  {
>       struct socket *so = inp->inp_socket;
>       struct in_addr laddr;
> @@ -1055,7 +1061,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in 
> *sin, in_addr_t *laddrp,
>  
>               /* See if this address/port combo is available. */
>               error = in_pcbbind_avail(inp, laddr, lport, fib, sooptions,
> -                 lookupflags, cred);
> +                                      lookupflags, fibnum, cred);

Indentation on continuing lines should be by four spaces.

>               if (error != 0)
>                       return (error);
>       }
> diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
> index 5fe12c4f1e76..2bb36b41a60b 100644
> --- a/sys/netinet/in_pcb.h
> +++ b/sys/netinet/in_pcb.h
> @@ -638,7 +638,7 @@ int       in_pcballoc(struct socket *, struct inpcbinfo 
> *);
>  #define      INPBIND_FIB     0x0001  /* bind to the PCB's FIB only */
>  int  in_pcbbind(struct inpcb *, struct sockaddr_in *, int, struct ucred *);
>  int  in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *,
> -         u_short *, int, struct ucred *);
> +         u_short *, int, uint16_t *, struct ucred *);
>  int  in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *);
>  void in_pcbdisconnect(struct inpcb *);
>  void in_pcbdrop(struct inpcb *);
> diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
> index c00a102e8520..4c3d26c58dc4 100644
> --- a/sys/netinet/tcp_input.c
> +++ b/sys/netinet/tcp_input.c
> @@ -1047,7 +1047,8 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int 
> proto, uint16_t port)
>               }
>               inc.inc_fport = th->th_sport;
>               inc.inc_lport = th->th_dport;
> -             inc.inc_fibnum = so->so_fibnum;
> +             if ((inc.inc_fibnum = so->so_fibnum) == 0)
> +                     inc.inc_fibnum = m->m_pkthdr.fibnum;

Same comment about double assignment.

>  
>               /*
>                * Check for an existing connection attempt in syncache if
> diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
> index 606d808676e1..f0b64edcdacc 100644
> --- a/sys/netinet/tcp_syncache.c
> +++ b/sys/netinet/tcp_syncache.c
> @@ -794,7 +794,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, 
> struct mbuf *m)
>        * as they would have been set up if we had created the
>        * connection when the SYN arrived.
>        */
> -     if ((so = solisten_clone(lso)) == NULL)
> +     if ((so = solisten_clone(lso, sc->sc_inc.inc_fibnum)) == NULL)
>               goto allocfail;
>  #ifdef MAC
>       mac_socketpeer_set_from_mbuf(m, so);
> diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
> index dafbaf6dc672..8ffc1015a125 100644
> --- a/sys/netinet/udp_usrreq.c
> +++ b/sys/netinet/udp_usrreq.c
> @@ -1261,7 +1261,7 @@ udp_send(struct socket *so, int flags, struct mbuf *m, 
> struct sockaddr *addr,
>               }
>               INP_HASH_WLOCK(pcbinfo);
>               error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport,
> -                 V_udp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
> +                 V_udp_bind_all_fibs ? 0 : INPBIND_FIB, NULL, td->td_ucred);
>               INP_HASH_WUNLOCK(pcbinfo);
>               if ((flags & PRUS_IPV6) != 0)
>                       inp->inp_vflag = vflagsav;
> diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
> index dfda0c60c0ba..90862c79b13a 100644
> --- a/sys/netinet6/in6_pcb.c
> +++ b/sys/netinet6/in6_pcb.c
> @@ -164,8 +164,9 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, 
> struct ucred *cred)
>   * Determine whether the inpcb can be bound to the specified address/port 
> tuple.
>   */
>  static int
> -in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6, int 
> fib,
> -    int sooptions, int lookupflags, struct ucred *cred)
> +in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6,
> +               int fib, int sooptions, int lookupflags,
> +               uint16_t *fibnum, struct ucred *cred)

Indentation of continuation lines should be four spaces.

>  {
>       const struct in6_addr *laddr;
>       int reuseport, reuseport_lb;
> @@ -207,8 +208,9 @@ in6_pcbbind_avail(struct inpcb *inp, const struct 
> sockaddr_in6 *sin6, int fib,
>               sin6.sin6_addr = *laddr;
>  
>               NET_EPOCH_ENTER(et);
> -             if ((ifa = ifa_ifwithaddr((const struct sockaddr *)&sin6)) ==
> -                 NULL && (inp->inp_flags & INP_BINDANY) == 0) {
> +             ifa = ifa_ifwithaddr_getfib((const struct sockaddr *)&sin6,
> +                                         fibnum);
> +             if ((inp->inp_flags & INP_BINDANY) == 0 && ifa == NULL) {
>                       NET_EPOCH_EXIT(et);
>                       return (EADDRNOTAVAIL);
>               }
> @@ -337,8 +339,9 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr_in6 *sin6, 
> int flags,
>                   RT_ALL_FIBS;
>  
>               /* See if this address/port combo is available. */
> -             error = in6_pcbbind_avail(inp, sin6, fib, sooptions, 
> lookupflags,
> -                 cred);
> +             error = in6_pcbbind_avail(inp, sin6, fib, sooptions,
> +                                       lookupflags,
> +                                       &inp->inp_inc.inc_fibnum, cred);
>               if (error != 0)
>                       return (error);
>  
> diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
> index 6512a2d69fd5..c0243d799c5a 100644
> --- a/sys/sys/socketvar.h
> +++ b/sys/sys/socketvar.h
> @@ -521,7 +521,7 @@ int       solisten_proto_check(struct socket *so);
>  bool solisten_enqueue(struct socket *, int);
>  int  solisten_dequeue(struct socket *, struct socket **, int);
>  struct socket *
> -     solisten_clone(struct socket *);
> +     solisten_clone(struct socket *, int fibnum);
>  struct socket *
>       sonewconn(struct socket *head, int connstatus);
>  struct socket *


Reply via email to