On Tue, Aug 06, 2013 at 09:24:13PM +0200, Christopher Zimmermann wrote:
> Hi,
> 
> I'm currently working towards an IP_SENDSRCADDR implementation.
> As a first step I moved the calls to in_pcbrehash() from 
> in_pcb(dis)connect() and in_pcbbind() to the call sites of those 
> functions. This should save some calls to in_pcbrehash() in the 
> in_pcbconnect() codepath.
> Also I improved code sharing between the IPv4 and IPv6 stacks by 
> implementing a generic in_pcbsetport used by both stacks.
> The next step will be to replace in_pcbconnect() by in_selectsrc() 
> and in_pcbsetport() in udp_output() like the IPv6 stack does it.
> The splsoftnet() lock could then be removed. Also implementing
> IP_SENDSRCADDR will become trivial because in_selectsrc() won't
> modify the struct inpcb like in_pcbconnect() does at the moment.
> 
> Not much testing yet. Just surfing the web and sending this mail...
> 
> I'd really like some feedback on whether I'm on the right track or 
> doing something stupid here. I have no experience in the OpenBSD 
> IP-stack yet.
> 
> 

Moving the calls of of in_pcbrehash() to outside of in_pcb.c is wrong.
This is an internal function for the specific way fast PCB lookups are
done at the moment.  Having that all over the place in higher level code
where someone does a bind or connect call is not an option IMO.
This needs to be done differently, sorry.

-- 
:wq Claudio

> Christopher
> 
> 
> 
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.139
> diff -u -p -r1.139 in_pcb.c
> --- netinet/in_pcb.c  1 Jun 2013 13:25:40 -0000       1.139
> +++ netinet/in_pcb.c  6 Aug 2013 18:25:54 -0000
> @@ -218,7 +218,6 @@ in_pcbbind(struct inpcb *inp, struct mbu
>  {
>       struct socket *so = inp->inp_socket;
>       struct inpcbtable *table = inp->inp_table;
> -     u_int16_t *lastport = &inp->inp_table->inpt_lastport;
>       struct sockaddr_in *sin;
>       u_int16_t lport = 0;
>       int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
> @@ -293,71 +292,121 @@ in_pcbbind(struct inpcb *inp, struct mbu
>               inp->inp_laddr = sin->sin_addr;
>       }
>       if (lport == 0) {
> -             u_int16_t first, last;
> -             int count;
> +             error = in_pcbsetport(inp, p);
> +             if (error != 0)
> +                     return error;
> +     }
> +     else
> +             inp->inp_lport = lport;
> +     return (0);
> +}
>  
> -             if (inp->inp_flags & INP_HIGHPORT) {
> -                     first = ipport_hifirstauto;     /* sysctl */
> -                     last = ipport_hilastauto;
> -             } else if (inp->inp_flags & INP_LOWPORT) {
> -                     if ((error = suser(p, 0)))
> -                             return (EACCES);
> -                     first = IPPORT_RESERVED-1; /* 1023 */
> -                     last = 600;                /* not IPPORT_RESERVED/2 */
> -             } else {
> -                     first = ipport_firstauto;       /* sysctl */
> -                     last  = ipport_lastauto;
> -             }
> +int
> +in_pcbsetport(struct inpcb *inp, struct proc *p)
> +{
> +     struct socket *so = inp->inp_socket;
> +     struct inpcbtable *table = inp->inp_table;
> +     u_int16_t first, last;
> +     u_int16_t *lastport = &inp->inp_table->inpt_lastport;
> +     u_int16_t lport = 0;
> +     int count;
> +     int error;
> +     int wild;
> +     u_int rtableid;
> +     void *faddrp, *laddrp;
> +#ifdef INET6
> +     extern struct in6_addr zeroin6_addr;
>  
> -             /*
> -              * Simple check to ensure all ports are not used up causing
> -              * a deadlock here.
> -              *
> -              * We split the two cases (up and down) so that the direction
> -              * is not being tested on each round of the loop.
> -              */
> +     if (sotopf(so) == PF_INET6) {
> +             wild = INPLOOKUP_IPV6;
> +             rtableid = 0;
> +             faddrp = &zeroin6_addr;
> +             laddrp = &inp->inp_laddr6;
> +     }
> +     else
> +#endif /* INET6 */
> +     {
> +             wild = 0;
> +             rtableid = inp->inp_rtableid;
> +             faddrp = &zeroin_addr;
> +             laddrp = &inp->inp_laddr;
> +     }
>  
> -             if (first > last) {
> -                     /*
> -                      * counting down
> -                      */
> -                     count = first - last;
> -                     if (count)
> -                             *lastport = first - arc4random_uniform(count);
> +     if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
> +         ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
> +          (so->so_options & SO_ACCEPTCONN) == 0))
> +             wild |= INPLOOKUP_WILDCARD;
>  
> -                     do {
> -                             if (count-- < 0)        /* completely used? */
> -                                     return (EADDRNOTAVAIL);
> -                             --*lastport;
> -                             if (*lastport > first || *lastport < last)
> -                                     *lastport = first;
> -                             lport = htons(*lastport);
> -                     } while (in_baddynamic(*lastport, 
> so->so_proto->pr_protocol) ||
> -                         in_pcblookup(table, &zeroin_addr, 0,
> -                         &inp->inp_laddr, lport, wild, inp->inp_rtableid));
> -             } else {
> -                     /*
> -                      * counting up
> -                      */
> -                     count = last - first;
> -                     if (count)
> -                             *lastport = first + arc4random_uniform(count);
> +     if (inp->inp_flags & INP_HIGHPORT) {
> +             first = ipport_hifirstauto;     /* sysctl */
> +             last = ipport_hilastauto;
> +     } else if (inp->inp_flags & INP_LOWPORT) {
> +             if ((error = suser(p, 0)))
> +                     return (EACCES);
> +             first = IPPORT_RESERVED-1; /* 1023 */
> +             last = 600;                /* not IPPORT_RESERVED/2 */
> +     } else {
> +             first = ipport_firstauto;       /* sysctl */
> +             last  = ipport_lastauto;
> +     }
>  
> -                     do {
> -                             if (count-- < 0)        /* completely used? */
> -                                     return (EADDRNOTAVAIL);
> -                             ++*lastport;
> -                             if (*lastport < first || *lastport > last)
> -                                     *lastport = first;
> -                             lport = htons(*lastport);
> -                     } while (in_baddynamic(*lastport, 
> so->so_proto->pr_protocol) ||
> -                         in_pcblookup(table, &zeroin_addr, 0,
> -                         &inp->inp_laddr, lport, wild, inp->inp_rtableid));
> -             }
> +     /*
> +      * Simple check to ensure all ports are not used up causing
> +      * a deadlock here.
> +      *
> +      * We split the two cases (up and down) so that the direction
> +      * is not being tested on each round of the loop.
> +      * XXX: Does this optimization really have a relevant effect?
> +      * How many iterations can we expect?
> +      * What about branch prediction?
> +      * Shouldn't we forbid the first > last case completely?
> +      */
> +
> +     if (first > last) {
> +             /*
> +              * counting down
> +              */
> +             count = first - last;
> +             if (count)
> +                     *lastport = first - arc4random_uniform(count);
> +
> +             do {
> +                     if (count-- < 0)        /* completely used? */
> +                             return (EADDRNOTAVAIL);
> +                     --*lastport;
> +                     if (*lastport > first || *lastport < last)
> +                             *lastport = first;
> +                     lport = htons(*lastport);
> +             } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
> +                 in_pcblookup(table, faddrp, 0,
> +                 laddrp, lport, wild, rtableid));
> +     } else {
> +             /*
> +              * counting up
> +              */
> +             count = last - first;
> +             if (count)
> +                     *lastport = first + arc4random_uniform(count);
> +
> +             do {
> +                     if (count-- < 0)        /* completely used? */
> +                             return (EADDRNOTAVAIL);
> +                     ++*lastport;
> +                     if (*lastport < first || *lastport > last)
> +                             *lastport = first;
> +                     lport = htons(*lastport);
> +             } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
> +                 in_pcblookup(table, faddrp, 0,
> +                 laddrp, lport, wild, rtableid));
>       }
> +
>       inp->inp_lport = lport;
> -     in_pcbrehash(inp);
> -     return (0);
> +
> +#if 0
> +     inp->inp_flowinfo = 0;  /* XXX */
> +#endif
> +
> +     return 0;
>  }
>  
>  /*
> @@ -418,13 +467,12 @@ in_pcbconnect(struct inpcb *inp, struct 
>               return (EADDRINUSE);
>       if (inp->inp_laddr.s_addr == INADDR_ANY) {
>               if (inp->inp_lport == 0 &&
> -                 in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL)
> -                     return (EADDRNOTAVAIL);
> +                 in_pcbsetport(inp, curproc) == EADDRNOTAVAIL)
> +                     return EADDRNOTAVAIL;
>               inp->inp_laddr = ifaddr->sin_addr;
>       }
>       inp->inp_faddr = sin->sin_addr;
>       inp->inp_fport = sin->sin_port;
> -     in_pcbrehash(inp);
>  #ifdef IPSEC
>       {
>               int error; /* This is just ignored */
> @@ -452,7 +500,6 @@ in_pcbdisconnect(struct inpcb *inp)
>       }
>  
>       inp->inp_fport = 0;
> -     in_pcbrehash(inp);
>       if (inp->inp_socket->so_state & SS_NOFDREF)
>               in_pcbdetach(inp);
>  }
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet/in_pcb.h,v
> retrieving revision 1.79
> diff -u -p -r1.79 in_pcb.h
> --- netinet/in_pcb.h  31 May 2013 13:15:53 -0000      1.79
> +++ netinet/in_pcb.h  6 Aug 2013 18:25:54 -0000
> @@ -297,6 +297,6 @@ int       in6_pcbnotify(struct inpcbtable *, s
>       u_int, const struct sockaddr_in6 *, u_int, int, void *,
>       void (*)(struct inpcb *, int));
>  int  in6_selecthlim(struct inpcb *, struct ifnet *);
> -int  in6_pcbsetport(struct in6_addr *, struct inpcb *, struct proc *);
> +int  in_pcbsetport(struct inpcb *, struct proc *);
>  #endif /* _KERNEL */
>  #endif /* _NETINET_IN_PCB_H_ */
> Index: netinet/ip_divert.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_divert.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 ip_divert.c
> --- netinet/ip_divert.c       8 Apr 2013 15:32:23 -0000       1.13
> +++ netinet/ip_divert.c       6 Aug 2013 18:25:55 -0000
> @@ -319,6 +319,8 @@ divert_usrreq(struct socket *so, int req
>       case PRU_BIND:
>               s = splsoftnet();
>               error = in_pcbbind(inp, addr, p);
> +             if (error == 0)
> +                     in_pcbrehash(inp);
>               splx(s);
>               break;
>  
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_input.c,v
> retrieving revision 1.265
> diff -u -p -r1.265 tcp_input.c
> --- netinet/tcp_input.c       1 Jul 2013 10:53:52 -0000       1.265
> +++ netinet/tcp_input.c       6 Aug 2013 18:26:01 -0000
> @@ -3820,6 +3820,7 @@ syn_cache_get(struct sockaddr *src, stru
>               break;
>  #endif
>       }
> +     in_pcbrehash(inp);
>       (void) m_free(am);
>  
>       tp = intotcpcb(inp);
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
> retrieving revision 1.112
> diff -u -p -r1.112 tcp_usrreq.c
> --- netinet/tcp_usrreq.c      17 May 2013 09:04:30 -0000      1.112
> +++ netinet/tcp_usrreq.c      6 Aug 2013 18:26:02 -0000
> @@ -228,6 +228,7 @@ tcp_usrreq(so, req, m, nam, control, p)
>                       error = in_pcbbind(inp, nam, p);
>               if (error)
>                       break;
> +             in_pcbrehash(inp);
>               break;
>  
>       /*
> @@ -244,8 +245,10 @@ tcp_usrreq(so, req, m, nam, control, p)
>               }
>               /* If the in_pcbbind() above is called, the tp->pf
>                  should still be whatever it was before. */
> -             if (error == 0)
> +             if (error == 0) {
> +                     in_pcbrehash(inp);
>                       tp->t_state = TCPS_LISTEN;
> +             }
>               break;
>  
>       /*
> @@ -304,6 +307,8 @@ tcp_usrreq(so, req, m, nam, control, p)
>                       error = ENOBUFS;
>                       break;
>               }
> +
> +             in_pcbrehash(inp);
>  
>               so->so_state |= SS_CONNECTOUT;
>               
> Index: netinet/udp_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
> retrieving revision 1.164
> diff -u -p -r1.164 udp_usrreq.c
> --- netinet/udp_usrreq.c      9 Jun 2013 22:03:06 -0000       1.164
> +++ netinet/udp_usrreq.c      6 Aug 2013 18:26:04 -0000
> @@ -1164,6 +1164,8 @@ udp_usrreq(struct socket *so, int req, s
>               else
>  #endif
>                       error = in_pcbbind(inp, addr, p);
> +             if (error == 0)
> +                     in_pcbrehash(inp);
>               splx(s);
>               break;
>  
> @@ -1180,7 +1182,6 @@ udp_usrreq(struct socket *so, int req, s
>                       }
>                       s = splsoftnet();
>                       error = in6_pcbconnect(inp, addr);
> -                     splx(s);
>               } else
>  #endif /* INET6 */
>               {
> @@ -1190,8 +1191,9 @@ udp_usrreq(struct socket *so, int req, s
>                       }
>                       s = splsoftnet();
>                       error = in_pcbconnect(inp, addr);
> -                     splx(s);
>               }
> +             in_pcbrehash(inp);
> +             splx(s);
>  
>               if (error == 0)
>                       soisconnected(so);
> @@ -1229,6 +1231,7 @@ udp_usrreq(struct socket *so, int req, s
>  #endif /* INET6 */
>                       inp->inp_laddr.s_addr = INADDR_ANY;
>               in_pcbdisconnect(inp);
> +             in_pcbrehash(inp);
>  
>               splx(s);
>               so->so_state &= ~SS_ISCONNECTED;                /* XXX */
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/in6_pcb.c,v
> retrieving revision 1.56
> diff -u -p -r1.56 in6_pcb.c
> --- netinet6/in6_pcb.c        31 May 2013 15:04:24 -0000      1.56
> +++ netinet6/in6_pcb.c        6 Aug 2013 18:26:05 -0000
> @@ -267,8 +267,8 @@ in6_pcbbind(struct inpcb *inp, struct mb
>                               return error;
>  
>                       t = in_pcblookup(head,
> -                         (struct in_addr *)&zeroin6_addr, 0,
> -                         (struct in_addr *)&sin6->sin6_addr, lport,
> +                         &zeroin6_addr, 0,
> +                         &sin6->sin6_addr, lport,
>                           wild, /* XXX */ 0);
>  
>                       if (t && (reuseport & t->inp_socket->so_options) == 0)
> @@ -278,101 +278,11 @@ in6_pcbbind(struct inpcb *inp, struct mb
>       }
>  
>       if (lport == 0) {
> -             error = in6_pcbsetport(&inp->inp_laddr6, inp, p);
> +             error = in_pcbsetport(inp, p);
>               if (error != 0)
>                       return error;
> -     } else {
> +     } else
>               inp->inp_lport = lport;
> -             in_pcbrehash(inp);
> -     }
> -
> -     return 0;
> -}
> -
> -int
> -in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct proc *p)
> -{
> -     struct socket *so = inp->inp_socket;
> -     struct inpcbtable *table = inp->inp_table;
> -     u_int16_t first, last;
> -     u_int16_t *lastport = &inp->inp_table->inpt_lastport;
> -     u_int16_t lport = 0;
> -     int count;
> -     int wild = INPLOOKUP_IPV6;
> -     int error;
> -
> -     /* XXX we no longer support IPv4 mapped address, so no tweaks here */
> -
> -     if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
> -         ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
> -          (so->so_options & SO_ACCEPTCONN) == 0))
> -             wild |= INPLOOKUP_WILDCARD;
> -
> -     if (inp->inp_flags & INP_HIGHPORT) {
> -             first = ipport_hifirstauto;     /* sysctl */
> -             last = ipport_hilastauto;
> -     } else if (inp->inp_flags & INP_LOWPORT) {
> -             if ((error = suser(p, 0)))
> -                     return (EACCES);
> -             first = IPPORT_RESERVED-1; /* 1023 */
> -             last = 600;                /* not IPPORT_RESERVED/2 */
> -     } else {
> -             first = ipport_firstauto;       /* sysctl */
> -             last  = ipport_lastauto;
> -     }
> -
> -     /*
> -      * Simple check to ensure all ports are not used up causing
> -      * a deadlock here.
> -      *
> -      * We split the two cases (up and down) so that the direction
> -      * is not being tested on each round of the loop.
> -      */
> -
> -     if (first > last) {
> -             /*
> -              * counting down
> -              */
> -             count = first - last;
> -             if (count)
> -                     *lastport = first - arc4random_uniform(count);
> -
> -             do {
> -                     if (count-- < 0)        /* completely used? */
> -                             return (EADDRNOTAVAIL);
> -                     --*lastport;
> -                     if (*lastport > first || *lastport < last)
> -                             *lastport = first;
> -                     lport = htons(*lastport);
> -             } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
> -                 in_pcblookup(table, &zeroin6_addr, 0,
> -                 &inp->inp_laddr6, lport, wild, /* XXX */ 0));
> -     } else {
> -             /*
> -              * counting up
> -              */
> -             count = last - first;
> -             if (count)
> -                     *lastport = first + arc4random_uniform(count);
> -
> -             do {
> -                     if (count-- < 0)        /* completely used? */
> -                             return (EADDRNOTAVAIL);
> -                     ++*lastport;
> -                     if (*lastport < first || *lastport > last)
> -                             *lastport = first;
> -                     lport = htons(*lastport);
> -             } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
> -                 in_pcblookup(table, &zeroin6_addr, 0,
> -                 &inp->inp_laddr6, lport, wild, /* XXX */ 0));
> -     }
> -
> -     inp->inp_lport = lport;
> -     in_pcbrehash(inp);
> -
> -#if 0
> -     inp->inp_flowinfo = 0;  /* XXX */
> -#endif
>  
>       return 0;
>  }
> @@ -449,8 +359,9 @@ in6_pcbconnect(struct inpcb *inp, struct
>               return (EADDRINUSE);
>       }
>       if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
> -             if (inp->inp_lport == 0)
> -                     (void)in6_pcbbind(inp, NULL, curproc);
> +             if (inp->inp_lport == 0 &&
> +                 in_pcbsetport(inp, curproc) == EADDRNOTAVAIL)
> +                     return EADDRNOTAVAIL;
>               inp->inp_laddr6 = *in6a;
>       }
>       inp->inp_faddr6 = sin6->sin6_addr;
> @@ -459,7 +370,6 @@ in6_pcbconnect(struct inpcb *inp, struct
>       if (ip6_auto_flowlabel)
>               inp->inp_flowinfo |=
>                   (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
> -     in_pcbrehash(inp);
>       return (0);
>  }
>  
> Index: netinet6/ip6_divert.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 ip6_divert.c
> --- netinet6/ip6_divert.c     26 Jun 2013 09:12:40 -0000      1.13
> +++ netinet6/ip6_divert.c     6 Aug 2013 18:26:05 -0000
> @@ -314,6 +314,8 @@ divert6_usrreq(struct socket *so, int re
>       case PRU_BIND:
>               s = splsoftnet();
>               error = in6_pcbbind(inp, addr, p);
> +             if (error == 0)
> +                     in_pcbrehash(inp);
>               splx(s);
>               break;
>  
> Index: netinet6/udp6_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/udp6_output.c,v
> retrieving revision 1.19
> diff -u -p -r1.19 udp6_output.c
> --- netinet6/udp6_output.c    28 Mar 2013 16:45:16 -0000      1.19
> +++ netinet6/udp6_output.c    6 Aug 2013 18:26:05 -0000
> @@ -194,9 +194,12 @@ udp6_output(struct in6pcb *in6p, struct 
>                               error = EADDRNOTAVAIL;
>                       goto release;
>               }
> -             if (in6p->in6p_lport == 0 &&
> -                 (error = in6_pcbsetport(laddr, in6p, p)) != 0)
> -                     goto release;
> +             if (in6p->in6p_lport == 0) {
> +                     error = in_pcbsetport(in6p, p);
> +                     if (error)
> +                             goto release;
> +                     in_pcbrehash(in6p);
> +             }
>       } else {
>               if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
>                       error = ENOTCONN;
> 
> 
> -- 
> http://gmerlin.de
> OpenPGP: http://gmerlin.de/christopher.pub
> 1917 680A 723C BF3D 2CA3  0E44 7E24 D19F 34B8 2A2A

Reply via email to