preparations for IP_SENDSRCADDR / (un)tangle pcb_bind...

2013-08-06 Thread Christopher Zimmermann
Hi,

I'm currently working towards an IP_SENDSRCADDR implementation.
As a first step I moved the calls to in_pcbrehash() from 
in_pcb(dis)connect() and in_pcbbind() to the call sites of those 
functions. This should save some calls to in_pcbrehash() in the 
in_pcbconnect() codepath.
Also I improved code sharing between the IPv4 and IPv6 stacks by 
implementing a generic in_pcbsetport used by both stacks.
The next step will be to replace in_pcbconnect() by in_selectsrc() 
and in_pcbsetport() in udp_output() like the IPv6 stack does it.
The splsoftnet() lock could then be removed. Also implementing
IP_SENDSRCADDR will become trivial because in_selectsrc() won't
modify the struct inpcb like in_pcbconnect() does at the moment.

Not much testing yet. Just surfing the web and sending this mail...

I'd really like some feedback on whether I'm on the right track or 
doing something stupid here. I have no experience in the OpenBSD 
IP-stack yet.


Christopher



Index: netinet/in_pcb.c
===
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.139
diff -u -p -r1.139 in_pcb.c
--- netinet/in_pcb.c1 Jun 2013 13:25:40 -   1.139
+++ netinet/in_pcb.c6 Aug 2013 18:25:54 -
@@ -218,7 +218,6 @@ in_pcbbind(struct inpcb *inp, struct mbu
 {
struct socket *so = inp-inp_socket;
struct inpcbtable *table = inp-inp_table;
-   u_int16_t *lastport = inp-inp_table-inpt_lastport;
struct sockaddr_in *sin;
u_int16_t lport = 0;
int wild = 0, reuseport = (so-so_options  SO_REUSEPORT);
@@ -293,71 +292,121 @@ in_pcbbind(struct inpcb *inp, struct mbu
inp-inp_laddr = sin-sin_addr;
}
if (lport == 0) {
-   u_int16_t first, last;
-   int count;
+   error = in_pcbsetport(inp, p);
+   if (error != 0)
+   return error;
+   }
+   else
+   inp-inp_lport = lport;
+   return (0);
+}
 
-   if (inp-inp_flags  INP_HIGHPORT) {
-   first = ipport_hifirstauto; /* sysctl */
-   last = ipport_hilastauto;
-   } else if (inp-inp_flags  INP_LOWPORT) {
-   if ((error = suser(p, 0)))
-   return (EACCES);
-   first = IPPORT_RESERVED-1; /* 1023 */
-   last = 600;/* not IPPORT_RESERVED/2 */
-   } else {
-   first = ipport_firstauto;   /* sysctl */
-   last  = ipport_lastauto;
-   }
+int
+in_pcbsetport(struct inpcb *inp, struct proc *p)
+{
+   struct socket *so = inp-inp_socket;
+   struct inpcbtable *table = inp-inp_table;
+   u_int16_t first, last;
+   u_int16_t *lastport = inp-inp_table-inpt_lastport;
+   u_int16_t lport = 0;
+   int count;
+   int error;
+   int wild;
+   u_int rtableid;
+   void *faddrp, *laddrp;
+#ifdef INET6
+   extern struct in6_addr zeroin6_addr;
 
-   /*
-* Simple check to ensure all ports are not used up causing
-* a deadlock here.
-*
-* We split the two cases (up and down) so that the direction
-* is not being tested on each round of the loop.
-*/
+   if (sotopf(so) == PF_INET6) {
+   wild = INPLOOKUP_IPV6;
+   rtableid = 0;
+   faddrp = zeroin6_addr;
+   laddrp = inp-inp_laddr6;
+   }
+   else
+#endif /* INET6 */
+   {
+   wild = 0;
+   rtableid = inp-inp_rtableid;
+   faddrp = zeroin_addr;
+   laddrp = inp-inp_laddr;
+   }
 
-   if (first  last) {
-   /*
-* counting down
-*/
-   count = first - last;
-   if (count)
-   *lastport = first - arc4random_uniform(count);
+   if ((so-so_options  (SO_REUSEADDR|SO_REUSEPORT)) == 0 
+   ((so-so_proto-pr_flags  PR_CONNREQUIRED) == 0 ||
+(so-so_options  SO_ACCEPTCONN) == 0))
+   wild |= INPLOOKUP_WILDCARD;
 
-   do {
-   if (count--  0)/* completely used? */
-   return (EADDRNOTAVAIL);
-   --*lastport;
-   if (*lastport  first || *lastport  last)
-   *lastport = first;
-   lport = htons(*lastport);
-   } while (in_baddynamic(*lastport, 
so-so_proto-pr_protocol) ||
-   in_pcblookup(table, zeroin_addr, 0,
-   inp-inp_laddr, lport, wild, inp-inp_rtableid));
-   } else {
- 

Re: preparations for IP_SENDSRCADDR / (un)tangle pcb_bind...

2013-08-06 Thread Claudio Jeker
On Tue, Aug 06, 2013 at 09:24:13PM +0200, Christopher Zimmermann wrote:
 Hi,
 
 I'm currently working towards an IP_SENDSRCADDR implementation.
 As a first step I moved the calls to in_pcbrehash() from 
 in_pcb(dis)connect() and in_pcbbind() to the call sites of those 
 functions. This should save some calls to in_pcbrehash() in the 
 in_pcbconnect() codepath.
 Also I improved code sharing between the IPv4 and IPv6 stacks by 
 implementing a generic in_pcbsetport used by both stacks.
 The next step will be to replace in_pcbconnect() by in_selectsrc() 
 and in_pcbsetport() in udp_output() like the IPv6 stack does it.
 The splsoftnet() lock could then be removed. Also implementing
 IP_SENDSRCADDR will become trivial because in_selectsrc() won't
 modify the struct inpcb like in_pcbconnect() does at the moment.
 
 Not much testing yet. Just surfing the web and sending this mail...
 
 I'd really like some feedback on whether I'm on the right track or 
 doing something stupid here. I have no experience in the OpenBSD 
 IP-stack yet.
 
 

Moving the calls of of in_pcbrehash() to outside of in_pcb.c is wrong.
This is an internal function for the specific way fast PCB lookups are
done at the moment.  Having that all over the place in higher level code
where someone does a bind or connect call is not an option IMO.
This needs to be done differently, sorry.

-- 
:wq Claudio

 Christopher
 
 
 
 Index: netinet/in_pcb.c
 ===
 RCS file: /cvs/src/sys/netinet/in_pcb.c,v
 retrieving revision 1.139
 diff -u -p -r1.139 in_pcb.c
 --- netinet/in_pcb.c  1 Jun 2013 13:25:40 -   1.139
 +++ netinet/in_pcb.c  6 Aug 2013 18:25:54 -
 @@ -218,7 +218,6 @@ in_pcbbind(struct inpcb *inp, struct mbu
  {
   struct socket *so = inp-inp_socket;
   struct inpcbtable *table = inp-inp_table;
 - u_int16_t *lastport = inp-inp_table-inpt_lastport;
   struct sockaddr_in *sin;
   u_int16_t lport = 0;
   int wild = 0, reuseport = (so-so_options  SO_REUSEPORT);
 @@ -293,71 +292,121 @@ in_pcbbind(struct inpcb *inp, struct mbu
   inp-inp_laddr = sin-sin_addr;
   }
   if (lport == 0) {
 - u_int16_t first, last;
 - int count;
 + error = in_pcbsetport(inp, p);
 + if (error != 0)
 + return error;
 + }
 + else
 + inp-inp_lport = lport;
 + return (0);
 +}
  
 - if (inp-inp_flags  INP_HIGHPORT) {
 - first = ipport_hifirstauto; /* sysctl */
 - last = ipport_hilastauto;
 - } else if (inp-inp_flags  INP_LOWPORT) {
 - if ((error = suser(p, 0)))
 - return (EACCES);
 - first = IPPORT_RESERVED-1; /* 1023 */
 - last = 600;/* not IPPORT_RESERVED/2 */
 - } else {
 - first = ipport_firstauto;   /* sysctl */
 - last  = ipport_lastauto;
 - }
 +int
 +in_pcbsetport(struct inpcb *inp, struct proc *p)
 +{
 + struct socket *so = inp-inp_socket;
 + struct inpcbtable *table = inp-inp_table;
 + u_int16_t first, last;
 + u_int16_t *lastport = inp-inp_table-inpt_lastport;
 + u_int16_t lport = 0;
 + int count;
 + int error;
 + int wild;
 + u_int rtableid;
 + void *faddrp, *laddrp;
 +#ifdef INET6
 + extern struct in6_addr zeroin6_addr;
  
 - /*
 -  * Simple check to ensure all ports are not used up causing
 -  * a deadlock here.
 -  *
 -  * We split the two cases (up and down) so that the direction
 -  * is not being tested on each round of the loop.
 -  */
 + if (sotopf(so) == PF_INET6) {
 + wild = INPLOOKUP_IPV6;
 + rtableid = 0;
 + faddrp = zeroin6_addr;
 + laddrp = inp-inp_laddr6;
 + }
 + else
 +#endif /* INET6 */
 + {
 + wild = 0;
 + rtableid = inp-inp_rtableid;
 + faddrp = zeroin_addr;
 + laddrp = inp-inp_laddr;
 + }
  
 - if (first  last) {
 - /*
 -  * counting down
 -  */
 - count = first - last;
 - if (count)
 - *lastport = first - arc4random_uniform(count);
 + if ((so-so_options  (SO_REUSEADDR|SO_REUSEPORT)) == 0 
 + ((so-so_proto-pr_flags  PR_CONNREQUIRED) == 0 ||
 +  (so-so_options  SO_ACCEPTCONN) == 0))
 + wild |= INPLOOKUP_WILDCARD;
  
 - do {
 - if (count--  0)/* completely used? */
 - return (EADDRNOTAVAIL);
 - --*lastport;
 - if (*lastport  first || *lastport  last)
 -