Hi, I'm currently working towards an IP_SENDSRCADDR implementation. As a first step I moved the calls to in_pcbrehash() from in_pcb(dis)connect() and in_pcbbind() to the call sites of those functions. This should save some calls to in_pcbrehash() in the in_pcbconnect() codepath. Also I improved code sharing between the IPv4 and IPv6 stacks by implementing a generic in_pcbsetport used by both stacks. The next step will be to replace in_pcbconnect() by in_selectsrc() and in_pcbsetport() in udp_output() like the IPv6 stack does it. The splsoftnet() lock could then be removed. Also implementing IP_SENDSRCADDR will become trivial because in_selectsrc() won't modify the struct inpcb like in_pcbconnect() does at the moment.
Not much testing yet. Just surfing the web and sending this mail...
I'd really like some feedback on whether I'm on the right track or
doing something stupid here. I have no experience in the OpenBSD
IP-stack yet.
Christopher
Index: netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.139
diff -u -p -r1.139 in_pcb.c
--- netinet/in_pcb.c 1 Jun 2013 13:25:40 -0000 1.139
+++ netinet/in_pcb.c 6 Aug 2013 18:25:54 -0000
@@ -218,7 +218,6 @@ in_pcbbind(struct inpcb *inp, struct mbu
{
struct socket *so = inp->inp_socket;
struct inpcbtable *table = inp->inp_table;
- u_int16_t *lastport = &inp->inp_table->inpt_lastport;
struct sockaddr_in *sin;
u_int16_t lport = 0;
int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
@@ -293,71 +292,121 @@ in_pcbbind(struct inpcb *inp, struct mbu
inp->inp_laddr = sin->sin_addr;
}
if (lport == 0) {
- u_int16_t first, last;
- int count;
+ error = in_pcbsetport(inp, p);
+ if (error != 0)
+ return error;
+ }
+ else
+ inp->inp_lport = lport;
+ return (0);
+}
- if (inp->inp_flags & INP_HIGHPORT) {
- first = ipport_hifirstauto; /* sysctl */
- last = ipport_hilastauto;
- } else if (inp->inp_flags & INP_LOWPORT) {
- if ((error = suser(p, 0)))
- return (EACCES);
- first = IPPORT_RESERVED-1; /* 1023 */
- last = 600; /* not IPPORT_RESERVED/2 */
- } else {
- first = ipport_firstauto; /* sysctl */
- last = ipport_lastauto;
- }
+int
+in_pcbsetport(struct inpcb *inp, struct proc *p)
+{
+ struct socket *so = inp->inp_socket;
+ struct inpcbtable *table = inp->inp_table;
+ u_int16_t first, last;
+ u_int16_t *lastport = &inp->inp_table->inpt_lastport;
+ u_int16_t lport = 0;
+ int count;
+ int error;
+ int wild;
+ u_int rtableid;
+ void *faddrp, *laddrp;
+#ifdef INET6
+ extern struct in6_addr zeroin6_addr;
- /*
- * Simple check to ensure all ports are not used up causing
- * a deadlock here.
- *
- * We split the two cases (up and down) so that the direction
- * is not being tested on each round of the loop.
- */
+ if (sotopf(so) == PF_INET6) {
+ wild = INPLOOKUP_IPV6;
+ rtableid = 0;
+ faddrp = &zeroin6_addr;
+ laddrp = &inp->inp_laddr6;
+ }
+ else
+#endif /* INET6 */
+ {
+ wild = 0;
+ rtableid = inp->inp_rtableid;
+ faddrp = &zeroin_addr;
+ laddrp = &inp->inp_laddr;
+ }
- if (first > last) {
- /*
- * counting down
- */
- count = first - last;
- if (count)
- *lastport = first - arc4random_uniform(count);
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
+ ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
+ (so->so_options & SO_ACCEPTCONN) == 0))
+ wild |= INPLOOKUP_WILDCARD;
- do {
- if (count-- < 0) /* completely used? */
- return (EADDRNOTAVAIL);
- --*lastport;
- if (*lastport > first || *lastport < last)
- *lastport = first;
- lport = htons(*lastport);
- } while (in_baddynamic(*lastport,
so->so_proto->pr_protocol) ||
- in_pcblookup(table, &zeroin_addr, 0,
- &inp->inp_laddr, lport, wild, inp->inp_rtableid));
- } else {
- /*
- * counting up
- */
- count = last - first;
- if (count)
- *lastport = first + arc4random_uniform(count);
+ if (inp->inp_flags & INP_HIGHPORT) {
+ first = ipport_hifirstauto; /* sysctl */
+ last = ipport_hilastauto;
+ } else if (inp->inp_flags & INP_LOWPORT) {
+ if ((error = suser(p, 0)))
+ return (EACCES);
+ first = IPPORT_RESERVED-1; /* 1023 */
+ last = 600; /* not IPPORT_RESERVED/2 */
+ } else {
+ first = ipport_firstauto; /* sysctl */
+ last = ipport_lastauto;
+ }
- do {
- if (count-- < 0) /* completely used? */
- return (EADDRNOTAVAIL);
- ++*lastport;
- if (*lastport < first || *lastport > last)
- *lastport = first;
- lport = htons(*lastport);
- } while (in_baddynamic(*lastport,
so->so_proto->pr_protocol) ||
- in_pcblookup(table, &zeroin_addr, 0,
- &inp->inp_laddr, lport, wild, inp->inp_rtableid));
- }
+ /*
+ * Simple check to ensure all ports are not used up causing
+ * a deadlock here.
+ *
+ * We split the two cases (up and down) so that the direction
+ * is not being tested on each round of the loop.
+ * XXX: Does this optimization really have a relevant effect?
+ * How many iterations can we expect?
+ * What about branch prediction?
+ * Shouldn't we forbid the first > last case completely?
+ */
+
+ if (first > last) {
+ /*
+ * counting down
+ */
+ count = first - last;
+ if (count)
+ *lastport = first - arc4random_uniform(count);
+
+ do {
+ if (count-- < 0) /* completely used? */
+ return (EADDRNOTAVAIL);
+ --*lastport;
+ if (*lastport > first || *lastport < last)
+ *lastport = first;
+ lport = htons(*lastport);
+ } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
+ in_pcblookup(table, faddrp, 0,
+ laddrp, lport, wild, rtableid));
+ } else {
+ /*
+ * counting up
+ */
+ count = last - first;
+ if (count)
+ *lastport = first + arc4random_uniform(count);
+
+ do {
+ if (count-- < 0) /* completely used? */
+ return (EADDRNOTAVAIL);
+ ++*lastport;
+ if (*lastport < first || *lastport > last)
+ *lastport = first;
+ lport = htons(*lastport);
+ } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
+ in_pcblookup(table, faddrp, 0,
+ laddrp, lport, wild, rtableid));
}
+
inp->inp_lport = lport;
- in_pcbrehash(inp);
- return (0);
+
+#if 0
+ inp->inp_flowinfo = 0; /* XXX */
+#endif
+
+ return 0;
}
/*
@@ -418,13 +467,12 @@ in_pcbconnect(struct inpcb *inp, struct
return (EADDRINUSE);
if (inp->inp_laddr.s_addr == INADDR_ANY) {
if (inp->inp_lport == 0 &&
- in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL)
- return (EADDRNOTAVAIL);
+ in_pcbsetport(inp, curproc) == EADDRNOTAVAIL)
+ return EADDRNOTAVAIL;
inp->inp_laddr = ifaddr->sin_addr;
}
inp->inp_faddr = sin->sin_addr;
inp->inp_fport = sin->sin_port;
- in_pcbrehash(inp);
#ifdef IPSEC
{
int error; /* This is just ignored */
@@ -452,7 +500,6 @@ in_pcbdisconnect(struct inpcb *inp)
}
inp->inp_fport = 0;
- in_pcbrehash(inp);
if (inp->inp_socket->so_state & SS_NOFDREF)
in_pcbdetach(inp);
}
Index: netinet/in_pcb.h
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.h,v
retrieving revision 1.79
diff -u -p -r1.79 in_pcb.h
--- netinet/in_pcb.h 31 May 2013 13:15:53 -0000 1.79
+++ netinet/in_pcb.h 6 Aug 2013 18:25:54 -0000
@@ -297,6 +297,6 @@ int in6_pcbnotify(struct inpcbtable *, s
u_int, const struct sockaddr_in6 *, u_int, int, void *,
void (*)(struct inpcb *, int));
int in6_selecthlim(struct inpcb *, struct ifnet *);
-int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct proc *);
+int in_pcbsetport(struct inpcb *, struct proc *);
#endif /* _KERNEL */
#endif /* _NETINET_IN_PCB_H_ */
Index: netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.13
diff -u -p -r1.13 ip_divert.c
--- netinet/ip_divert.c 8 Apr 2013 15:32:23 -0000 1.13
+++ netinet/ip_divert.c 6 Aug 2013 18:25:55 -0000
@@ -319,6 +319,8 @@ divert_usrreq(struct socket *so, int req
case PRU_BIND:
s = splsoftnet();
error = in_pcbbind(inp, addr, p);
+ if (error == 0)
+ in_pcbrehash(inp);
splx(s);
break;
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.265
diff -u -p -r1.265 tcp_input.c
--- netinet/tcp_input.c 1 Jul 2013 10:53:52 -0000 1.265
+++ netinet/tcp_input.c 6 Aug 2013 18:26:01 -0000
@@ -3820,6 +3820,7 @@ syn_cache_get(struct sockaddr *src, stru
break;
#endif
}
+ in_pcbrehash(inp);
(void) m_free(am);
tp = intotcpcb(inp);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.112
diff -u -p -r1.112 tcp_usrreq.c
--- netinet/tcp_usrreq.c 17 May 2013 09:04:30 -0000 1.112
+++ netinet/tcp_usrreq.c 6 Aug 2013 18:26:02 -0000
@@ -228,6 +228,7 @@ tcp_usrreq(so, req, m, nam, control, p)
error = in_pcbbind(inp, nam, p);
if (error)
break;
+ in_pcbrehash(inp);
break;
/*
@@ -244,8 +245,10 @@ tcp_usrreq(so, req, m, nam, control, p)
}
/* If the in_pcbbind() above is called, the tp->pf
should still be whatever it was before. */
- if (error == 0)
+ if (error == 0) {
+ in_pcbrehash(inp);
tp->t_state = TCPS_LISTEN;
+ }
break;
/*
@@ -304,6 +307,8 @@ tcp_usrreq(so, req, m, nam, control, p)
error = ENOBUFS;
break;
}
+
+ in_pcbrehash(inp);
so->so_state |= SS_CONNECTOUT;
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.164
diff -u -p -r1.164 udp_usrreq.c
--- netinet/udp_usrreq.c 9 Jun 2013 22:03:06 -0000 1.164
+++ netinet/udp_usrreq.c 6 Aug 2013 18:26:04 -0000
@@ -1164,6 +1164,8 @@ udp_usrreq(struct socket *so, int req, s
else
#endif
error = in_pcbbind(inp, addr, p);
+ if (error == 0)
+ in_pcbrehash(inp);
splx(s);
break;
@@ -1180,7 +1182,6 @@ udp_usrreq(struct socket *so, int req, s
}
s = splsoftnet();
error = in6_pcbconnect(inp, addr);
- splx(s);
} else
#endif /* INET6 */
{
@@ -1190,8 +1191,9 @@ udp_usrreq(struct socket *so, int req, s
}
s = splsoftnet();
error = in_pcbconnect(inp, addr);
- splx(s);
}
+ in_pcbrehash(inp);
+ splx(s);
if (error == 0)
soisconnected(so);
@@ -1229,6 +1231,7 @@ udp_usrreq(struct socket *so, int req, s
#endif /* INET6 */
inp->inp_laddr.s_addr = INADDR_ANY;
in_pcbdisconnect(inp);
+ in_pcbrehash(inp);
splx(s);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.56
diff -u -p -r1.56 in6_pcb.c
--- netinet6/in6_pcb.c 31 May 2013 15:04:24 -0000 1.56
+++ netinet6/in6_pcb.c 6 Aug 2013 18:26:05 -0000
@@ -267,8 +267,8 @@ in6_pcbbind(struct inpcb *inp, struct mb
return error;
t = in_pcblookup(head,
- (struct in_addr *)&zeroin6_addr, 0,
- (struct in_addr *)&sin6->sin6_addr, lport,
+ &zeroin6_addr, 0,
+ &sin6->sin6_addr, lport,
wild, /* XXX */ 0);
if (t && (reuseport & t->inp_socket->so_options) == 0)
@@ -278,101 +278,11 @@ in6_pcbbind(struct inpcb *inp, struct mb
}
if (lport == 0) {
- error = in6_pcbsetport(&inp->inp_laddr6, inp, p);
+ error = in_pcbsetport(inp, p);
if (error != 0)
return error;
- } else {
+ } else
inp->inp_lport = lport;
- in_pcbrehash(inp);
- }
-
- return 0;
-}
-
-int
-in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct proc *p)
-{
- struct socket *so = inp->inp_socket;
- struct inpcbtable *table = inp->inp_table;
- u_int16_t first, last;
- u_int16_t *lastport = &inp->inp_table->inpt_lastport;
- u_int16_t lport = 0;
- int count;
- int wild = INPLOOKUP_IPV6;
- int error;
-
- /* XXX we no longer support IPv4 mapped address, so no tweaks here */
-
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
- ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
- (so->so_options & SO_ACCEPTCONN) == 0))
- wild |= INPLOOKUP_WILDCARD;
-
- if (inp->inp_flags & INP_HIGHPORT) {
- first = ipport_hifirstauto; /* sysctl */
- last = ipport_hilastauto;
- } else if (inp->inp_flags & INP_LOWPORT) {
- if ((error = suser(p, 0)))
- return (EACCES);
- first = IPPORT_RESERVED-1; /* 1023 */
- last = 600; /* not IPPORT_RESERVED/2 */
- } else {
- first = ipport_firstauto; /* sysctl */
- last = ipport_lastauto;
- }
-
- /*
- * Simple check to ensure all ports are not used up causing
- * a deadlock here.
- *
- * We split the two cases (up and down) so that the direction
- * is not being tested on each round of the loop.
- */
-
- if (first > last) {
- /*
- * counting down
- */
- count = first - last;
- if (count)
- *lastport = first - arc4random_uniform(count);
-
- do {
- if (count-- < 0) /* completely used? */
- return (EADDRNOTAVAIL);
- --*lastport;
- if (*lastport > first || *lastport < last)
- *lastport = first;
- lport = htons(*lastport);
- } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
- in_pcblookup(table, &zeroin6_addr, 0,
- &inp->inp_laddr6, lport, wild, /* XXX */ 0));
- } else {
- /*
- * counting up
- */
- count = last - first;
- if (count)
- *lastport = first + arc4random_uniform(count);
-
- do {
- if (count-- < 0) /* completely used? */
- return (EADDRNOTAVAIL);
- ++*lastport;
- if (*lastport < first || *lastport > last)
- *lastport = first;
- lport = htons(*lastport);
- } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
- in_pcblookup(table, &zeroin6_addr, 0,
- &inp->inp_laddr6, lport, wild, /* XXX */ 0));
- }
-
- inp->inp_lport = lport;
- in_pcbrehash(inp);
-
-#if 0
- inp->inp_flowinfo = 0; /* XXX */
-#endif
return 0;
}
@@ -449,8 +359,9 @@ in6_pcbconnect(struct inpcb *inp, struct
return (EADDRINUSE);
}
if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
- if (inp->inp_lport == 0)
- (void)in6_pcbbind(inp, NULL, curproc);
+ if (inp->inp_lport == 0 &&
+ in_pcbsetport(inp, curproc) == EADDRNOTAVAIL)
+ return EADDRNOTAVAIL;
inp->inp_laddr6 = *in6a;
}
inp->inp_faddr6 = sin6->sin6_addr;
@@ -459,7 +370,6 @@ in6_pcbconnect(struct inpcb *inp, struct
if (ip6_auto_flowlabel)
inp->inp_flowinfo |=
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
- in_pcbrehash(inp);
return (0);
}
Index: netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.13
diff -u -p -r1.13 ip6_divert.c
--- netinet6/ip6_divert.c 26 Jun 2013 09:12:40 -0000 1.13
+++ netinet6/ip6_divert.c 6 Aug 2013 18:26:05 -0000
@@ -314,6 +314,8 @@ divert6_usrreq(struct socket *so, int re
case PRU_BIND:
s = splsoftnet();
error = in6_pcbbind(inp, addr, p);
+ if (error == 0)
+ in_pcbrehash(inp);
splx(s);
break;
Index: netinet6/udp6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/udp6_output.c,v
retrieving revision 1.19
diff -u -p -r1.19 udp6_output.c
--- netinet6/udp6_output.c 28 Mar 2013 16:45:16 -0000 1.19
+++ netinet6/udp6_output.c 6 Aug 2013 18:26:05 -0000
@@ -194,9 +194,12 @@ udp6_output(struct in6pcb *in6p, struct
error = EADDRNOTAVAIL;
goto release;
}
- if (in6p->in6p_lport == 0 &&
- (error = in6_pcbsetport(laddr, in6p, p)) != 0)
- goto release;
+ if (in6p->in6p_lport == 0) {
+ error = in_pcbsetport(in6p, p);
+ if (error)
+ goto release;
+ in_pcbrehash(in6p);
+ }
} else {
if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
error = ENOTCONN;
--
http://gmerlin.de
OpenPGP: http://gmerlin.de/christopher.pub
1917 680A 723C BF3D 2CA3 0E44 7E24 D19F 34B8 2A2A
pgplabvOFHMUC.pgp
Description: PGP signature
