Hi,
Please check the attached patches. The first adds a socket option to use
nonlocal bind (IP_FREEBIND on Linux), and the second adds a BGP option to
use such sockets ("nonlocal bind yes|no"). Some additional thoughts:
- the option could probably be implemented for any protocol, not only for BGP
- I think it is better to check whether this option has changed during
reconfiguration and, if so, to restart the protocol the hard way
- I did not check how it works with BFD enabled; maybe BFD will need to
inherit this socket option from BGP somehow
- enabling nonlocal bind for all BGP instances unconditionally could also be
considered; at least I see no obvious problems yet
What do you think?
On Sat, Jan 11, 2020 at 6:14 PM Ondrej Zajicek <[email protected]> wrote:
>
> On Thu, Jan 09, 2020 at 08:15:25PM +0100, Alexander Zubkov wrote:
> > Hi,
> >
> > Ondrej, what do you think of adding some option to bind socket in
> > Linux with IP_FREEBIND or IP_TRANSPARENT setsockopt?
>
> Hi
>
> Using IP_FREEBIND looks like an interesting way to fix this.
>
> --
> Elen sila lumenn' omentielvo
>
> Ondrej 'Santiago' Zajicek (email: [email protected])
> OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net)
> "To err is human -- to blame it on a computer is even more so."
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -179,7 +179,7 @@ bgp_open(struct bgp_proto *p)
sk->sport = port;
sk->iface = ifa;
sk->vrf = p->p.vrf;
- sk->flags = 0;
+ sk->flags = p->cf->nonlocal_bind ? SKF_BIND_NLOC : 0;
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->rbsize = BGP_RX_BUFFER_SIZE;
sk->tbsize = BGP_TX_BUFFER_SIZE;
@@ -1103,6 +1103,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
s->dport = p->cf->remote_port;
s->iface = p->neigh ? p->neigh->iface : NULL;
s->vrf = p->p.vrf;
+ s->flags = p->cf->nonlocal_bind ? SKF_BIND_NLOC : 0;
s->ttl = p->cf->ttl_security ? 255 : hops;
s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -86,6 +86,7 @@ struct bgp_config {
int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */
int multihop; /* Number of hops if multihop */
int strict_bind; /* Bind listening socket to local address */
+ int nonlocal_bind; /* Bind listening socket even to nonexistent address */
int ttl_security; /* Enable TTL security [RFC 5082] */
int compare_path_lengths; /* Use path lengths when selecting best route */
int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -31,7 +31,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE,
- FIRST)
+ FIRST, NONLOCAL)
%type <i> bgp_nh
%type <i32> bgp_afi
@@ -154,6 +154,7 @@ bgp_proto:
}
| bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); }
| bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; }
+ | bgp_proto NONLOCAL BIND bool ';' { BGP_CFG->nonlocal_bind = $4; }
| bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; }
| bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; }
| bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; }
diff --git a/lib/socket.h b/lib/socket.h
--- a/lib/socket.h
+++ b/lib/socket.h
@@ -123,6 +123,7 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou
#define SKF_TTL_RX 0x08 /* Report TTL / Hop Limit for RX packets */
#define SKF_BIND 0x10 /* Bind datagram socket to given source address */
#define SKF_HIGH_PORT 0x20 /* Choose port from high range if possible */
+#define SKF_BIND_NLOC 0x40 /* Use nonlocal bind option for the socket */
#define SKF_THREAD 0x100 /* Socked used in thread, Do not add to main loop */
#define SKF_TRUNCATED 0x200 /* Received packet was truncated, set by IO layer */
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -1328,6 +1328,7 @@ sk_open(sock *s)
int bind_port = 0;
ip_addr bind_addr = IPA_NONE;
sockaddr sa;
+ int y = 1;
if (s->type <= SK_IP)
{
@@ -1436,6 +1437,16 @@ sk_open(sock *s)
if (sk_set_high_port(s) < 0)
log(L_WARN "Socket error: %s%#m", s->err);
+ if (s->flags & SKF_BIND_NLOC)
+ {
+#ifdef IP_FREEBIND
+ if (setsockopt(s->fd, SOL_IP, IP_FREEBIND, &y, sizeof(y)) < 0)
+ ERR("IP_FREEBIND");
+#else
+ ERR("nonlocal bind is not supported");
+#endif
+ }
+
sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
ERR2("bind");