cisco have a tweak to gre that splits the key field into a 24bit
key and 8 bit flow id. the key still identifies the network, but
the flow id represents the traffic inside the tunnel.
i figured since gre has its own special ioctls for configuring the
keepalives, it could grow another small one to enable key entropy.
their use is implemented in ifconfig as the vnentropy and -vnentropy
options. if a vnetid has entropy enabled, it's suffixed with + in
the encap output.
it is interesting to note that this arrangement of the gre key field
is used in nvgre. in fact, egre with vnentropy enabled and nvgre
are indistinguishable on the wire. adding key entropy to egre with
this diff is a step toward support for nvgre.
i tested interoperability with a cisco csr1000v using the following
config:
interface Tunnel1
ip address 100.64.0.1 255.255.255.255
tunnel source GigabitEthernet1
tunnel destination 192.168.0.40
tunnel entropy
tunnel key 1234
!
interface GigabitEthernet1
ip address 192.168.0.59 255.255.255.0
negotiation auto
no mop enabled
no mop sysid
!
ip route 100.64.0.2 255.255.255.255 Tunnel1
the state is:
Router#sh int tu1
Tunnel1 is up, line protocol is up
Hardware is Tunnel
Internet address is 100.64.0.1/32
MTU 9972 bytes, BW 100 Kbit/sec, DLY 50000 usec,
reliability 255/255, txload 1/255, rxload 1/255
Encapsulation TUNNEL, loopback not set
Keepalive not set
Tunnel linestate evaluation up
Tunnel source 192.168.0.59 (GigabitEthernet1), destination 192.168.0.40
Tunnel Subblocks:
src-track:
Tunnel1 source tracking subblock associated with GigabitEthernet1
Set of tunnels with source GigabitEthernet1, 1 member (includes
iterators), on interface <OK>
Tunnel protocol/transport GRE/IP
Key 0x4D2, sequencing disabled
Checksumming of packets disabled
Tunnel Entropy Calculation Enabled (24-bit Key)
Tunnel TTL 255, Fast tunneling enabled
Tunnel transport MTU 1472 bytes
Tunnel transmit bandwidth 8000 (kbps)
Tunnel receive bandwidth 8000 (kbps)
Last input never, output never, output hang never
Last clearing of "show interface" counters 00:01:07
Input queue: 0/375/0/0 (size/max/drops/flushes); Total output drops: 0
Queueing strategy: fifo
Output queue: 0/0 (size/max)
5 minute input rate 0 bits/sec, 0 packets/sec
5 minute output rate 0 bits/sec, 0 packets/sec
0 packets input, 0 bytes, 0 no buffer
Received 0 broadcasts (0 IP multicasts)
0 runts, 0 giants, 0 throttles
0 input errors, 0 CRC, 0 frame, 0 overrun, 0 ignored, 0 abort
0 packets output, 0 bytes, 0 underruns
0 output errors, 0 collisions, 0 interface resets
0 unknown protocol drops
0 output buffer failures, 0 output buffers swapped out
on the openbsd side:
# cat gre
ifconfig gre0 tunnel 192.168.0.40 192.168.0.59
ifconfig gre0 vnetid 1234
ifconfig gre0 vnentropy
ifconfig gre0 inet 100.64.0.2 100.64.0.1 netmask 255.255.255.255
# ifconfig gre0
gre0: flags=8051<UP,POINTOPOINT,RUNNING,MULTICAST> mtu 1476
index 8 priority 0 llprio 3
encap: vnetid 1234+
groups: gre
tunnel: inet 192.168.0.40 -> 192.168.0.59 ttl 64 nodf
inet 100.64.0.2 --> 100.64.0.1 netmask 0xffffffff
i can ping from cisco to openbsd, and vice versa:
Router#sh ip route 100.64.0.2
Routing entry for 100.64.0.2/32
Known via "static", distance 1, metric 0 (connected)
Routing Descriptor Blocks:
* directly connected, via Tunnel1
Route metric is 0, traffic share count is 1
Router#ping 100.64.0.2
Type escape sequence to abort.
Sending 5, 100-byte ICMP Echos to 100.64.0.2, timeout is 2 seconds:
!!!!!
Success rate is 100 percent (5/5), round-trip min/avg/max = 1/1/1 ms
$ ping -c 2 100.64.0.1
PING 100.64.0.1 (100.64.0.1): 56 data bytes
64 bytes from 100.64.0.1: icmp_seq=0 ttl=255 time=2.686 ms
64 bytes from 100.64.0.1: icmp_seq=1 ttl=255 time=0.470 ms
--- 100.64.0.1 ping statistics ---
2 packets transmitted, 2 packets received, 0.0% packet loss
round-trip min/avg/max/std-dev = 0.470/1.578/2.686/1.108 ms
and it looks like this in tcpdump:
15:00:55.084359 192.168.0.59 > 192.168.0.40: gre key=315979|1234+4b 100.64.0.1
> 100.64.0.2: icmp: echo request
15:00:55.084391 192.168.0.40 > 192.168.0.59: gre key=316142|1234+ee 100.64.0.2
> 100.64.0.1: icmp: echo reply
15:00:55.085212 192.168.0.59 > 192.168.0.40: gre key=315979|1234+4b 100.64.0.1
> 100.64.0.2: icmp: echo request
15:00:55.085234 192.168.0.40 > 192.168.0.59: gre key=316142|1234+ee 100.64.0.2
> 100.64.0.1: icmp: echo reply
15:00:55.085955 192.168.0.59 > 192.168.0.40: gre key=315979|1234+4b 100.64.0.1
> 100.64.0.2: icmp: echo request
15:00:55.085977 192.168.0.40 > 192.168.0.59: gre key=316142|1234+ee 100.64.0.2
> 100.64.0.1: icmp: echo reply
15:00:55.086745 192.168.0.59 > 192.168.0.40: gre key=315979|1234+4b 100.64.0.1
> 100.64.0.2: icmp: echo request
15:00:55.086766 192.168.0.40 > 192.168.0.59: gre key=316142|1234+ee 100.64.0.2
> 100.64.0.1: icmp: echo reply
15:00:55.087345 192.168.0.59 > 192.168.0.40: gre key=315979|1234+4b 100.64.0.1
> 100.64.0.2: icmp: echo request
15:00:55.087367 192.168.0.40 > 192.168.0.59: gre key=316142|1234+ee 100.64.0.2
> 100.64.0.1: icmp: echo reply
15:01:33.542708 192.168.0.40 > 192.168.0.59: gre key=316143|1234+ef 100.64.0.2
> 100.64.0.1: icmp: echo request
15:01:33.545155 192.168.0.59 > 192.168.0.40: gre key=315979|1234+4b 100.64.0.1
> 100.64.0.2: icmp: echo reply
15:01:34.551662 192.168.0.40 > 192.168.0.59: gre key=316143|1234+ef 100.64.0.2
> 100.64.0.1: icmp: echo request
15:01:34.552088 192.168.0.59 > 192.168.0.40: gre key=315979|1234+4b 100.64.0.1
> 100.64.0.2: icmp: echo reply
ok?
Index: sys/sys/sockio.h
===================================================================
RCS file: /cvs/src/sys/sys/sockio.h,v
retrieving revision 1.74
diff -u -p -r1.74 sockio.h
--- sys/sys/sockio.h 19 Feb 2018 00:21:31 -0000 1.74
+++ sys/sys/sockio.h 19 Feb 2018 04:37:57 -0000
@@ -194,6 +194,9 @@
#define SIOCSLIFPHYDF _IOW('i', 193, struct ifreq) /* set tunnel
df/nodf */
#define SIOCGLIFPHYDF _IOWR('i', 194, struct ifreq) /* set tunnel
df/nodf */
+#define SIOCSVNENTROPY _IOW('i', 195, struct ifreq) /* set vnetid
entropy */
+#define SIOCGVNENTROPY _IOWR('i', 196, struct ifreq) /* get vnetid
entropy */
+
#define SIOCSVH _IOWR('i', 245, struct ifreq) /* set carp
param */
#define SIOCGVH _IOWR('i', 246, struct ifreq) /* get carp
param */
Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.545
diff -u -p -r1.545 if.c
--- sys/net/if.c 19 Feb 2018 00:24:48 -0000 1.545
+++ sys/net/if.c 19 Feb 2018 04:37:57 -0000
@@ -2119,6 +2119,7 @@ ifioctl(struct socket *so, u_long cmd, c
case SIOCDELMULTI:
case SIOCSIFMEDIA:
case SIOCSVNETID:
+ case SIOCSVNENTROPY:
case SIOCSIFPAIR:
case SIOCSIFPARENT:
case SIOCDIFPARENT:
Index: sys/net/if_gre.c
===================================================================
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.105
diff -u -p -r1.105 if_gre.c
--- sys/net/if_gre.c 19 Feb 2018 00:46:27 -0000 1.105
+++ sys/net/if_gre.c 19 Feb 2018 04:37:57 -0000
@@ -142,6 +142,14 @@ union gre_addr {
struct in6_addr in6;
};
+/*
+ * Range and placement of the vnetid when the whole GRE key field is
+ * used for it (no entropy).
+ */
+#define GRE_KEY_MIN		0x00000000U
+#define GRE_KEY_MAX		0xffffffffU
+#define GRE_KEY_SHIFT		0
+
+/*
+ * Range and placement of the vnetid when entropy is enabled: the id
+ * is limited to 24 bits and is stored shifted left by 8 bits.
+ */
+#define GRE_KEY_ENTROPY_MIN	0x00000000U
+#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
+#define GRE_KEY_ENTROPY_SHIFT	8
+
struct gre_tunnel {
uint32_t t_key_mask;
#define GRE_KEY_NONE htonl(0x00000000U)
@@ -172,6 +180,9 @@ static int gre_set_vnetid(struct gre_tun
static int gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
static int gre_del_vnetid(struct gre_tunnel *);
+static int gre_set_vnentropy(struct gre_tunnel *, struct ifreq *);
+static int gre_get_vnentropy(struct gre_tunnel *, struct ifreq *);
+
static struct mbuf *
gre_encap(const struct gre_tunnel *, struct mbuf *, uint16_t,
uint8_t, uint8_t);
@@ -649,6 +660,11 @@ gre_input_key(struct mbuf **mp, int *off
pf_pkt_addr_changed(m);
#endif
+ if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
+ m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
+ (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);
+ }
+
ifp->if_ipackets++;
ifp->if_ibytes += m->m_pkthdr.len;
@@ -709,6 +725,11 @@ egre_input(const struct gre_tunnel *key,
pf_pkt_addr_changed(m);
#endif
+ if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
+ m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
+ (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);
+ }
+
ml_enqueue(&ml, m);
if_input(&sc->sc_ac.ac_if, &ml);
@@ -991,6 +1012,12 @@ gre_encap(const struct gre_tunnel *tunne
gkh = (struct gre_h_key *)(gh + 1);
gkh->gre_key = tunnel->t_key;
+
+ if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
+ ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) {
+ gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
+ (m->m_pkthdr.ph_flowid & M_FLOWID_MASK));
+ }
}
switch (tunnel->t_af) {
@@ -1101,6 +1128,14 @@ gre_tunnel_ioctl(struct ifnet *ifp, stru
error = gre_del_vnetid(tunnel);
break;
+ case SIOCSVNENTROPY:
+ error = gre_set_vnentropy(tunnel, ifr);
+ break;
+
+ case SIOCGVNENTROPY:
+ error = gre_get_vnentropy(tunnel, ifr);
+ break;
+
case SIOCSLIFPHYADDR:
error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data);
break;
@@ -1248,6 +1283,7 @@ egre_ioctl(struct ifnet *ifp, u_long cmd
case SIOCSVNETID:
case SIOCDVNETID:
+ case SIOCSVNENTROPY:
case SIOCSLIFPHYADDR:
case SIOCDIFPHYADDR:
case SIOCSLIFPHYRTABLE:
@@ -1576,17 +1612,25 @@ static int
gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
uint32_t key;
+ uint32_t min = GRE_KEY_MIN;
+ uint32_t max = GRE_KEY_MAX;
+ unsigned int shift = GRE_KEY_SHIFT;
+ uint32_t mask = GRE_KEY_MASK;
+
+ if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
+ min = GRE_KEY_ENTROPY_MIN;
+ max = GRE_KEY_ENTROPY_MAX;
+ shift = GRE_KEY_ENTROPY_SHIFT;
+ mask = GRE_KEY_ENTROPY;
+ }
- if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffffffff)
- return EINVAL;
-
- key = htonl(ifr->ifr_vnetid);
+ if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
+ return (EINVAL);
- if (tunnel->t_key_mask == GRE_KEY_MASK && tunnel->t_key == key)
- return (0);
+ key = htonl(ifr->ifr_vnetid << shift);
/* commit */
- tunnel->t_key_mask = GRE_KEY_MASK;
+ tunnel->t_key_mask = mask;
tunnel->t_key = key;
return (0);
@@ -1595,10 +1639,20 @@ gre_set_vnetid(struct gre_tunnel *tunnel
+/*
+ * Copy the configured vnetid out to ifr->ifr_vnetid.
+ * Returns EADDRNOTAVAIL when no key is configured, 0 otherwise.
+ */
static int
gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
- if (tunnel->t_key_mask == GRE_KEY_NONE)
+	int shift;
+
+	/*
+	 * gre_set_vnetid() stores the id pre-shifted when entropy is
+	 * enabled, so pick the shift that undoes that.
+	 */
+	switch (tunnel->t_key_mask) {
+	case GRE_KEY_NONE:
return (EADDRNOTAVAIL);
+	case GRE_KEY_ENTROPY:
+		shift = GRE_KEY_ENTROPY_SHIFT;
+		break;
+	case GRE_KEY_MASK:
+	default:
+		/* never read shift uninitialised on an unexpected mask */
+		shift = GRE_KEY_SHIFT;
+		break;
+	}
- ifr->ifr_vnetid = (int64_t)ntohl(tunnel->t_key);
+	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
return (0);
}
@@ -1607,6 +1661,47 @@ static int
gre_del_vnetid(struct gre_tunnel *tunnel)
{
tunnel->t_key_mask = GRE_KEY_NONE;
+
+ return (0);
+}
+
+/*
+ * Switch a tunnel that already has a key between plain mode and
+ * entropy mode.  A non-zero ifr->ifr_vnetid requests entropy mode.
+ * Returns EADDRNOTAVAIL when no key is configured and ERANGE when the
+ * current id is larger than GRE_KEY_ENTROPY_MAX.
+ */
+static int
+gre_set_vnentropy(struct gre_tunnel *tunnel, struct ifreq *ifr)
+{
+	uint32_t newmask, vnetid;
+
+	if (tunnel->t_key_mask == GRE_KEY_NONE)
+		return (EADDRNOTAVAIL);
+
+	if (ifr->ifr_vnetid)
+		newmask = GRE_KEY_ENTROPY;
+	else
+		newmask = GRE_KEY_MASK;
+
+	/* already in the requested mode, nothing to do */
+	if (newmask == tunnel->t_key_mask)
+		return (0);
+
+	vnetid = ntohl(tunnel->t_key);
+	if (newmask != GRE_KEY_ENTROPY) {
+		/* leaving entropy mode: move the id back down */
+		vnetid >>= GRE_KEY_ENTROPY_SHIFT;
+	} else {
+		/* entering entropy mode: the id has to fit first */
+		if (vnetid > GRE_KEY_ENTROPY_MAX)
+			return (ERANGE);
+
+		vnetid <<= GRE_KEY_ENTROPY_SHIFT;
+	}
+
+	/* commit */
+	tunnel->t_key_mask = newmask;
+	tunnel->t_key = htonl(vnetid);
+
+	return (0);
+}
+
+/*
+ * Report through ifr->ifr_vnetid whether the tunnel key is in entropy
+ * mode (1) or not (0).  Returns EADDRNOTAVAIL when no key is
+ * configured.
+ */
+static int
+gre_get_vnentropy(struct gre_tunnel *tunnel, struct ifreq *ifr)
+{
+	const uint32_t mask = tunnel->t_key_mask;
+
+	if (mask == GRE_KEY_NONE)
+		return (EADDRNOTAVAIL);
+
+	ifr->ifr_vnetid = (mask == GRE_KEY_ENTROPY);
return (0);
}
Index: sbin/ifconfig/ifconfig.8
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.300
diff -u -p -r1.300 ifconfig.8
--- sbin/ifconfig/ifconfig.8 19 Feb 2018 00:23:57 -0000 1.300
+++ sbin/ifconfig/ifconfig.8 19 Feb 2018 04:37:57 -0000
@@ -1606,6 +1606,7 @@ for a complete list of the available pro
.Op Oo Fl Oc Ns Cm tunneldf
.Op Cm tunnelttl Ar ttl
.Op Oo Fl Oc Ns Cm vnetid Ar network-id
+.Op Oo Fl Oc Ns Cm vnentropy
.Ek
.nr nS 0
.Pp
@@ -1681,6 +1682,14 @@ to accept packets with arbitrary network
multipoint-to-multipoint modes).
.It Cm -vnetid
Clear the virtual network identifier.
+.It Cm vnentropy
+Enable splitting of the virtual network identifier space into a
+network identifier and a flow identifier.
+This may allow load balancing of the encapsulated traffic over
+multiple links.
+.It Cm -vnentropy
+Disable splitting of the virtual network identifier space into a
+network identifier and a flow identifier.
.El
.Sh UMB
.nr nS 1
Index: sbin/ifconfig/ifconfig.c
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.358
diff -u -p -r1.358 ifconfig.c
--- sbin/ifconfig/ifconfig.c 19 Feb 2018 00:23:57 -0000 1.358
+++ sbin/ifconfig/ifconfig.c 19 Feb 2018 04:37:57 -0000
@@ -121,6 +121,23 @@
"\5VLAN_MTU\6VLAN_HWTAGGING\10CSUM_TCPv6" \
"\11CSUM_UDPv6\20WOL"
+/*
+ * Encapsulation details collected for one interface by getencap().
+ * ife_flags packs several independent sub-fields, each selected with
+ * its own mask.
+ */
+struct ifencap {
+	unsigned int	 ife_flags;
+
+	/* vnetid state, selected with IFE_VNETID_MASK */
+#define IFE_VNETID_MASK		0xf
+#define IFE_VNETID_NOPE		0x0
+#define IFE_VNETID_NONE		0x1
+#define IFE_VNETID_ANY		0x2
+#define IFE_VNETID_SET		0x3
+	/* separate bit, outside IFE_VNETID_MASK: vnetid entropy enabled */
+#define IFE_VNETID_ENTROPY	0x10
+	int64_t		 ife_vnetid;
+
+	/* parent state, selected with IFE_PARENT_MASK */
+#define IFE_PARENT_MASK		0xf00
+#define IFE_PARENT_NOPE		0x000
+#define IFE_PARENT_NONE		0x100
+#define IFE_PARENT_SET		0x200
+	char		 ife_parent[IFNAMSIZ];
+};
+
struct ifreq ifr, ridreq;
struct in_aliasreq in_addreq;
struct in6_ifreq ifr6;
@@ -252,6 +269,9 @@ void setpfsync_syncpeer(const char *, in
void unsetpfsync_syncpeer(const char *, int);
void setpfsync_defer(const char *, int);
void pfsync_status(void);
+void setvnentropy(const char *, int);
+void delvnentropy(const char *, int);
+void getvnentropy(struct ifencap *);
void settunneldf(const char *, int);
void settunnelnodf(const char *, int);
void setpppoe_dev(const char *,int);
@@ -438,6 +458,8 @@ const struct cmd {
{ "tunnelttl", NEXTARG, 0, settunnelttl },
{ "tunneldf", 0, 0, settunneldf },
{ "-tunneldf", 0, 0, settunnelnodf },
+ { "vnentropy", 0, 0, setvnentropy },
+ { "-vnentropy", 0, 0, delvnentropy },
{ "pppoedev", NEXTARG, 0, setpppoe_dev },
{ "pppoesvc", NEXTARG, 0, setpppoe_svc },
{ "-pppoesvc", 1, 0, setpppoe_svc },
@@ -3312,6 +3334,40 @@ settunnelnodf(const char *ignored, int a
}
+/*
+ * "vnentropy" command: ask the driver to enable vnetid entropy on the
+ * interface named by the global `name'.  Both arguments are unused.
+ */
+/* ARGSUSED */
void
+setvnentropy(const char *ignored, int alsoignored)
+{
+	/* refuse a truncated interface name, as getvnentropy() does */
+	if (strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)) >=
+	    sizeof(ifr.ifr_name))
+		errx(1, "vnentropy: name is too long");
+
+	/* a non-zero ifr_vnetid requests entropy mode */
+	ifr.ifr_vnetid = 1;
+	if (ioctl(s, SIOCSVNENTROPY, &ifr) < 0)
+		warn("SIOCSVNENTROPY");
+}
+
+/*
+ * "-vnentropy" command: ask the driver to disable vnetid entropy on
+ * the interface named by the global `name'.  Both arguments are unused.
+ */
+/* ARGSUSED */
+void
+delvnentropy(const char *ignored, int alsoignored)
+{
+	/*
+	 * Select the interface explicitly instead of relying on whatever
+	 * an earlier command left in the shared ifr.
+	 */
+	if (strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)) >=
+	    sizeof(ifr.ifr_name))
+		errx(1, "vnentropy: name is too long");
+
+	/* a zero ifr_vnetid requests plain (non-entropy) mode */
+	ifr.ifr_vnetid = 0;
+	if (ioctl(s, SIOCSVNENTROPY, &ifr) < 0)
+		warn("SIOCSVNENTROPY");
+}
+
+/*
+ * Ask the driver whether vnetid entropy is enabled on the interface
+ * and, if it is, set IFE_VNETID_ENTROPY in ife->ife_flags.  A failing
+ * ioctl leaves ife untouched.
+ */
+void
+getvnentropy(struct ifencap *ife)
+{
+	size_t len;
+
+	len = strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+	if (len >= sizeof(ifr.ifr_name))
+		errx(1, "vnentropy: name is too long");
+
+	if (ioctl(s, SIOCGVNENTROPY, &ifr) != -1 && ifr.ifr_vnetid)
+		ife->ife_flags |= IFE_VNETID_ENTROPY;
+}
+
+
+void
mpe_status(void)
{
struct shim_hdr shim;
@@ -3497,22 +3553,6 @@ setmpwcontrolword(const char *value, int
}
#endif /* SMALL */
-struct ifencap {
- unsigned int ife_flags;
-#define IFE_VNETID_MASK 0xf
-#define IFE_VNETID_NOPE 0x0
-#define IFE_VNETID_NONE 0x1
-#define IFE_VNETID_ANY 0x2
-#define IFE_VNETID_SET 0x3
- int64_t ife_vnetid;
-
-#define IFE_PARENT_MASK 0xf0
-#define IFE_PARENT_NOPE 0x00
-#define IFE_PARENT_NONE 0x10
-#define IFE_PARENT_SET 0x20
- char ife_parent[IFNAMSIZ];
-};
-
void
setvnetid(const char *id, int param)
{
@@ -3564,6 +3604,12 @@ getvnetid(struct ifencap *ife)
ife->ife_flags |= IFE_VNETID_SET;
ife->ife_vnetid = ifr.ifr_vnetid;
+
+ if (ioctl(s, SIOCGVNENTROPY, &ifr) == -1)
+ return;
+
+ if (ifr.ifr_vnetid)
+ ife->ife_flags |= IFE_VNETID_ENTROPY;
}
void
@@ -3619,6 +3665,7 @@ getencap(void)
struct ifencap ife = { .ife_flags = 0 };
getvnetid(&ife);
+ getvnentropy(&ife);
getifparent(&ife);
if (ife.ife_flags == 0)
@@ -3635,6 +3682,8 @@ getencap(void)
break;
case IFE_VNETID_SET:
printf(" vnetid %lld", ife.ife_vnetid);
+ if (ife.ife_flags & IFE_VNETID_ENTROPY)
+ printf("+");
break;
}