Hi,
The LISP tunneling support as of now is not yet ready for upstreaming,
for reasons outlined in this message:
http://openvswitch.org/pipermail/dev/2013-February/025459.html
One solution to the above issues is to make OVS less Ethernet specific,
meaning that it should accept and work with packets/flows without an
Ethernet header.
At a high level, we would introduce layer 3 (tunnel) vports, and LISP
would be such a vport. Whenever a packet that ingressed on an L2 vport
needs to egress on an L3 vport, we apply the internal pop_eth action
automatically. For packets going from L3 vports to L2 vports, a
push_eth action would add a MAC header, with addresses determined by ARP
resolution in user space.
I attached a patch to this email with proposed changes to the datapath
to make this happen. I didn't use git-send-email since it is still
early work, and I don't expect anyone to apply it; I just wanted to get
some early feedback on some of the design decisions.
One such decision is how to handle the flow key. I set all fields in
key->eth to 0, except the type, because we still need to know what kind
of L3 packet we have. Since a lot of code accesses key->eth.type, this
is easier than keeping this information in a different place, although
it would be more elegant to set this field to 0 as well. Now, in order
to differentiate flows with MAC addresses set to 0 from flows without an
Ethernet header, I added a boolean field to tun_key to mark L3 flows.
However, if we expect to have non-tunneled L3 ports (I couldn't find a
good reason for this), then we should move it out into the main flow key
structure.
Let me know what you think.
Thanks,
-Lori
diff --git a/datapath/actions.c b/datapath/actions.c
index 0dac658..fcb80a5 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -138,6 +138,26 @@ static int set_eth_addr(struct sk_buff *skb,
return 0;
}
+static int pop_eth(struct sk_buff *skb)
+{
+ skb_pull(skb, skb_network_offset(skb));
+ return 0;
+}
+
+static int push_eth(struct sk_buff *skb, const struct ovs_action_push_eth *ethh)
+{
+ int err;
+
+ skb_push(skb, ETH_HLEN);
+
+ err = set_eth_addr(skb, &ethh->addresses);
+ if (unlikely(err))
+ return err;
+
+ eth_hdr(skb)->h_proto = ethh->eth_type;
+ return 0;
+}
+
static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
__be32 *addr, __be32 new_addr)
{
@@ -504,6 +524,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb);
break;
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ err = push_eth(skb, nla_data(a));
+ break;
+
+ case OVS_ACTION_ATTR_POP_ETH:
+ err = pop_eth(skb);
+ break;
+
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, nla_data(a));
break;
diff --git a/datapath/flow.c b/datapath/flow.c
index 7f897bd..98346d5 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -628,24 +628,27 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
skb_reset_mac_header(skb);
- /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
- * header in the linear data area.
- */
- eth = eth_hdr(skb);
- memcpy(key->eth.src, eth->h_source, ETH_ALEN);
- memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+ /* Link layer. */
+ if (key->tun_key.is_layer3) {
+ /* The L3 tunnel should set the inner packet protocol on the skb */
+ key->eth.type = skb->protocol;
+ } else {
+ eth = eth_hdr(skb);
+ memcpy(key->eth.src, eth->h_source, ETH_ALEN);
+ memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
- __skb_pull(skb, 2 * ETH_ALEN);
+ __skb_pull(skb, 2 * ETH_ALEN);
- if (vlan_tx_tag_present(skb))
- key->eth.tci = htons(vlan_get_tci(skb));
- else if (eth->h_proto == htons(ETH_P_8021Q))
- if (unlikely(parse_vlan(skb, key)))
- return -ENOMEM;
+ if (vlan_tx_tag_present(skb))
+ key->eth.tci = htons(vlan_get_tci(skb));
+ else if (eth->h_proto == htons(ETH_P_8021Q))
+ if (unlikely(parse_vlan(skb, key)))
+ return -ENOMEM;
- key->eth.type = parse_ethertype(skb);
- if (unlikely(key->eth.type == htons(0)))
- return -ENOMEM;
+ key->eth.type = parse_ethertype(skb);
+ if (unlikely(key->eth.type == htons(0)))
+ return -ENOMEM;
+ }
skb_reset_network_header(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
@@ -1034,6 +1037,9 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_CSUM:
tun_key->tun_flags |= OVS_TNL_F_CSUM;
break;
+ case OVS_TUNNEL_KEY_ATTR_LAYER3:
+ tun_key->is_layer3 = true;
+ break;
default:
return -EINVAL;
@@ -1079,6 +1085,9 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((tun_key->tun_flags & OVS_TNL_F_CSUM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
+ if ((tun_key->is_layer3) &&
+ nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_LAYER3))
+ return -EMSGSIZE;
nla_nest_end(skb, nla);
return 0;
@@ -1140,13 +1149,13 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
}
/* Data attributes. */
- if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
+ eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+ memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
+ memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
- eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
- memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
- memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+ }
if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
@@ -1335,7 +1344,7 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const stru
int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
- struct nlattr *nla, *encap;
+ struct nlattr *nla, *encap = NULL;
if (swkey->phy.priority &&
nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
@@ -1353,6 +1362,9 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
goto nla_put_failure;
+ if (swkey->tun_key.is_layer3)
+ goto noethernet;
+
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
@@ -1374,6 +1386,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
if (swkey->eth.type == htons(ETH_P_802_2))
goto unencap;
+noethernet:
if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type))
goto nla_put_failure;
diff --git a/datapath/flow.h b/datapath/flow.h
index dba66cf..0033fcd 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -57,6 +57,7 @@ struct ovs_key_ipv4_tunnel {
u16 tun_flags;
u8 ipv4_tos;
u8 ipv4_ttl;
+ bool is_layer3;
};
struct sw_flow_key {
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index 3d0a315..a1a5d22 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -197,8 +197,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
struct iphdr *iph, *inner_iph;
struct ovs_key_ipv4_tunnel tun_key;
__be64 key;
- struct ethhdr *ethh;
- __be16 protocol;
lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
if (unlikely(!lisp_port))
@@ -224,22 +222,15 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
inner_iph = (struct iphdr *)(lisph + 1);
switch (inner_iph->version) {
case 4:
- protocol = htons(ETH_P_IP);
+ skb->protocol = htons(ETH_P_IP);
break;
case 6:
- protocol = htons(ETH_P_IPV6);
+ skb->protocol = htons(ETH_P_IPV6);
break;
default:
goto error;
}
- /* Add Ethernet header */
- ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
- memset(ethh, 0, ETH_HLEN);
- ethh->h_dest[0] = 0x02;
- ethh->h_source[0] = 0x02;
- ethh->h_proto = protocol;
-
ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key);
goto out;
@@ -361,9 +352,6 @@ error:
static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
{
- int tnl_len;
- int network_offset = skb_network_offset(skb);
-
if (unlikely(!OVS_CB(skb)->tun_key))
return -EINVAL;
@@ -371,11 +359,8 @@ static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
switch (skb->protocol) {
case htons(ETH_P_IP):
case htons(ETH_P_IPV6):
- /* Pop off "inner" Ethernet header */
- skb_pull(skb, network_offset);
- tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP,
+ return ovs_tnl_send(vport, skb, IPPROTO_UDP,
LISP_HLEN, lisp_build_header);
- return tnl_len > 0 ? tnl_len + network_offset : tnl_len;
default:
kfree_skb(skb);
return 0;
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index e890fd8..dbff58f 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -303,6 +303,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */
OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */
OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */
+ OVS_TUNNEL_KEY_ATTR_LAYER3, /* No argument. Layer 3 tunnel. */
__OVS_TUNNEL_KEY_ATTR_MAX
};
@@ -494,6 +495,16 @@ struct ovs_action_push_vlan {
};
/**
+ * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
+ * @addresses: Source and destination MAC addresses.
+ * @eth_type: Ethernet type
+ */
+struct ovs_action_push_eth {
+ struct ovs_key_ethernet addresses;
+ __be16 eth_type;
+};
+
+/**
* enum ovs_action_attr - Action types.
*
* @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -532,6 +543,8 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
+ OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */
+ OVS_ACTION_ATTR_POP_ETH, /* No argument. */
__OVS_ACTION_ATTR_MAX
};
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev