Add support for MPLS over GRE and MPLS over UDP tunnel types as
described in the following RFCs:
1. https://tools.ietf.org/html/rfc4023
2. https://tools.ietf.org/html/rfc7510
3. https://tools.ietf.org/html/rfc4385

Signed-off-by: Matan Azrad <ma...@mellanox.com>
---
 doc/guides/nics/mlx5.rst     |   4 +-
 drivers/net/mlx5/Makefile    |   5 ++
 drivers/net/mlx5/mlx5.c      |  13 ++++
 drivers/net/mlx5/mlx5.h      |   1 +
 drivers/net/mlx5/mlx5_flow.c | 161 +++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 176 insertions(+), 8 deletions(-)


V2:
Ignore void items between the GRE and MPLS items (suggested by Nelio).


diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a7d5c90..2b110f4 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -74,7 +74,7 @@ Features
 - RX interrupts.
 - Statistics query including Basic, Extended and per queue.
 - Rx HW timestamp.
-- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE.
+- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE, MPLSoGRE, MPLSoUDP.
 - Tunnel HW offloads: packet type, inner/outer RSS, IP and UDP checksum verification.
 
 Limitations
@@ -113,6 +113,8 @@ Limitations
 
 - VXLAN TSO and checksum offloads are not supported on VM.
 
+- L3 VXLAN and VXLAN-GPE tunnels cannot be supported together with MPLSoGRE and MPLSoUDP.
+
 - VF: flow rules created on VF devices can only match traffic targeted at the
   configured MAC addresses (see ``rte_eth_dev_mac_addr_add()``).
 
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 8d64d4c..293144e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -108,6 +108,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
                enum MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS \
                $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
+               HAVE_IBV_DEVICE_MPLS_SUPPORT \
+               infiniband/verbs.h \
+               enum IBV_FLOW_SPEC_MPLS \
+               $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
                HAVE_IBV_WQ_FLAG_RX_END_PADDING \
                infiniband/verbs.h \
                enum IBV_WQ_FLAG_RX_END_PADDING \
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8aa91cc..225ebd4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -690,6 +690,7 @@
        unsigned int mps;
        unsigned int cqe_comp;
        unsigned int tunnel_en = 0;
+       unsigned int mpls_en = 0;
        unsigned int swp = 0;
        unsigned int verb_priorities = 0;
        unsigned int mprq = 0;
@@ -850,6 +851,17 @@
        DRV_LOG(WARNING,
                "tunnel offloading disabled due to old OFED/rdma-core version");
 #endif
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       mpls_en = ((attrs_out.tunnel_offloads_caps &
+                   MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&
+                  (attrs_out.tunnel_offloads_caps &
+                   MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));
+       DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported",
+               mpls_en ? "" : "not ");
+#else
+       DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to"
+               " old OFED/rdma-core version or firmware configuration");
+#endif
        err = mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr);
        if (err) {
                DEBUG("ibv_query_device_ex() failed");
@@ -873,6 +885,7 @@
                        .cqe_comp = cqe_comp,
                        .mps = mps,
                        .tunnel_en = tunnel_en,
+                       .mpls_en = mpls_en,
                        .tx_vec_en = 1,
                        .rx_vec_en = 1,
                        .mpw_hdr_dseg = 0,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c4c962b..7750832 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -93,6 +93,7 @@ struct mlx5_dev_config {
        unsigned int mps:2; /* Multi-packet send supported mode. */
        unsigned int tunnel_en:1;
        /* Whether tunnel stateless offloads are supported. */
+       unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
        unsigned int flow_counter_en:1; /* Whether flow counter is supported. */
        unsigned int cqe_comp:1; /* CQE compression is enabled. */
        unsigned int tso:1; /* Whether TSO is supported. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 7af1dfa..3af9524 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -101,6 +101,11 @@ struct mlx5_flow_data {
                     const void *default_mask,
                     struct mlx5_flow_data *data);
 
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     struct mlx5_flow_data *data);
+
 struct mlx5_flow_parse;
 
 static void
@@ -248,12 +253,14 @@ struct rte_flow {
 #define IS_TUNNEL(type) ( \
        (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
        (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
-       (type) == RTE_FLOW_ITEM_TYPE_GRE)
+       (type) == RTE_FLOW_ITEM_TYPE_GRE || \
+       (type) == RTE_FLOW_ITEM_TYPE_MPLS)
 
 const uint32_t flow_ptype[] = {
        [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
        [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
        [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
+       [RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
 };
 
 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
@@ -264,6 +271,10 @@ struct rte_flow {
        [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
                                                  RTE_PTYPE_L4_UDP,
        [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
+       [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
+               RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
+       [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
+               RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
 };
 
 /** Structure to generate a simple graph of layers supported by the NIC. */
@@ -400,7 +411,8 @@ struct mlx5_flow_items {
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
-                              RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
+                              RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
+                              RTE_FLOW_ITEM_TYPE_MPLS),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
@@ -429,7 +441,8 @@ struct mlx5_flow_items {
        [RTE_FLOW_ITEM_TYPE_GRE] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_IPV4,
-                              RTE_FLOW_ITEM_TYPE_IPV6),
+                              RTE_FLOW_ITEM_TYPE_IPV6,
+                              RTE_FLOW_ITEM_TYPE_MPLS),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_gre){
                        .protocol = -1,
@@ -437,7 +450,26 @@ struct mlx5_flow_items {
                .default_mask = &rte_flow_item_gre_mask,
                .mask_sz = sizeof(struct rte_flow_item_gre),
                .convert = mlx5_flow_create_gre,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+               .dst_sz = sizeof(struct ibv_flow_spec_gre),
+#else
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
+#endif
+       },
+       [RTE_FLOW_ITEM_TYPE_MPLS] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+                              RTE_FLOW_ITEM_TYPE_IPV4,
+                              RTE_FLOW_ITEM_TYPE_IPV6),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_mpls){
+                       .label_tc_s = "\xff\xff\xf0",
+               },
+               .default_mask = &rte_flow_item_mpls_mask,
+               .mask_sz = sizeof(struct rte_flow_item_mpls),
+               .convert = mlx5_flow_create_mpls,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+               .dst_sz = sizeof(struct ibv_flow_spec_mpls),
+#endif
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
@@ -865,6 +897,7 @@ struct ibv_spec_header {
        struct priv *priv = dev->data->dev_private;
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
+       unsigned int last_voids = 0;
        int ret = 0;
 
        /* Initialise the offsets to start after verbs attribute. */
@@ -874,8 +907,10 @@ struct ibv_spec_header {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
 
-               if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+               if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
+                       last_voids++;
                        continue;
+               }
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
@@ -896,7 +931,10 @@ struct ibv_spec_header {
                if (ret)
                        goto exit_item_not_supported;
                if (IS_TUNNEL(items->type)) {
-                       if (parser->tunnel) {
+                       if (parser->tunnel &&
+                           !((items - last_voids - 1)->type ==
+                             RTE_FLOW_ITEM_TYPE_GRE && items->type ==
+                             RTE_FLOW_ITEM_TYPE_MPLS)) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
@@ -904,6 +942,16 @@ struct ibv_spec_header {
                                                   " tunnel encapsulations.");
                                return -rte_errno;
                        }
+                       if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
+                           !priv->config.mpls_en) {
+                               rte_flow_error_set(error, ENOTSUP,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  items,
+                                                  "MPLS not supported or"
+                                                  " disabled in firmware"
+                                                  " configuration.");
+                               return -rte_errno;
+                       }
                        if (!priv->config.tunnel_en &&
                            parser->rss_conf.level > 1) {
                                rte_flow_error_set(error, ENOTSUP,
@@ -921,6 +969,7 @@ struct ibv_spec_header {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
+               last_voids = 0;
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
@@ -1878,16 +1927,27 @@ struct ibv_spec_header {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
-                    const void *default_mask __rte_unused,
+mlx5_flow_create_gre(const struct rte_flow_item *item,
+                    const void *default_mask,
                     struct mlx5_flow_data *data)
 {
        struct mlx5_flow_parse *parser = data->parser;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       (void)default_mask;
        unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
        struct ibv_flow_spec_tunnel tunnel = {
                .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
+#else
+       const struct rte_flow_item_gre *spec = item->spec;
+       const struct rte_flow_item_gre *mask = item->mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_gre);
+       struct ibv_flow_spec_gre tunnel = {
+               .type = parser->inner | IBV_FLOW_SPEC_GRE,
+               .size = size,
+       };
+#endif
        struct ibv_flow_spec_ipv4_ext *ipv4;
        struct ibv_flow_spec_ipv6 *ipv6;
        unsigned int i;
@@ -1899,6 +1959,20 @@ struct ibv_spec_header {
        /* Default GRE to inner RSS. */
        if (!parser->rss_conf.level)
                parser->rss_conf.level = 2;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       if (spec) {
+               if (!mask)
+                       mask = default_mask;
+               tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
+               tunnel.val.protocol = spec->protocol;
+               tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
+               tunnel.mask.protocol = mask->protocol;
+               /* Remove unwanted bits from values. */
+               tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
+               tunnel.val.protocol &= tunnel.mask.protocol;
+               tunnel.val.key &= tunnel.mask.key;
+       }
+#endif
        /* Update encapsulation IP layer protocol. */
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
@@ -1932,6 +2006,79 @@ struct ibv_spec_header {
 }
 
 /**
+ * Convert MPLS item to Verbs specification.
+ * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     struct mlx5_flow_data *data)
+{
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       (void)default_mask;
+       return rte_flow_error_set(data->error, ENOTSUP,
+                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                 item,
+                                 "MPLS is not supported by driver");
+#else
+       const struct rte_flow_item_mpls *spec = item->spec;
+       const struct rte_flow_item_mpls *mask = item->mask;
+       struct mlx5_flow_parse *parser = data->parser;
+       unsigned int size = sizeof(struct ibv_flow_spec_mpls);
+       struct ibv_flow_spec_mpls mpls = {
+               .type = IBV_FLOW_SPEC_MPLS,
+               .size = size,
+       };
+
+       parser->inner = IBV_FLOW_SPEC_INNER;
+       if (parser->layer == HASH_RXQ_UDPV4 ||
+           parser->layer == HASH_RXQ_UDPV6) {
+               parser->tunnel =
+                       ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
+               parser->out_layer = parser->layer;
+       } else {
+               parser->tunnel =
+                       ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
+               /* parser->out_layer stays as in GRE out_layer. */
+       }
+       parser->layer = HASH_RXQ_TUNNEL;
+       /*
+        * For MPLS-in-GRE, RSS level should have been set.
+        * For MPLS-in-UDP, use outer RSS.
+        */
+       if (!parser->rss_conf.level)
+               parser->rss_conf.level = 1;
+       if (spec) {
+               if (!mask)
+                       mask = default_mask;
+               /*
+                * The verbs label field includes the entire MPLS header:
+                * bits 0:19 - label value field.
+                * bits 20:22 - traffic class field.
+                * bits 23 - bottom of stack bit.
+                * bits 24:31 - ttl field.
+                */
+               mpls.val.label = *(const uint32_t *)spec;
+               mpls.mask.label = *(const uint32_t *)mask;
+               /* Remove unwanted bits from values. */
+               mpls.val.label &= mpls.mask.label;
+       }
+       mlx5_flow_create_copy(parser, &mpls, size);
+       return 0;
+#endif
+}
+
+/**
  * Convert mark/flag action to Verbs specification.
  *
  * @param parser
-- 
1.9.5

Reply via email to