Still working my way through this patch, but I had a couple of comments inline.

On 9/5/17, 2:23 AM, "Yuanhan Liu" <y...@fridaylinux.org> wrote:

    From: Finn Christensen <f...@napatech.com>
    
    The basic yet major part of this patch is to translate the "match"
    into rte flow patterns, and then create an rte flow with a MARK
    action. Afterwards, all pkts matching the flow will carry the mark
    id in the mbuf.

    For any unsupported flow, such as MPLS, -1 is returned, meaning the
    flow offload has failed and the flow is then skipped.
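
[Darrell] Side note for reviewers new to rte_flow: with the MARK action, the
mark id typically shows up on the receive side roughly like this (a sketch
based on the usual DPDK 17.x mbuf layout; not part of this patch):

    struct rte_mbuf *m;  /* a received packet */
    ...
    if (m->ol_flags & PKT_RX_FDIR_ID) {
        uint32_t mark = m->hash.fdir.hi;  /* mark id set by the rte flow */
    }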
    
    Co-authored-by: Yuanhan Liu <y...@fridaylinux.org>
    Signed-off-by: Finn Christensen <f...@napatech.com>
    Signed-off-by: Yuanhan Liu <y...@fridaylinux.org>
    ---
    
    v2: - convert some macros to functions
        - do not hardcode the max number of flow/action
        - fix L2 patterns for Intel nic
        - add comments for not implemented offload methods
    ---
     lib/netdev-dpdk.c | 421 +++++++++++++++++++++++++++++++++++++++++++++++++++++
     1 file changed, 420 insertions(+), 1 deletion(-)
    
    diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
    index 46f9885..37b0f99 100644
    --- a/lib/netdev-dpdk.c
    +++ b/lib/netdev-dpdk.c
    @@ -58,6 +58,7 @@
     #include "smap.h"
     #include "sset.h"
     #include "unaligned.h"
    +#include "uuid.h"
     #include "timeval.h"
     #include "unixctl.h"
     
    @@ -3400,6 +3401,424 @@ get_rte_flow_by_ufid(const ovs_u128 *ufid)
     }
     
     
    +struct flow_patterns {
    +    struct rte_flow_item *items;
    +    int cnt;
    +    int max;
    +};
    +
    +struct flow_actions {
    +    struct rte_flow_action *actions;
    +    int cnt;
    +    int max;
    +};
    +
    +static void
    +add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
    +                 const void *spec, const void *mask)
    +{
    +    int cnt = patterns->cnt;
    +
    +    if (cnt == 0) {
    +        patterns->max = 8;
    +        patterns->items = xcalloc(patterns->max, sizeof(struct rte_flow_item));
    +    } else if (cnt == patterns->max) {
    +        patterns->max *= 2;
    +        patterns->items = xrealloc(patterns->items, patterns->max *
    +                                   sizeof(struct rte_flow_item));
    +    }
    +
    +    patterns->items[cnt].type = type;
    +    patterns->items[cnt].spec = spec;
    +    patterns->items[cnt].mask = mask;
    +    patterns->items[cnt].last = NULL;
    +    patterns->cnt++;
    +}
    +
    +static void
    +add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
    +                const void *conf)
    +{
    +    int cnt = actions->cnt;
    +
    +    if (cnt == 0) {
    +        actions->max = 8;
    +        actions->actions = xcalloc(actions->max,
    +                                   sizeof(struct rte_flow_action));
    +    } else if (cnt == actions->max) {
    +        actions->max *= 2;
    +        actions->actions = xrealloc(actions->actions, actions->max *
    +                                    sizeof(struct rte_flow_action));
    +    }
    +
    +    actions->actions[cnt].type = type;
    +    actions->actions[cnt].conf = conf;
    +    actions->cnt++;
    +}
    +
    +static int
    +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
    +                                 const struct match *match,
    +                                 struct nlattr *nl_actions OVS_UNUSED,
    +                                 size_t actions_len,
    +                                 const ovs_u128 *ufid,
    +                                 struct offload_info *info)
    +{
    +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    +    const struct rte_flow_attr flow_attr = {
    +        .group = 0,
    +        .priority = 0,
    +        .ingress = 1,
    +        .egress = 0
    +    };
    +    struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
    +    struct flow_actions actions = { .actions = NULL, .cnt = 0 };
    +    struct rte_flow *flow;
    +    struct rte_flow_error error;
    +    uint8_t *ipv4_next_proto_mask = NULL;
    +    int ret = 0;
    +
    +    /* Eth */
    +    struct rte_flow_item_eth eth_spec;
    +    struct rte_flow_item_eth eth_mask;
    +    memset(&eth_mask, 0, sizeof(eth_mask));
    +    if (match->wc.masks.dl_src.be16[0] ||
    +        match->wc.masks.dl_src.be16[1] ||
    +        match->wc.masks.dl_src.be16[2] ||
    +        match->wc.masks.dl_dst.be16[0] ||
    +        match->wc.masks.dl_dst.be16[1] ||
    +        match->wc.masks.dl_dst.be16[2]) {
    +        rte_memcpy(&eth_spec.dst, &match->flow.dl_dst, sizeof(eth_spec.dst));
    +        rte_memcpy(&eth_spec.src, &match->flow.dl_src, sizeof(eth_spec.src));
    +        eth_spec.type = match->flow.dl_type;
    +
    +        rte_memcpy(&eth_mask.dst, &match->wc.masks.dl_dst,
    +                   sizeof(eth_mask.dst));
    +        rte_memcpy(&eth_mask.src, &match->wc.masks.dl_src,
    +                   sizeof(eth_mask.src));
    +        eth_mask.type = match->wc.masks.dl_type;
    +
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH,
    +                         &eth_spec, &eth_mask);
    +    } else {
    +        /*
    +         * If the user specifies a flow (like a UDP flow) without L2
    +         * patterns, OVS will at least set the dl_type. Normally, it's
    +         * enough to create an eth pattern just with it. Unfortunately,
    +         * some Intel NICs (such as the XL710) don't support that.
    +         * Below is a workaround, which simply matches any L2 pkts.
    +         */
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
    +    }
    +
    +    /* VLAN */
    +    struct rte_flow_item_vlan vlan_spec;
    +    struct rte_flow_item_vlan vlan_mask;
    +    memset(&vlan_mask, 0, sizeof(vlan_mask));
    +    if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
    +        vlan_spec.tci  = match->flow.vlans[0].tci;
    +        vlan_mask.tci  = match->wc.masks.vlans[0].tci;
    +
    +        /* match any protocols */
    +        vlan_mask.tpid = 0;
    +
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
    +                         &vlan_spec, &vlan_mask);
    +    }
    +
    +    /* IP v4 */
    +    uint8_t proto = 0;
    +    struct rte_flow_item_ipv4 ipv4_spec;
    +    struct rte_flow_item_ipv4 ipv4_mask;
    +    memset(&ipv4_mask, 0, sizeof(ipv4_mask));
    +    if ((match->flow.dl_type == ntohs(ETH_TYPE_IP)) &&
    +        (match->wc.masks.nw_src || match->wc.masks.nw_dst ||
    +         match->wc.masks.nw_tos || match->wc.masks.nw_ttl ||
    +         match->wc.masks.nw_proto)) {
    +        ipv4_spec.hdr.type_of_service = match->flow.nw_tos;
    +        ipv4_spec.hdr.time_to_live    = match->flow.nw_tos;

[Darrell] should be TTL (nw_ttl), not nw_tos; lots of e-mails on this patch,
but I don't remember seeing this mentioned.
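
Presumably the fix is simply (my suggestion, not what the patch does):

    ipv4_spec.hdr.time_to_live    = match->flow.nw_ttl;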


    +        ipv4_spec.hdr.next_proto_id   = match->flow.nw_proto;
    +        ipv4_spec.hdr.src_addr        = match->flow.nw_src;
    +        ipv4_spec.hdr.dst_addr        = match->flow.nw_dst;
    +
    +        ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos;
    +        ipv4_mask.hdr.time_to_live    = match->wc.masks.nw_tos;

[Darrell] Same here: should be TTL (nw_ttl), not nw_tos.
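
And the corresponding suggested fix for the mask:

    ipv4_mask.hdr.time_to_live    = match->wc.masks.nw_ttl;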


    +        ipv4_mask.hdr.next_proto_id   = match->wc.masks.nw_proto;
    +        ipv4_mask.hdr.src_addr        = match->wc.masks.nw_src;
    +        ipv4_mask.hdr.dst_addr        = match->wc.masks.nw_dst;
    +
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4,
    +                         &ipv4_spec, &ipv4_mask);
    +
    +        /* Save proto for L4 protocol setup */
    +        proto = ipv4_spec.hdr.next_proto_id & ipv4_mask.hdr.next_proto_id;
    +
    +        /* Remember proto mask address for later modification */
    +        ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id;
    +    }
    +
    +    if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP  &&
    +        proto != IPPROTO_SCTP && proto != IPPROTO_TCP  &&
    +        (match->wc.masks.tp_src ||
    +         match->wc.masks.tp_dst ||
    +         match->wc.masks.tcp_flags)) {
    +        VLOG_INFO("L4 Protocol (%u) not supported", proto);
    +        ret = -1;
    +        goto out;
    +    }
    +
    +    struct rte_flow_item_udp udp_spec;
    +    struct rte_flow_item_udp udp_mask;
    +    memset(&udp_mask, 0, sizeof(udp_mask));
    +    if ((proto == IPPROTO_UDP) &&
    +        (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
    +        udp_spec.hdr.src_port = match->flow.tp_src;
    +        udp_spec.hdr.dst_port = match->flow.tp_dst;
    +
    +        udp_mask.hdr.src_port = match->wc.masks.tp_src;
    +        udp_mask.hdr.dst_port = match->wc.masks.tp_dst;
    +
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP,
    +                         &udp_spec, &udp_mask);
    +
    +        /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match */
    +        if (ipv4_next_proto_mask) {
    +            *ipv4_next_proto_mask = 0;
    +        }
    +    }
    +
    +    struct rte_flow_item_sctp sctp_spec;
    +    struct rte_flow_item_sctp sctp_mask;
    +    memset(&sctp_mask, 0, sizeof(sctp_mask));
    +    if ((proto == IPPROTO_SCTP) &&
    +        (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
    +        sctp_spec.hdr.src_port = match->flow.tp_src;
    +        sctp_spec.hdr.dst_port = match->flow.tp_dst;
    +
    +        sctp_mask.hdr.src_port = match->wc.masks.tp_src;
    +        sctp_mask.hdr.dst_port = match->wc.masks.tp_dst;
    +
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP,
    +                         &sctp_spec, &sctp_mask);
    +
    +        /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match */
    +        if (ipv4_next_proto_mask) {
    +            *ipv4_next_proto_mask = 0;
    +        }
    +    }
    +
    +    struct rte_flow_item_icmp icmp_spec;
    +    struct rte_flow_item_icmp icmp_mask;
    +    memset(&icmp_mask, 0, sizeof(icmp_mask));
    +    if ((proto == IPPROTO_ICMP) &&
    +        (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
    +        icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src);
    +        icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst);
    +
    +        icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match->wc.masks.tp_src);
    +        icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match->wc.masks.tp_dst);
    +
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP,
    +                         &icmp_spec, &icmp_mask);
    +
    +        /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match */
    +        if (ipv4_next_proto_mask) {
    +            *ipv4_next_proto_mask = 0;
    +        }
    +    }
    +
    +    struct rte_flow_item_tcp tcp_spec;
    +    struct rte_flow_item_tcp tcp_mask;
    +    memset(&tcp_mask, 0, sizeof(tcp_mask));
    +    if ((proto == IPPROTO_TCP) &&
    +        (match->wc.masks.tp_src ||
    +         match->wc.masks.tp_dst ||
    +         match->wc.masks.tcp_flags)) {
    +        tcp_spec.hdr.src_port  = match->flow.tp_src;
    +        tcp_spec.hdr.dst_port  = match->flow.tp_dst;
    +        tcp_spec.hdr.data_off  = ntohs(match->flow.tcp_flags) >> 8;
    +        tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff;
    +
    +        tcp_mask.hdr.src_port  = match->wc.masks.tp_src;
    +        tcp_mask.hdr.dst_port  = match->wc.masks.tp_dst;
    +        tcp_mask.hdr.data_off  = ntohs(match->wc.masks.tcp_flags) >> 8;
    +        tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;
    +
    +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP,
    +                         &tcp_spec, &tcp_mask);
    +
    +        /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */
    +        if (ipv4_next_proto_mask) {
    +            *ipv4_next_proto_mask = 0;
    +        }
    +    }
    +    add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL);
    +
    +    struct rte_flow_action_mark mark;
    +    if (actions_len) {
    +        mark.id = info->flow_mark;
    +        add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark);
    +    } else {
    +        add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_DROP, NULL);
    +        VLOG_INFO("no action given; drop pkts in hardware\n");
    +    }
    +    add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
    +
    +    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
    +                           actions.actions, &error);
    +    if (!flow) {
    +        VLOG_ERR("rte flow creat error: %u : message : %s\n",
    +                 error.type, error.message);
    +        ret = -1;
    +        goto out;
    +    }
    +    add_ufid_dpdk_flow_mapping(ufid, flow);
    +    VLOG_INFO("installed flow %p by ufid "UUID_FMT"\n",
    +              flow, UUID_ARGS((struct uuid *)ufid));
    +
    +out:
    +    free(patterns.items);
    +    free(actions.actions);
    +    return ret;
    +}
    +
    +/*
    + * Validate for later rte flow offload creation. If any unsupported
    + * flows are specified, return -1.
    + */
    +static int
    +netdev_dpdk_validate_flow(const struct match *match)
    +{
    +    struct match match_zero_wc;
    +
    +    /* Create a wc-zeroed version of flow */
    +    match_init(&match_zero_wc, &match->flow, &match->wc);
    +
    +#define CHECK_NONZERO_BYTES(addr, size) do {    \
    +    uint8_t *padr = (uint8_t *)(addr);          \
    +    int i;                                      \
    +    for (i = 0; i < (size); i++) {              \
    +        if (padr[i] != 0) {                     \
    +            goto err;                           \
    +        }                                       \
    +    }                                           \
    +} while (0)
    +
    +#define CHECK_NONZERO(var)              do {    \
    +    if ((var) != 0) {                           \
    +        goto err;                               \
    +    }                                           \
    +} while (0)
    +
    +    CHECK_NONZERO_BYTES(&match_zero_wc.flow.tunnel,
    +                        sizeof(match_zero_wc.flow.tunnel));
    +    CHECK_NONZERO(match->wc.masks.metadata);
    +    CHECK_NONZERO(match->wc.masks.skb_priority);
    +    CHECK_NONZERO(match->wc.masks.pkt_mark);
    +    CHECK_NONZERO(match->wc.masks.dp_hash);
    +
    +    /* recirc id must be zero */
    +    CHECK_NONZERO(match_zero_wc.flow.recirc_id);
    +
    +    CHECK_NONZERO(match->wc.masks.ct_state);
    +    CHECK_NONZERO(match->wc.masks.ct_zone);
    +    CHECK_NONZERO(match->wc.masks.ct_mark);
    +    CHECK_NONZERO(match->wc.masks.ct_label.u64.hi);
    +    CHECK_NONZERO(match->wc.masks.ct_label.u64.lo);
    +    CHECK_NONZERO(match->wc.masks.ct_nw_proto);
    +    CHECK_NONZERO(match->wc.masks.ct_tp_src);
    +    CHECK_NONZERO(match->wc.masks.ct_tp_dst);
    +    CHECK_NONZERO(match->wc.masks.conj_id);
    +    CHECK_NONZERO(match->wc.masks.actset_output);
    +
    +    /* unsupported L2 */
    +    CHECK_NONZERO_BYTES(&match->wc.masks.mpls_lse,
    +                        sizeof(match_zero_wc.flow.mpls_lse) /
    +                        sizeof(ovs_be32));
    +
    +    /* unsupported L3 */
    +    CHECK_NONZERO_BYTES(&match->wc.masks.ipv6_src, sizeof(struct in6_addr));
    +    CHECK_NONZERO_BYTES(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr));
    +    CHECK_NONZERO(match->wc.masks.ipv6_label);
    +    CHECK_NONZERO_BYTES(&match->wc.masks.nd_target, sizeof(struct in6_addr));
    +    CHECK_NONZERO_BYTES(&match->wc.masks.arp_sha, sizeof(struct eth_addr));
    +    CHECK_NONZERO_BYTES(&match->wc.masks.arp_tha, sizeof(struct eth_addr));
    +
    +    /* If fragmented, then don't HW accelerate - for now */
    +    CHECK_NONZERO(match_zero_wc.flow.nw_frag);
    +
    +    /* unsupported L4 */
    +    CHECK_NONZERO(match->wc.masks.igmp_group_ip4);
    +
    +    return 0;
    +
    +err:
    +    VLOG_INFO("Cannot HW accelerate this flow");
    +    return -1;
    +}
    +
    +static int
    +netdev_dpdk_destroy_rte_flow(struct netdev_dpdk *dev,
    +                             const ovs_u128 *ufid,
    +                             struct rte_flow *rte_flow)
    +{
    +    struct rte_flow_error error;
    +    int ret;
    +
    +    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
    +    if (ret == 0) {
    +        del_ufid_dpdk_flow_mapping(ufid);
    +        VLOG_INFO("removed rte flow %p associated with ufid " UUID_FMT 
"\n",
    +                  rte_flow, UUID_ARGS((struct uuid *)ufid));
    +    } else {
    +        VLOG_ERR("rte flow destroy error: %u : message : %s\n",
    +                 error.type, error.message);
    +    }
    +
    +    return ret;
    +}
    +
    +static int
    +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match,
    +                     struct nlattr *actions, size_t actions_len,
    +                     const ovs_u128 *ufid, struct offload_info *info,
    +                     struct dpif_flow_stats *stats OVS_UNUSED)
    +{
    +    struct rte_flow *rte_flow;
    +    int ret;
    +
    +    /*
    +     * If an old rte_flow exists, it means it's a flow modification.
    +     * Here destroy the old rte flow first before adding a new one.
    +     */
    +    rte_flow = get_rte_flow_by_ufid(ufid);
    +    if (rte_flow) {
    +        ret = netdev_dpdk_destroy_rte_flow(netdev_dpdk_cast(netdev),
    +                                           ufid, rte_flow);
    +        if (ret < 0) {
    +            return ret;
    +        }
    +    }
    +
    +    ret = netdev_dpdk_validate_flow(match);
    +    if (ret < 0) {
    +        return ret;
    +    }
    +
    +    return netdev_dpdk_add_rte_flow_offload(netdev, match, actions,
    +                                            actions_len, ufid, info);
    +}
    +
    +#define DPDK_FLOW_OFFLOAD_API                                 \
    +    NULL,                   /* flow_flush */                  \
    +    NULL,                   /* flow_dump_create */            \
    +    NULL,                   /* flow_dump_destroy */           \
    +    NULL,                   /* flow_dump_next */              \
    +    netdev_dpdk_flow_put,                                     \
    +    NULL,                   /* flow_get */                    \
    +    NULL,                   /* flow_del */                    \
    +    NULL                    /* init_flow_api */
    +
    +
     #define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT,    \
                               SET_CONFIG, SET_TX_MULTIQ, SEND,    \
                               GET_CARRIER, GET_STATS,             \
    @@ -3472,7 +3891,7 @@ get_rte_flow_by_ufid(const ovs_u128 *ufid)
         RXQ_RECV,                                                 \
         NULL,                       /* rx_wait */                 \
         NULL,                       /* rxq_drain */               \
    -    NO_OFFLOAD_API                                            \
    +    DPDK_FLOW_OFFLOAD_API                                     \
     }
     
     static const struct netdev_class dpdk_class =
    -- 
    2.7.4