On Fri, Jul 1, 2022 at 5:58 AM Mike Pattrick <[email protected]> wrote:
>
> From: Flavio Leitner <[email protected]>
>
> The netdev receiving packets is supposed to provide the flags
> indicating if the IP csum was verified and it is OK or BAD,
> otherwise the stack will check when appropriate by software.
>
> If the packet comes with good checksum, then postpone the
> checksum calculation to the egress device if needed.
>
> When encapsulate a packet with that flag, set the checksum
> of the inner IP header since that is not yet supported.
>
> Calculate the IP csum when the packet is going to be sent over
> a device that doesn't support the feature.
>
> Linux devices don't support IP csum offload alone, so the
> support is not enabled.
>
> Signed-off-by: Flavio Leitner <[email protected]>
> Co-authored-by: Mike Pattrick <[email protected]>
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
>  lib/conntrack.c                     | 12 ++---
>  lib/dp-packet.c                     | 12 +++++
>  lib/dp-packet.h                     | 70 +++++++++++++++++++++----
>  lib/dpif.h                          |  2 +-
>  lib/flow.c                          | 15 ++++--
>  lib/ipf.c                           |  9 ++--
>  lib/netdev-dpdk.c                   | 79 +++++++++++++++--------------
>  lib/netdev-dummy.c                  | 23 +++++++++
>  lib/netdev-native-tnl.c             | 19 +++++--
>  lib/netdev.c                        | 22 ++++++++
>  lib/odp-execute.c                   | 21 ++++++--
>  lib/packets.c                       | 34 ++++++++++---
>  ofproto/ofproto-dpif-upcall.c       | 14 +++--
>  tests/automake.mk                   |  1 +
>  tests/system-userspace-offload.at   | 79 +++++++++++++++++++++++++++++
>  tests/system-userspace-testsuite.at |  1 +
>  16 files changed, 330 insertions(+), 83 deletions(-)
>  create mode 100644 tests/system-userspace-offload.at
>

bfd_put_packet could use dp_packet_ip_set_header_csum() (after a
missing dp_packet_set_l3 call).


> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 70157d45a..11768da00 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -2089,16 +2089,14 @@ conn_key_extract(struct conntrack *ct, struct 
> dp_packet *pkt, ovs_be16 dl_type,
>      ctx->key.dl_type = dl_type;
>
>      if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
> -        bool hwol_bad_l3_csum = dp_packet_ol_ip_checksum_bad(pkt);
> -        if (hwol_bad_l3_csum) {
> +        if (dp_packet_ol_ip_checksum_bad(pkt)) {
>              ok = false;
>              COVERAGE_INC(conntrack_l3csum_err);
>          } else {
> -            bool hwol_good_l3_csum = dp_packet_ol_ip_checksum_good(pkt)
> -                                     || dp_packet_ol_tx_ipv4(pkt);
>              /* Validate the checksum only when hwol is not supported. */

I'd update this comment, to reflect that it only matters when csum
quality is unknown.


>              ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
> -                                 !hwol_good_l3_csum);
> +                                 !dp_packet_ol_tx_ipv4(pkt) &&

I don't see the need for this check on the packet being IPv4.
We already know it is an IPv4 packet via dl_type.


> +                                 !dp_packet_ol_ip_checksum_good(pkt));

On return of extract_l3_ipv4, if ok is true, we could mark this packet
l3 csum as good.
This avoids reevaluating later.


>          }
>      } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
>          ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
> @@ -3402,7 +3400,9 @@ handle_ftp_ctl(struct conntrack *ct, const struct 
> conn_lookup_ctx *ctx,
>                  }
>                  if (seq_skew) {
>                      ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
> -                    if (!dp_packet_ol_tx_ipv4(pkt)) {
> +                    if (dp_packet_ol_tx_ip_csum(pkt)) {

I see this pattern in the rest of the patch, so I would introduce some
dp_packet_ level API like:

+static inline void
+dp_packet_ip_csum_recalc16(struct dp_packet *p, ovs_be16 old_u16,
+                           ovs_be16 new_u16)
+{
+    if (dp_packet_ol_tx_ip_csum(p)) {
+        dp_packet_ol_reset_ip_csum_good(p);
+    } else {
+        struct ip_header *ip = dp_packet_l3(p);
+
+        ovs_assert(ip);
+        ip->ip_csum = recalc_csum16(ip->ip_csum, old_u16, new_u16);
+    }
+}

And same with csum32.


> +                        dp_packet_ol_reset_ip_csum_good(pkt);
> +                    } else {
>                          l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
>                                                          l3_hdr->ip_tot_len,
>                                                          htons(ip_len));
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index 9728565dc..94d3b1277 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -21,6 +21,7 @@
>  #include "dp-packet.h"
>  #include "netdev-afxdp.h"
>  #include "netdev-dpdk.h"
> +#include "netdev-provider.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "util.h"
>
> @@ -507,3 +508,14 @@ dp_packet_resize_l2(struct dp_packet *p, int increment)
>      dp_packet_adjust_layer_offset(&p->l2_5_ofs, increment);
>      return dp_packet_data(p);
>  }
> +
> +/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
> + * and if not, update the packet with the software fall back. */
> +void
> +dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {

{ on newline.

> +    if (!dp_packet_ol_ip_checksum_good(p) && dp_packet_ol_tx_ip_csum(p)
> +        && !(flags & NETDEV_OFFLOAD_TX_IPV4_CSUM)) {
> +        dp_packet_ip_set_header_csum(p);
> +        dp_packet_ol_set_ip_csum_good(p);

DPDK drivers may reject an mbuf in rte_eth_tx_prepare() because it
carries a leftover TX offload flag that the device does not support.
So if the netdev for which we call this helper does not support tx ip
csum, we should reset the flag (with a dp_packet_ol_reset_tx_ip_csum()
helper).
This also makes it possible in netdev-dpdk to skip filling l2_len and
other tx offload flags in netdev_dpdk_prep_ol_packet (see comment on
the latter function).


> +    }
> +}
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 633b4ef38..94aaa40a3 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -25,6 +25,7 @@
>  #include <rte_mbuf.h>
>  #endif
>
> +#include "csum.h"
>  #include "netdev-afxdp.h"
>  #include "netdev-dpdk.h"
>  #include "openvswitch/list.h"
> @@ -77,24 +78,27 @@ enum dp_packet_offload_mask {
>      DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, RTE_MBUF_F_TX_IPV4, 0x80),
>      /* Offloaded packet is IPv6. */
>      DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, RTE_MBUF_F_TX_IPV6, 0x100),
> +    /* Offload IP checksum. */
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x200),
>      /* Offload TCP checksum. */
> -    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, RTE_MBUF_F_TX_TCP_CKSUM, 0x200),
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, RTE_MBUF_F_TX_TCP_CKSUM, 0x400),
>      /* Offload UDP checksum. */
> -    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, RTE_MBUF_F_TX_UDP_CKSUM, 0x400),
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, RTE_MBUF_F_TX_UDP_CKSUM, 0x800),
>      /* Offload SCTP checksum. */
> -    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800),
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x1000),
>      /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
>  };
>
> -#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH         | \
> -                                     DP_PACKET_OL_FLOW_MARK        | \
> +#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH        | \
> +                                     DP_PACKET_OL_FLOW_MARK       | \
>                                       DP_PACKET_OL_RX_L4_CSUM_BAD  | \
>                                       DP_PACKET_OL_RX_IP_CSUM_BAD  | \
>                                       DP_PACKET_OL_RX_L4_CSUM_GOOD | \
>                                       DP_PACKET_OL_RX_IP_CSUM_GOOD | \
> -                                     DP_PACKET_OL_TX_TCP_SEG       | \
> -                                     DP_PACKET_OL_TX_IPV4          | \
> -                                     DP_PACKET_OL_TX_IPV6          | \
> +                                     DP_PACKET_OL_TX_TCP_SEG      | \
> +                                     DP_PACKET_OL_TX_IPV4         | \
> +                                     DP_PACKET_OL_TX_IPV6         | \
> +                                     DP_PACKET_OL_TX_IP_CSUM      | \
>                                       DP_PACKET_OL_TX_TCP_CSUM     | \
>                                       DP_PACKET_OL_TX_UDP_CSUM     | \
>                                       DP_PACKET_OL_TX_SCTP_CSUM)
> @@ -235,6 +239,7 @@ void *dp_packet_steal_data(struct dp_packet *);
>
>  static inline bool dp_packet_equal(const struct dp_packet *,
>                                     const struct dp_packet *);
> +void dp_packet_ol_send_prepare(struct dp_packet *, const uint64_t);
>
>
>  /* Frees memory that 'p' points to, as well as 'p' itself. */
> @@ -979,7 +984,7 @@ dp_packet_ol_tcp_seg(const struct dp_packet *a)
>      return !!(*dp_packet_ol_flags_ptr(a) & DP_PACKET_OL_TX_TCP_SEG);
>  }
>
> -/* Returns 'true' if packet 'a' is marked for IPv4 checksum offloading. */
> +/* Returns 'true' if packet 'a' is marked as IPv4. */
>  static inline bool
>  dp_packet_ol_tx_ipv4(const struct dp_packet *a)
>  {
> @@ -1010,20 +1015,34 @@ dp_packet_ol_tx_sctp_csum(struct dp_packet *a)
>              DP_PACKET_OL_TX_SCTP_CSUM;
>  }
>
> -/* Mark packet 'a' for IPv4 checksum offloading. */
> +/* Mark packet 'a' as IPv4. */
>  static inline void
>  dp_packet_ol_set_tx_ipv4(struct dp_packet *a)
>  {
>      *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV4;
>  }
>
> -/* Mark packet 'a' for IPv6 checksum offloading. */
> +/* Mark packet 'a' as IPv6. */
>  static inline void
>  dp_packet_ol_set_tx_ipv6(struct dp_packet *a)
>  {
>      *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
>  }
>
> +/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
> +static inline bool
> +dp_packet_ol_tx_ip_csum(const struct dp_packet *p)
> +{
> +    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CSUM);
> +}
> +
> +/* Marks packet 'p' for IPv4 checksum offloading. */
> +static inline void
> +dp_packet_ol_set_tx_ip_csum(struct dp_packet *p)
> +{

We can enforce: ovs_assert(dp_packet_ol_ip_checksum_good(p)); here.


> +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CSUM;
> +}
> +
>  /* Mark packet 'a' for TCP checksum offloading.  It implies that either
>   * the packet 'a' is marked for IPv4 or IPv6 checksum offloading. */
>  static inline void
> @@ -1057,6 +1076,8 @@ dp_packet_ol_set_tcp_seg(struct dp_packet *a)
>      *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_TCP_SEG;
>  }
>
> +/* Returns 'true' is the IP has good integrity and the
> + * checksum in it is complete. */
>  static inline bool
>  dp_packet_ol_ip_checksum_good(const struct dp_packet *p)
>  {
> @@ -1064,6 +1085,22 @@ dp_packet_ol_ip_checksum_good(const struct dp_packet 
> *p)
>              DP_PACKET_OL_RX_IP_CSUM_GOOD;
>  }
>
> +/* Marks packet 'p' with good IPv4 checksum. */
> +static inline void
> +dp_packet_ol_set_ip_csum_good(const struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_BAD;
> +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CSUM_GOOD;
> +}

Once the csum is good in the packet, we can call
dp_packet_ol_set_tx_ip_csum and save csum updates during the rest of
this packet's processing.


> +
> +/* Resets IP good checksum flag in packet 'p'. */
> +static inline void
> +dp_packet_ol_reset_ip_csum_good(const struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_GOOD;
> +}
> +
> +/* Marks packet 'p' with bad IPv4 checksum. */
>  static inline bool
>  dp_packet_ol_ip_checksum_bad(const struct dp_packet *p)
>  {
> @@ -1071,6 +1108,17 @@ dp_packet_ol_ip_checksum_bad(const struct dp_packet *p)
>              DP_PACKET_OL_RX_IP_CSUM_BAD;
>  }
>
> +/* Calculate and set the IPv4 header checksum in packet 'p'. */
> +static inline void
> +dp_packet_ip_set_header_csum(struct dp_packet *p)
> +{
> +    struct ip_header *ip = dp_packet_l3(p);
> +
> +    ovs_assert(ip);
> +    ip->ip_csum = 0;
> +    ip->ip_csum = csum(ip, sizeof *ip);
> +}

We can mark this packet csum as good.
Callers then don't need to bother with dp_packet_ol_set_ip_csum_good.

If we cascade dp_packet_ol_set_tx_ip_csum as mentioned above,
OVS-crafted packets will get a SW computed csum at creation (as was
done before) and we can avoid updating the csum if later processing
would require it.

> +
>  static inline bool
>  dp_packet_ol_l4_checksum_good(const struct dp_packet *p)
>  {
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 6cb4dae6d..33e991b84 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -869,7 +869,7 @@ void dpif_register_dp_purge_cb(struct dpif *, 
> dp_purge_callback *, void *aux);
>   *
>   * Returns 0 if successful, ENOSPC if the flow limit has been reached and no
>   * flow should be installed, or some otherwise a positive errno value. */
> -typedef int upcall_callback(const struct dp_packet *packet,
> +typedef int upcall_callback(struct dp_packet *packet,

I am not familiar with the upcall handling, but this change scares me.
We open a possibility of changing data where it was not possible so far.

Can't the csum be "resolved" earlier in the processing of the packet?


>                              const struct flow *flow,
>                              ovs_u128 *ufid,
>                              unsigned pmd_id,
> diff --git a/lib/flow.c b/lib/flow.c
> index 8ab9df3fc..7efb2dd00 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -907,6 +907,10 @@ miniflow_extract(struct dp_packet *packet, struct 
> miniflow *dst)
>          nw_proto = nh->ip_proto;
>          nw_frag = ipv4_get_nw_frag(nh);
>          data_pull(&data, &size, ip_len);
> +        dp_packet_ol_set_tx_ipv4(packet);
> +        if (dp_packet_ol_ip_checksum_good(packet)) {
> +            dp_packet_ol_set_tx_ip_csum(packet);
> +        }
>      } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>          const struct ovs_16aligned_ip6_hdr *nh = data;
>          ovs_be32 tc_flow;
> @@ -920,6 +924,7 @@ miniflow_extract(struct dp_packet *packet, struct 
> miniflow *dst)
>          }
>          data_pull(&data, &size, sizeof *nh);
>
> +        dp_packet_ol_set_tx_ipv6(packet);
>          plen = ntohs(nh->ip6_plen);
>          dp_packet_set_l2_pad_size(packet, size - plen);
>          size = plen;   /* Never pull padding. */
> @@ -3217,9 +3222,12 @@ packet_expand(struct dp_packet *p, const struct flow 
> *flow, size_t size)
>              struct ip_header *ip = dp_packet_l3(p);
>
>              ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
> -            ip->ip_csum = 0;
> -            ip->ip_csum = csum(ip, sizeof *ip);
> -
> +            if (dp_packet_ol_tx_ip_csum(p)) {
> +                dp_packet_ol_reset_ip_csum_good(p);
> +            } else {
> +                dp_packet_ip_set_header_csum(p);
> +                dp_packet_ol_set_ip_csum_good(p);
> +            }
>              pseudo_hdr_csum = packet_csum_pseudoheader(ip);
>          } else { /* ETH_TYPE_IPV6 */
>              struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
> @@ -3309,6 +3317,7 @@ flow_compose(struct dp_packet *p, const struct flow 
> *flow,
>          /* Checksum has already been zeroed by put_zeros call. */
>          ip->ip_csum = csum(ip, sizeof *ip);
>
> +        dp_packet_ol_set_ip_csum_good(p);

Use dp_packet_ip_set_header_csum() ?


>          pseudo_hdr_csum = packet_csum_pseudoheader(ip);
>          flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
>      } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
> diff --git a/lib/ipf.c b/lib/ipf.c
> index 4f635de11..5462eec53 100644
> --- a/lib/ipf.c
> +++ b/lib/ipf.c
> @@ -433,7 +433,9 @@ ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
>      len += rest_len;
>      l3 = dp_packet_l3(pkt);
>      ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
> -    if (!dp_packet_ol_tx_ipv4(pkt)) {
> +    if (dp_packet_ol_tx_ip_csum(pkt)) {
> +        dp_packet_ol_reset_ip_csum_good(pkt);
> +    } else {

Use dp_packet_ip_recalc_csum16.


>          l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
>                                      new_ip_frag_off);
>          l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
> @@ -609,7 +611,6 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet 
> *pkt)
>      }
>
>      if (OVS_UNLIKELY(!dp_packet_ol_ip_checksum_good(pkt)
> -                     && !dp_packet_ol_tx_ipv4(pkt)
>                       && csum(l3, ip_hdr_len) != 0)) {
>          COVERAGE_INC(ipf_l3csum_err);
>          goto invalid_pkt;
> @@ -1185,7 +1186,9 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
>                      } else {
>                          struct ip_header *l3_frag = 
> dp_packet_l3(frag_i->pkt);
>                          struct ip_header *l3_reass = dp_packet_l3(pkt);
> -                        if (!dp_packet_ol_tx_ipv4(frag_i->pkt)) {
> +                        if (dp_packet_ol_tx_ip_csum(frag_i->pkt)) {
> +                            dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
> +                        } else {

Use dp_packet_ip_recalc_csum32.


>                              ovs_be32 reass_ip =
>                                  get_16aligned_be32(&l3_reass->ip_src);
>                              ovs_be32 frag_ip =
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 3172c766b..1eb2954ab 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -397,8 +397,9 @@ enum dpdk_hw_ol_features {
>      NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
>      NETDEV_RX_HW_CRC_STRIP = 1 << 1,
>      NETDEV_RX_HW_SCATTER = 1 << 2,
> -    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
> -    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
> +    NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
> +    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
> +    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
>  };
>
>  /*
> @@ -984,6 +985,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int 
> n_rxq, int n_txq)
>          conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
>      }
>
> +    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
> +    }
> +
>      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
>          conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
>          if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> @@ -1124,6 +1129,12 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
>      }
>
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +    }
> +
>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>      if (userspace_tso_enabled()) {
>          if ((info.tx_offload_capa & tx_tso_offload_capa)
> @@ -1693,16 +1704,12 @@ netdev_dpdk_get_config(const struct netdev *netdev, 
> struct smap *args)
>                          dev->requested_txq_size);
>          smap_add_format(args, "configured_txq_descriptors", "%d",
>                          dev->txq_size);
> -        if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
> -            smap_add(args, "rx_csum_offload", "true");
> -        } else {
> -            smap_add(args, "rx_csum_offload", "false");
> -        }
> -        if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> -            smap_add(args, "tx_tso_offload", "true");
> -        } else {
> -            smap_add(args, "tx_tso_offload", "false");
> -        }
> +#define HWOL_SMAP_ADD(FIELD, FLAG) \
> +        smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
> +        HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
> +#undef HWOL_SMAP_ADD

Nice :-).


>          smap_add(args, "lsc_interrupt_mode",
>                   dev->lsc_interrupt_mode ? "true" : "false");
>
> @@ -2145,12 +2152,10 @@ netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, 
> struct rte_mbuf *mbuf)
>  {
>      struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);

We should first check against a mask of all supported tx offloads
in this helper, and return early if none is set.

>
> -    if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> -        mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char 
> *)dp_packet_eth(pkt);
> -        mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
> -        mbuf->outer_l2_len = 0;
> -        mbuf->outer_l3_len = 0;
> -    }
> +    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
> +    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
> +    mbuf->outer_l2_len = 0;
> +    mbuf->outer_l3_len = 0;
>
>      if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
>          struct tcp_header *th = dp_packet_l4(pkt);
> @@ -2210,13 +2215,11 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int 
> qid,
>      uint32_t nb_tx = 0;
>      uint16_t nb_tx_prep = cnt;
>
> -    if (userspace_tso_enabled()) {
> -        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> -        if (nb_tx_prep != cnt) {
> -            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> -                         "Only %u/%u are valid: %s", dev->up.name, 
> nb_tx_prep,
> -                         cnt, rte_strerror(rte_errno));
> -        }
> +    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> +    if (nb_tx_prep != cnt) {
> +        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> +                     "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
> +                     cnt, rte_strerror(rte_errno));
>      }
>
>      while (nb_tx != nb_tx_prep) {
> @@ -2558,7 +2561,7 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_dpdk 
> *dev,
>          sw_stats->tx_failure_drops      += sw_stats_add->tx_failure_drops;
>          sw_stats->tx_mtu_exceeded_drops += 
> sw_stats_add->tx_mtu_exceeded_drops;
>          sw_stats->tx_qos_drops          += sw_stats_add->tx_qos_drops;
> -        sw_stats->tx_invalid_ol_drops += sw_stats_add->tx_invalid_ol_drops;
> +        sw_stats->tx_invalid_ol_drops   += sw_stats_add->tx_invalid_ol_drops;
>      }
>  }
>
> @@ -2656,12 +2659,10 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, 
> struct dp_packet *pkt_orig)
>      memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
>             sizeof(struct dp_packet) - offsetof(struct dp_packet, 
> l2_pad_size));
>
> -    if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> -        mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
> -                                - (char *)dp_packet_eth(pkt_dest);
> -        mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
> -                                - (char *) dp_packet_l3(pkt_dest);
> -    }
> +    mbuf_dest->l2_len = (char *) dp_packet_l3(pkt_dest)
> +                            - (char *) dp_packet_eth(pkt_dest);
> +    mbuf_dest->l3_len = (char *) dp_packet_l4(pkt_dest)
> +                            - (char *) dp_packet_l3(pkt_dest);

l2_len and l3_len are used by dpdk drivers to program hw (that
supports the associated tx offloads).
This information is set in netdev_dpdk_prep_ol_packet, so I don't see
why we need to copy it here.


>
>      return pkt_dest;
>  }
> @@ -2718,11 +2719,9 @@ netdev_dpdk_common_send(struct netdev *netdev, struct 
> dp_packet_batch *batch,
>      pkt_cnt = cnt;
>
>      /* Prepare each mbuf for hardware offloading. */
> -    if (userspace_tso_enabled()) {
> -        cnt = netdev_dpdk_prep_ol_batch(dev, pkts, pkt_cnt);
> -        stats->tx_invalid_ol_drops += pkt_cnt - cnt;
> -        pkt_cnt = cnt;
> -    }
> +    cnt = netdev_dpdk_prep_ol_batch(dev, pkts, pkt_cnt);
> +    stats->tx_invalid_ol_drops += pkt_cnt - cnt;
> +    pkt_cnt = cnt;
>
>      /* Apply Quality of Service policy. */
>      cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true);
> @@ -4936,6 +4935,12 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
>      }
>
>      err = dpdk_eth_dev_init(dev);
> +    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> +        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> +    } else {
> +        netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
> +    }
> +
>      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
>          netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
>          netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 6e62447f1..f33be55fe 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -148,6 +148,11 @@ struct netdev_dummy {
>      int requested_n_txq OVS_GUARDED;
>      int requested_n_rxq OVS_GUARDED;
>      int requested_numa_id OVS_GUARDED;
> +
> +    /* Enable netdev IP csum offload. */
> +    bool ol_ip_csum OVS_GUARDED;
> +    /* Flag RX packet with good csum. */
> +    bool ol_ip_csum_set_good OVS_GUARDED;
>  };
>
>  /* Max 'recv_queue_len' in struct netdev_dummy. */
> @@ -910,6 +915,13 @@ netdev_dummy_set_config(struct netdev *netdev_, const 
> struct smap *args,
>          }
>      }
>
> +    netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
> +                                                false);
> +    netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", false);
> +    if (netdev->ol_ip_csum) {
> +        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> +    }
> +
>      netdev_change_seq_changed(netdev_);
>
>      /* 'dummy-pmd' specific config. */
> @@ -1088,6 +1100,10 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct 
> dp_packet_batch *batch,
>      netdev->rxq_stats[rxq_->queue_id].bytes += dp_packet_size(packet);
>      netdev->custom_stats[0].value++;
>      netdev->custom_stats[1].value++;
> +    if (netdev->ol_ip_csum_set_good) {
> +        /* The netdev hardware sets the flag when the packet has good csum. 
> */
> +        dp_packet_ol_set_ip_csum_good(packet);
> +    }
>      ovs_mutex_unlock(&netdev->mutex);
>
>      dp_packet_batch_init_packet(batch, packet);
> @@ -1170,6 +1186,13 @@ netdev_dummy_send(struct netdev *netdev, int qid,
>          }
>
>          ovs_mutex_lock(&dev->mutex);
> +        if (dp_packet_ol_tx_ip_csum(packet)) {
> +            if (!dp_packet_ol_ip_checksum_good(packet)) {
> +                dp_packet_ip_set_header_csum(packet);
> +                dp_packet_ol_set_ip_csum_good(packet);
> +            }
> +        }
> +
>          dev->stats.tx_packets++;
>          dev->txq_stats[qid].packets++;
>          dev->stats.tx_bytes += size;
> diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
> index 8a0b34fc4..d9e2d7e5d 100644
> --- a/lib/netdev-native-tnl.c
> +++ b/lib/netdev-native-tnl.c
> @@ -88,7 +88,10 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, 
> struct flow_tnl *tnl,
>
>          ovs_be32 ip_src, ip_dst;
>
> -        if (OVS_UNLIKELY(!dp_packet_ol_ip_checksum_good(packet))) {
> +        /* A packet coming from a network device might have the
> +         * csum already checked. In this case, skip the check. */
> +        if (!dp_packet_ol_ip_checksum_good(packet)
> +            && !dp_packet_ol_tx_ip_csum(packet)) {
>              if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
>                  VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
>                  return NULL;

Once checked, we can mark csum as good.

> @@ -142,7 +145,8 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, 
> struct flow_tnl *tnl,
>   *
>   * This function sets the IP header's ip_tot_len field (which should be 
> zeroed
>   * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
> - * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
> + * updates IP header checksum if not offloaded, as well as the l3 and l4
> + * offsets in 'packet'.
>   *
>   * Return pointer to the L4 header added to 'packet'. */
>  void *
> @@ -167,11 +171,16 @@ netdev_tnl_push_ip_header(struct dp_packet *packet,
>          *ip_tot_size -= IPV6_HEADER_LEN;
>          ip6->ip6_plen = htons(*ip_tot_size);
>          packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
> +        dp_packet_ol_set_tx_ipv6(packet);
> +        dp_packet_ol_reset_ip_csum_good(packet);
>          return ip6 + 1;
>      } else {
>          ip = netdev_tnl_ip_hdr(eth);
>          ip->ip_tot_len = htons(*ip_tot_size);
> -        ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
> +        /* Postpone checksum to when the packet is pushed to the port. */
> +        dp_packet_ol_set_tx_ipv4(packet);
> +        dp_packet_ol_set_tx_ip_csum(packet);
> +        dp_packet_ol_reset_ip_csum_good(packet);
>          *ip_tot_size -= IP_HEADER_LEN;
>          packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
>          return ip + 1;
> @@ -297,8 +306,8 @@ netdev_tnl_ip_build_header(struct ovs_action_push_tnl 
> *data,
>          ip->ip_frag_off = (params->flow->tunnel.flags & 
> FLOW_TNL_F_DONT_FRAGMENT) ?
>                            htons(IP_DF) : 0;
>
> -        /* Checksum has already been zeroed by eth_build_header. */
> -        ip->ip_csum = csum(ip, sizeof *ip);
> +        /* The checksum will be calculated when the headers are pushed
> +         * to the packet if offloading is not enabled. */
>
>          data->header_len += IP_HEADER_LEN;
>          return ip + 1;
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 5eecacd9e..b222a5e64 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -799,6 +799,14 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
>              return false;
>      }
>
> +    /* Packet with IP csum offloading enabled was received with verified 
> csum.
> +     * Leave the IP csum offloading enabled even with good checksum to the
> +     * netdev to decide what would be the best to do.
> +     * Provide a software fallback in case the device doesn't support IP csum
> +     * offloading. Note: Encapsulated packet must have the inner IP header
> +     * csum already calculated. */
> +    dp_packet_ol_send_prepare(packet, netdev_flags);
> +
>      if (dp_packet_ol_l4_mask(packet)) {
>          if (dp_packet_ol_tx_tcp_csum(packet)) {
>              if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
> @@ -966,7 +974,21 @@ netdev_push_header(const struct netdev *netdev,
>                           "not supported: packet dropped",
>                           netdev_get_name(netdev));
>          } else {
> +            /* The packet is going to be encapsulated and there is
> +             * no support yet for inner network header csum offloading. */
> +            if (dp_packet_ol_tx_ip_csum(packet)
> +                && !dp_packet_ol_ip_checksum_good(packet)) {
> +                dp_packet_ip_set_header_csum(packet);
> +            }
> +
>              netdev->netdev_class->push_header(netdev, packet, data);
> +            if (dp_packet_ol_tx_ip_csum(packet)) {
> +                dp_packet_ol_reset_ip_csum_good(packet);
> +            } else if (dp_packet_ol_tx_ipv4(packet)) {
> +                dp_packet_ip_set_header_csum(packet);
> +                dp_packet_ol_set_ip_csum_good(packet);
> +            }
> +

I don't understand this part.
Could you explain in which cases we need this?


>              pkt_metadata_init(&packet->md, data->out_port);
>              dp_packet_batch_refill(batch, packet, i);
>          }
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 7da56793d..ab2271a66 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -167,9 +167,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct 
> ovs_key_ipv4 *key,
>          new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
>
>          if (nh->ip_tos != new_tos) {
> -            nh->ip_csum = recalc_csum16(nh->ip_csum,
> -                                        htons((uint16_t) nh->ip_tos),
> -                                        htons((uint16_t) new_tos));
> +            if (dp_packet_ol_tx_ip_csum(packet)) {
> +                dp_packet_ol_reset_ip_csum_good(packet);
> +            } else {
> +                nh->ip_csum = recalc_csum16(nh->ip_csum,
> +                                            htons((uint16_t) nh->ip_tos),
> +                                            htons((uint16_t) new_tos));
> +            }
> +

Use dp_packet_ip_recalc_csum16.

>              nh->ip_tos = new_tos;
>          }
>      }
> @@ -178,8 +183,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct 
> ovs_key_ipv4 *key,
>          new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
>
>          if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
> -            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
> -                                        htons(new_ttl << 8));
> +            if (dp_packet_ol_tx_ip_csum(packet)) {
> +                dp_packet_ol_reset_ip_csum_good(packet);
> +            } else {
> +                nh->ip_csum = recalc_csum16(nh->ip_csum,
> +                                            htons(nh->ip_ttl << 8),
> +                                            htons(new_ttl << 8));
> +            }
> +

Use dp_packet_ip_recalc_csum16.

>              nh->ip_ttl = new_ttl;
>          }
>      }
> diff --git a/lib/packets.c b/lib/packets.c
> index 5404f69f3..ab6e4f766 100644
> --- a/lib/packets.c
> +++ b/lib/packets.c
> @@ -1145,7 +1145,12 @@ packet_set_ipv4_addr(struct dp_packet *packet,
>              }
>          }
>      }
> -    nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
> +
> +    if (dp_packet_ol_tx_ip_csum(packet)) {
> +        dp_packet_ol_reset_ip_csum_good(packet);
> +    } else {
> +        nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
> +    }

Use dp_packet_ip_recalc_csum32.

>      put_16aligned_be32(addr, new_addr);
>  }
>
> @@ -1310,16 +1315,26 @@ packet_set_ipv4(struct dp_packet *packet, ovs_be32 
> src, ovs_be32 dst,
>      if (nh->ip_tos != tos) {
>          uint8_t *field = &nh->ip_tos;
>
> -        nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
> -                                    htons((uint16_t) tos));
> +        if (dp_packet_ol_tx_ip_csum(packet)) {
> +            dp_packet_ol_reset_ip_csum_good(packet);
> +        } else {
> +            nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) 
> *field),
> +                                        htons((uint16_t) tos));
> +        }
> +

Use dp_packet_ip_recalc_csum16.

>          *field = tos;
>      }
>
>      if (nh->ip_ttl != ttl) {
>          uint8_t *field = &nh->ip_ttl;
>
> -        nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
> -                                    htons(ttl << 8));
> +        if (dp_packet_ol_tx_ip_csum(packet)) {
> +            dp_packet_ol_reset_ip_csum_good(packet);
> +        } else {
> +            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
> +                                        htons(ttl << 8));
> +        }
> +

Use dp_packet_ip_recalc_csum16.

>          *field = ttl;
>      }
>  }
> @@ -1928,8 +1943,13 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
>
>          tos |= IP_ECN_CE;
>          if (nh->ip_tos != tos) {
> -            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
> -                                        htons((uint16_t) tos));
> +            if (dp_packet_ol_tx_ip_csum(pkt)) {
> +                dp_packet_ol_reset_ip_csum_good(pkt);
> +            } else {
> +                nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
> +                                            htons((uint16_t) tos));
> +            }
> +

Use dp_packet_ip_recalc_csum16.

>              nh->ip_tos = tos;
>          }
>      }
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 57f94df54..164738503 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -215,7 +215,7 @@ struct upcall {
>      enum odp_key_fitness fitness;  /* Fitness of 'flow' relative to ODP key. 
> */
>      const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
>      unsigned pmd_id;               /* Datapath poll mode driver id. */
> -    const struct dp_packet *packet;   /* Packet associated with this upcall. 
> */
> +    struct dp_packet *packet;      /* Packet associated with this upcall. */
>      ofp_port_t ofp_in_port;        /* OpenFlow in port, or OFPP_NONE. */
>      uint16_t mru;                  /* If !0, Maximum receive unit of
>                                        fragmented IP packet */
> @@ -395,7 +395,7 @@ static void delete_op_init(struct udpif *udpif, struct 
> ukey_op *op,
>                             struct udpif_key *ukey);
>
>  static int upcall_receive(struct upcall *, const struct dpif_backer *,
> -                          const struct dp_packet *packet, enum 
> dpif_upcall_type,
> +                          struct dp_packet *packet, enum dpif_upcall_type,
>                            const struct nlattr *userdata, const struct flow *,
>                            const unsigned int mru,
>                            const ovs_u128 *ufid, const unsigned pmd_id);
> @@ -1140,7 +1140,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out 
> *xout,
>   * since the 'upcall->put_actions' remains uninitialized. */
>  static int
>  upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
> -               const struct dp_packet *packet, enum dpif_upcall_type type,
> +               struct dp_packet *packet, enum dpif_upcall_type type,
>                 const struct nlattr *userdata, const struct flow *flow,
>                 const unsigned int mru,
>                 const ovs_u128 *ufid, const unsigned pmd_id)
> @@ -1336,7 +1336,7 @@ should_install_flow(struct udpif *udpif, struct upcall 
> *upcall)
>  }
>
>  static int
> -upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 
> *ufid,
> +upcall_cb(struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
>            unsigned pmd_id, enum dpif_upcall_type type,
>            const struct nlattr *userdata, struct ofpbuf *actions,
>            struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
> @@ -1446,7 +1446,7 @@ static int
>  process_upcall(struct udpif *udpif, struct upcall *upcall,
>                 struct ofpbuf *odp_actions, struct flow_wildcards *wc)
>  {
> -    const struct dp_packet *packet = upcall->packet;
> +    struct dp_packet *packet = upcall->packet;
>      const struct flow *flow = upcall->flow;
>      size_t actions_len = 0;
>
> @@ -1524,6 +1524,10 @@ process_upcall(struct udpif *udpif, struct upcall 
> *upcall,
>                  break;
>              }
>
> +            /* The packet is going to be encapsulated and sent to
> +             * the controller. */
> +            dp_packet_ol_send_prepare(packet, 0);

Having to touch the packet data this late is scary, but I don't know a
better place for now.



> +
>              const struct frozen_state *state = &recirc_node->state;
>
>              struct ofproto_async_msg *am = xmalloc(sizeof *am);
> diff --git a/tests/automake.mk b/tests/automake.mk
> index b29cb783e..1b6296411 100644
> --- a/tests/automake.mk
> +++ b/tests/automake.mk
> @@ -161,6 +161,7 @@ SYSTEM_KMOD_TESTSUITE_AT = \
>  SYSTEM_USERSPACE_TESTSUITE_AT = \
>         tests/system-userspace-testsuite.at \
>         tests/system-userspace-macros.at \
> +       tests/system-userspace-offload.at \
>         tests/system-userspace-packet-type-aware.at \
>         tests/system-route.at
>
> diff --git a/tests/system-userspace-offload.at 
> b/tests/system-userspace-offload.at
> new file mode 100644
> index 000000000..4d7f3ef89
> --- /dev/null
> +++ b/tests/system-userspace-offload.at
> @@ -0,0 +1,79 @@
> +AT_BANNER([userspace offload])
> +
> +AT_SETUP([userspace offload - ip csum offload])
> +OVS_VSWITCHD_START(
> +  [add-br br1 -- set bridge br1 datapath-type=dummy -- \
> +   add-port br1 p1 -- \
> +       set Interface p1 type=dummy -- \
> +   add-port br1 p2 -- \
> +       set Interface p2 type=dummy --])
> +
> +# Modify the ip_dst addr to force changing the IP csum.
> +AT_CHECK([ovs-ofctl add-flow br1 
> in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
> +
> +# Check that the no-offload case still works.
> +AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
> +# by the datapath while processing the packet.
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Check if packets entering the datapath with csum offloading
> +# enabled get the csum updated properly by egress handling
> +# in the datapath and not by the netdev.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Check if packets entering the datapath with csum offloading
> +# enabled get the csum updated properly by netdev and not
> +# by the datapath.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Push a packet with bad csum and offloading disabled to check
> +# if the datapath updates the csum, but does not fix the issue.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Push a packet with bad csum and offloading enabled to check
> +# if the driver updates and fixes the csum.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> diff --git a/tests/system-userspace-testsuite.at 
> b/tests/system-userspace-testsuite.at
> index 2e9659a67..1021b4ad4 100644
> --- a/tests/system-userspace-testsuite.at
> +++ b/tests/system-userspace-testsuite.at
> @@ -25,5 +25,6 @@ m4_include([tests/system-common-macros.at])
>  m4_include([tests/system-traffic.at])
>  m4_include([tests/system-layer3-tunnels.at])
>  m4_include([tests/system-interface.at])
> +m4_include([tests/system-userspace-offload.at])
>  m4_include([tests/system-userspace-packet-type-aware.at])
>  m4_include([tests/system-route.at])
> --
> 2.31.1
>


-- 
David Marchand


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to