Re: [ovs-dev] [PATCH v13 06/11] dp-packet: Add support for data "linearization".

Lam, Tiago Sat, 12 Jan 2019 11:45:40 -0800

Hi,

I will send another revision that addresses the concerns listed here.
More specifically:
- dp_packet_linearize() will only be called from process_one() in
conntrack.c;
- dp_packet_linearize() checks if a packet is linear by itself and exits
earlier if so. That means other places in the code will benefit from
less changes;
- You mentioned function headers previously, I can see now you meant
function header *comments*. I'll add those as well.


Please let me know your thoughts.

Tiago.

On 11/01/2019 20:01, Darrell Ball wrote:
> 
> 
> On Fri, Jan 11, 2019 at 1:43 AM Lam, Tiago <[email protected]
> <mailto:[email protected]>> wrote:
> 
>     On 11/01/2019 01:34, Darrell Ball wrote:
>     > thanks; not a full review
> 
>     Thanks, responses in-line.
> 
>     >
>     > On Wed, Jan 9, 2019 at 10:06 AM Tiago Lam <[email protected]
>     <mailto:[email protected]>
>     > <mailto:[email protected] <mailto:[email protected]>>> wrote:
>     >
>     >     Previous commits have added support to the dp_packet API to handle
>     >     multi-segmented packets, where data is not stored contiguously in
>     >     memory. However, in some cases, it is inevitable and data must be
>     >     provided contiguously. Examples of such cases are when
>     performing csums
>     >     over the entire packet data, or when write()'ing to a file
>     descriptor
>     >     (for a tap interface, for example). For such cases, the
>     dp_packet API
>     >     has been extended to provide a way to transform a multi-segmented
>     >     DPBUF_DPDK packet into a DPBUF_MALLOC system packet (at the
>     expense of a
>     >     copy of memory). If the packet's data is already stored in memory
>     >     contigously then there's no need to convert the packet.
>     >
>     >     Thus, the main use cases that were assuming that a dp_packet's
>     data is
>     >     always held contiguously in memory were changed to make use of
>     the new
>     >     "linear functions" in the dp_packet API when there's a need to
>     traverse
>     >     the entire's packet data. Per the example above, when the
>     packet's data
>     >     needs to be write() to the tap's file descriptor, or when the
>     conntrack
>     >     module needs to verify a packet's checksum, the data is now
>     linearized.
>     >
>     >     Additionally, the miniflow_extract() function has been
>     modified to check
>     >     if the respective packet headers don't span across multiple
>     mbufs. This
>     >     requirement is needed to guarantee that callers can assume
>     headers are
>     >     always in contiguous memory.
>     >
>     >     Signed-off-by: Tiago Lam <[email protected]
>     <mailto:[email protected]>
>     >     <mailto:[email protected] <mailto:[email protected]>>>
>     >     ---
>     >      lib/bfd.c                     |   3 +-
>     >      lib/conntrack-private.h       |   4 +-
>     >      lib/conntrack.c               |  43 +++++--
>     >      lib/crc32c.c                  |  35 ++++--
>     >      lib/crc32c.h                  |   2 +
>     >      lib/dp-packet.c               |  18 +++
>     >      lib/dp-packet.h               | 267
>     >     ++++++++++++++++++++++++++++++------------
>     >      lib/dpif-netdev.c             |  13 +-
>     >      lib/dpif-netlink.c            |   5 +
>     >      lib/dpif.c                    |   9 ++
>     >      lib/flow.c                    |  97 ++++++++++++---
>     >      lib/flow.h                    |   2 +-
>     >      lib/mcast-snooping.c          |   2 +
>     >      lib/netdev-bsd.c              |   5 +
>     >      lib/netdev-dummy.c            |  13 +-
>     >      lib/netdev-linux.c            |  13 +-
>     >      lib/netdev-native-tnl.c       |  26 ++--
>     >      lib/odp-execute.c             |  12 +-
>     >      lib/ovs-lldp.c                |   3 +-
>     >      lib/packets.c                 |  85 ++++++++++++--
>     >      lib/packets.h                 |   7 ++
>     >      ofproto/ofproto-dpif-upcall.c |  20 +++-
>     >      ofproto/ofproto-dpif-xlate.c  |  32 ++++-
>     >      tests/test-rstp.c             |   8 +-
>     >      tests/test-stp.c              |   8 +-
>     >      25 files changed, 579 insertions(+), 153 deletions(-)
>     >
>     >     diff --git a/lib/bfd.c b/lib/bfd.c
>     >     index cc8c685..12e076a 100644
>     >     --- a/lib/bfd.c
>     >     +++ b/lib/bfd.c
>     >     @@ -721,7 +721,8 @@ bfd_process_packet(struct bfd *bfd, const
>     struct
>     >     flow *flow,
>     >          if (!msg) {
>     >              VLOG_INFO_RL(&rl, "%s: Received too-short BFD control
>     >     message (only "
>     >                           "%"PRIdPTR" bytes long, at least %d
>     required).",
>     >     -                     bfd->name, (uint8_t *) dp_packet_tail(p)
>     - l7,
>     >     +                     bfd->name, dp_packet_size(p) -
>     >     +                     (l7 - (uint8_t *) dp_packet_data(p)),
>     >                           BFD_PACKET_LEN);
>     >              goto out;
>     >          }
>     >     diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
>     >     index a344801..1be2df1 100644
>     >     --- a/lib/conntrack-private.h
>     >     +++ b/lib/conntrack-private.h
>     >     @@ -159,8 +159,8 @@ tcp_payload_length(struct dp_packet *pkt)
>     >      {
>     >          const char *tcp_payload = dp_packet_get_tcp_payload(pkt);
>     >          if (tcp_payload) {
>     >     -        return ((char *) dp_packet_tail(pkt) -
>     >     dp_packet_l2_pad_size(pkt)
>     >     -                - tcp_payload);
>     >     +        return dp_packet_l4_size(pkt) -
>     >     +               (tcp_payload - (char *) dp_packet_l4(pkt));
>     >
>     >
>     > why was this change needed ?
> 
>     `dp_packet_get_tcp_payload()` already makes sure the payload is within
>     the a segment, returning NULL otherwise. So, while this change itself
>     isn't needed to make this code path correct, it helps with readability
>     in the future now with the adoption with multi-segment mbufs. Above, the
>     resulting addresses of `dp_packet_tail(pkt)` and `tcp_payload` may not
>     be in the same segment when using multi-segment mbufs.
> 
> 
> Please revert this change as it is not necessary and involves extra
> instructions
> At this point, the packet should be linearized.
> 
> Furthermore, assuming this patchset goes ahead (I have serious
> reservations in general),
> the linearization check should be done at first entering conntrack in
> 'ONE' place.
> 
> Please make sure you address this comment.
> 
> 
>     >  
>     >
>     >          } else {
>     >              return 0;
>     >          }
>     >     diff --git a/lib/conntrack.c b/lib/conntrack.c
>     >     index 6f6021a..0dd2dcc 100644
>     >     --- a/lib/conntrack.c
>     >     +++ b/lib/conntrack.c
>     >     @@ -636,12 +636,22 @@ reverse_pat_packet(struct dp_packet *pkt,
>     >     const struct conn *conn)
>     >      static void
>     >      reverse_nat_packet(struct dp_packet *pkt, const struct conn
>     *conn)
>     >      {
>     >     -    char *tail = dp_packet_tail(pkt);
>     >     -    char pad = dp_packet_l2_pad_size(pkt);
>     >     +    char *tail;
>     >     +    char pad;
>     >          struct conn_key inner_key;
>     >          const char *inner_l4 = NULL;
>     >     -    uint16_t orig_l3_ofs = pkt->l3_ofs;
>     >     -    uint16_t orig_l4_ofs = pkt->l4_ofs;
>     >     +    uint16_t orig_l3_ofs;
>     >     +    uint16_t orig_l4_ofs;
>     >     +
>     >     +    /* We need the whole packet to parse the packet below */
>     >     +    if (!dp_packet_is_linear(pkt)) {
>     >     +        dp_packet_linearize(pkt);
>     >     +    }
>     >     +
>     >     +    tail = dp_packet_tail(pkt);
>     >     +    pad = dp_packet_l2_pad_size(pkt);
>     >     +    orig_l3_ofs = pkt->l3_ofs;
>     >     +    orig_l4_ofs = pkt->l4_ofs;
>     >
>     >
>     >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     >              struct ip_header *nh = dp_packet_l3(pkt);
>     >     @@ -1323,6 +1333,7 @@ conntrack_execute(struct conntrack *ct,
>     struct
>     >     dp_packet_batch *pkt_batch,
>     >                  write_ct_md(packet, zone, NULL, NULL, NULL);
>     >                  continue;
>     >              }
>     >     +
>     >
>     >
>     > pls don't make whitespace changes, especially for large patchsets
> 
>     Sure. Will remove.
> 
>     >  
>     >
>     >              process_one(ct, packet, &ctx, zone, force, commit, now,
>     >     setmark,
>     >                          setlabel, nat_action_info, tp_src,
>     tp_dst, helper);
>     >          }
>     >     @@ -1904,9 +1915,18 @@ static bool
>     >      conn_key_extract(struct conntrack *ct, struct dp_packet *pkt,
>     >     ovs_be16 dl_type,
>     >                       struct conn_lookup_ctx *ctx, uint16_t zone)
>     >      {
>     >     -    const struct eth_header *l2 = dp_packet_eth(pkt);
>     >     -    const struct ip_header *l3 = dp_packet_l3(pkt);
>     >     -    const char *l4 = dp_packet_l4(pkt);
>     >     +    const struct eth_header *l2;
>     >     +    const struct ip_header *l3;
>     >     +    const char *l4;
>     >     +
>     >     +    /* We need the whole packet to parse the packet below */
>     >     +    if (!dp_packet_is_linear(pkt)) {
>     >     +        dp_packet_linearize(pkt);
>     >     +    }
>     >     +
>     >     +    l2 = dp_packet_eth(pkt);
>     >     +    l3 = dp_packet_l3(pkt);
>     >     +    l4 = dp_packet_l4(pkt);
>     >
>     >
>     > iirc, the minimum segment size is 2k
>     > it seems a wrapper around csum_continue() is what is needed; is
>     > packet_csum_continue()  the right api ?
> 
>     Likely. The analysis and implementation of moving conntrack.c to be
>     compliant with multi-segment mbufs hasn't been done for this patchset -
>     other than "linearize the packet once it gets to conntrack". This is
>     expected to have a performance penalty, if one uses the Userspace
>     connection tracker *and enables* multi-segment mbufs. This is also
>     mentioned in Documentation/topics/dpdk/jumbo-frames.rst.
> 
> 
> Furthermore, assuming this patchset goes ahead (I have serious
> reservations in general),
> the linearization should be done at first entering conntrack in 'ONE' place.
> 
> Please make sure you address this comment.
> 
> The same comment applies to the rest of the code; I will check but I
> guess the 'is linear'  check does not
> need to be on so many places and can be consolidated in fewer checks.
>  
> 
> 
>     >  
>     >
>     >
>     >          memset(ctx, 0, sizeof *ctx);
>     >
>     >     @@ -3174,7 +3194,7 @@ handle_ftp_ctl(struct conntrack *ct, const
>     >     struct conn_lookup_ctx *ctx,
>     >                     const struct conn *conn_for_expectation,
>     >                     long long now, enum ftp_ctl_pkt ftp_ctl, bool nat)
>     >      {
>     >     -    struct ip_header *l3_hdr = dp_packet_l3(pkt);
>     >     +    struct ip_header *l3_hdr;
>     >          ovs_be32 v4_addr_rep = 0;
>     >          struct ct_addr v6_addr_rep;
>     >          size_t addr_offset_from_ftp_data_start = 0;
>     >     @@ -3183,6 +3203,13 @@ handle_ftp_ctl(struct conntrack *ct, const
>     >     struct conn_lookup_ctx *ctx,
>     >          bool do_seq_skew_adj = true;
>     >          enum ct_alg_mode mode = CT_FTP_MODE_ACTIVE;
>     >
>     >     +    /* We need the whole packet to parse the packet below */
>     >     +    if (!dp_packet_is_linear(pkt)) {
>     >     +        dp_packet_linearize(pkt);
>     >     +    }
>     >     +
>     >     +    l3_hdr = dp_packet_l3(pkt);
>     >     +
>     >          if (detect_ftp_ctl_type(ctx, pkt) != ftp_ctl) {
>     >              return;
>     >          }
>     >     diff --git a/lib/crc32c.c b/lib/crc32c.c
>     >     index e8dd6ee..dd5bb9a 100644
>     >     --- a/lib/crc32c.c
>     >     +++ b/lib/crc32c.c
>     >     @@ -132,28 +132,39 @@ static const uint32_t crc32Table[256] = {
>     >          0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
>     >      };
>     >
>     >     -/*
>     >     - * Compute a CRC32c checksum as per the SCTP requirements in
>     >     RFC4960. This
>     >     - * includes beginning with a checksum of all ones, and returning
>     >     the negated
>     >     - * CRC. Unlike the RFC, we return the checksum in network
>     byte-order.
>     >     - */
>     >     -ovs_be32
>     >     -crc32c(const uint8_t *data, size_t size)
>     >     +uint32_t
>     >     +crc32c_continue(uint32_t partial, const uint8_t *data, size_t
>     size)
>     >      {
>     >     -    uint32_t crc = 0xffffffffL;
>     >     -
>     >          while (size--) {
>     >     -        crc = crc32Table[(crc ^ *data++) & 0xff] ^ (crc >> 8);
>     >     +        partial = crc32Table[(partial ^ *data++) & 0xff] ^
>     (partial
>     >     >> 8);
>     >          }
>     >
>     >     +    return partial;
>     >     +}
>     >     +
>     >     +ovs_be32
>     >     +crc32c_finish(uint32_t partial)
>     >     +{
>     >          /* The result of this CRC calculation provides us a value in
>     >     the reverse
>     >           * byte-order as compared with our architecture. On
>     big-endian
>     >     systems,
>     >           * this is opposite to our return type. So, to return a
>     big-endian
>     >           * value, we must swap the byte-order. */
>     >      #if defined(WORDS_BIGENDIAN)
>     >     -    crc = uint32_byteswap(crc);
>     >     +    crc = uint32_byteswap(partial);
>     >      #endif
>     >
>     >          /* Our value is in network byte-order. OVS_FORCE keeps sparse
>     >     happy. */
>     >     -    return (OVS_FORCE ovs_be32) ~crc;
>     >     +    return (OVS_FORCE ovs_be32) ~partial;
>     >     +}
>     >     +
>     >     +/*
>     >     + * Compute a CRC32c checksum as per the SCTP requirements in
>     >     RFC4960. This
>     >     + * includes beginning with a checksum of all ones, and returning
>     >     the negated
>     >     + * CRC. Unlike the RFC, we return the checksum in network
>     byte-order.
>     >     + */
>     >     +ovs_be32
>     >     +crc32c(const uint8_t *data, size_t size)
>     >     +{
>     >     +    uint32_t crc = 0xffffffffL;
>     >     +    return crc32c_finish(crc32c_continue(crc, data, size));
>     >      }
>     >     diff --git a/lib/crc32c.h b/lib/crc32c.h
>     >     index 92c7d7f..17c8190 100644
>     >     --- a/lib/crc32c.h
>     >     +++ b/lib/crc32c.h
>     >     @@ -20,6 +20,8 @@
>     >
>     >      #include "openvswitch/types.h"
>     >
>     >     +uint32_t crc32c_continue(uint32_t partial, const uint8_t *data,
>     >     size_t size);
>     >     +ovs_be32 crc32c_finish(uint32_t partial);
>     >      ovs_be32 crc32c(const uint8_t *data, size_t);
>     >
>     >      #endif /* crc32c.h */
>     >     diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>     >     index 4e4b8fb..32cc881 100644
>     >     --- a/lib/dp-packet.c
>     >     +++ b/lib/dp-packet.c
>     >     @@ -103,6 +103,9 @@ void
>     >      dp_packet_init_dpdk(struct dp_packet *b)
>     >      {
>     >          b->source = DPBUF_DPDK;
>     >     +#ifdef DPDK_NETDEV
>     >     +    b->mstate = NULL;
>     >     +#endif
>     >      }
>     >
>     >      /* Initializes 'b' as an empty dp_packet with an initial capacity
>     >     of 'size'
>     >     @@ -120,6 +123,21 @@ dp_packet_uninit(struct dp_packet *b)
>     >          if (b) {
>     >              if (b->source == DPBUF_MALLOC) {
>     >                  free(dp_packet_base(b));
>     >     +
>     >     +#ifdef DPDK_NETDEV
>     >     +            /* Packet has been "linearized" */
>     >     +            if (b->mstate) {
>     >     +                b->source = DPBUF_DPDK;
>     >     +                b->mbuf.buf_addr = b->mstate->addr;
>     >     +                b->mbuf.buf_len = b->mstate->len;
>     >     +                b->mbuf.data_off = b->mstate->off;
>     >     +
>     >     +                free(b->mstate);
>     >     +                b->mstate = NULL;
>     >     +
>     >     +                free_dpdk_buf((struct dp_packet *) b);
>     >     +            }
>     >     +#endif
>     >              } else if (b->source == DPBUF_DPDK) {
>     >      #ifdef DPDK_NETDEV
>     >                  /* If this dp_packet was allocated by DPDK it
>     must have
>     >     been
>     >     diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>     >     index a003546..970aaf2 100644
>     >     --- a/lib/dp-packet.h
>     >     +++ b/lib/dp-packet.h
>     >     @@ -27,7 +27,6 @@
>     >
>     >      #include "netdev-dpdk.h"
>     >      #include "openvswitch/list.h"
>     >     -#include "packets.h"
>     >      #include "util.h"
>     >      #include "flow.h"
>     >
>     >     @@ -46,6 +45,16 @@ enum OVS_PACKED_ENUM dp_packet_source {
>     >
>     >      #define DP_PACKET_CONTEXT_SIZE 64
>     >
>     >     +#ifdef DPDK_NETDEV
>     >     +/* Struct to save data for when a DPBUF_DPDK packet is
>     converted to
>     >     + * DPBUF_MALLOC. */
>     >     +struct mbuf_state {
>     >     +    void *addr;
>     >     +    uint16_t len;
>     >     +    uint16_t off;
>     >     +};
>     >     +#endif
>     >     +
>     >      /* Buffer for holding packet data.  A dp_packet is automatically
>     >     reallocated
>     >       * as necessary if it grows too large for the available memory.
>     >       * By default the packet type is set to Ethernet (PT_ETH).
>     >     @@ -53,6 +62,7 @@ enum OVS_PACKED_ENUM dp_packet_source {
>     >      struct dp_packet {
>     >      #ifdef DPDK_NETDEV
>     >          struct rte_mbuf mbuf;       /* DPDK mbuf */
>     >     +    struct mbuf_state *mstate;  /* Used when packet has been
>     >     "linearized" */
>     >      #else
>     >          void *base_;                /* First byte of allocated
>     space. */
>     >          uint16_t allocated_;        /* Number of bytes allocated. */
>     >     @@ -85,6 +95,9 @@ static inline void dp_packet_set_data(struct
>     >     dp_packet *, void *);
>     >      static inline void *dp_packet_base(const struct dp_packet *);
>     >      static inline void dp_packet_set_base(struct dp_packet *,
>     void *);
>     >
>     >     +static inline bool dp_packet_is_linear(const struct dp_packet *);
>     >     +static inline void dp_packet_linearize(struct dp_packet *);
>     >     +
>     >      static inline uint32_t dp_packet_size(const struct dp_packet *);
>     >      static inline void dp_packet_set_size(struct dp_packet *,
>     uint32_t);
>     >
>     >     @@ -101,6 +114,7 @@ static inline void *dp_packet_l2_5(const
>     struct
>     >     dp_packet *);
>     >      static inline void dp_packet_set_l2_5(struct dp_packet *,
>     void *);
>     >      static inline void *dp_packet_l3(const struct dp_packet *);
>     >      static inline void dp_packet_set_l3(struct dp_packet *, void *);
>     >     +static inline size_t dp_packet_l3_size(const struct dp_packet *);
>     >      static inline void *dp_packet_l4(const struct dp_packet *);
>     >      static inline void dp_packet_set_l4(struct dp_packet *, void *);
>     >      static inline size_t dp_packet_l4_size(const struct dp_packet *);
>     >     @@ -137,6 +151,11 @@ static inline void *dp_packet_at(const struct
>     >     dp_packet *, size_t offset,
>     >                                       size_t size);
>     >      static inline void *dp_packet_at_assert(const struct dp_packet *,
>     >                                              size_t offset, size_t
>     size);
>     >     +
>     >     +static inline void
>     >     +dp_packet_copy_from_offset(const struct dp_packet *b, size_t
>     offset,
>     >     +                           size_t size, void *buf);
>     >     +
>     >      #ifdef DPDK_NETDEV
>     >      static inline const struct rte_mbuf *
>     >      dp_packet_mbuf_from_offset(const struct dp_packet *b, size_t
>     *offset);
>     >     @@ -175,21 +194,22 @@ void *dp_packet_steal_data(struct
>     dp_packet *);
>     >      static inline bool dp_packet_equal(const struct dp_packet *,
>     >                                         const struct dp_packet *);
>     >
>     >     +static inline ssize_t
>     >     +dp_packet_read_data(const struct dp_packet *b, size_t offset,
>     >     size_t size,
>     >     +                    void **ptr, void *buf);
>     >     +
>     >     +
>     >
>     >      /* Frees memory that 'b' points to, as well as 'b' itself. */
>     >      static inline void
>     >      dp_packet_delete(struct dp_packet *b)
>     >      {
>     >          if (b) {
>     >     -        if (b->source == DPBUF_DPDK) {
>     >     -            /* If this dp_packet was allocated by DPDK it
>     must have
>     >     been
>     >     -             * created as a dp_packet */
>     >     -            free_dpdk_buf((struct dp_packet*) b);
>     >     -            return;
>     >     -        }
>     >     -
>     >              dp_packet_uninit(b);
>     >     -        free(b);
>     >     +
>     >     +        if (b->source != DPBUF_DPDK) {
>     >     +            free(b);
>     >     +        }
>     >          }
>     >      }
>     >
>     >     @@ -205,7 +225,9 @@ dp_packet_copy_common_members(struct dp_packet
>     >     *new_b,
>     >      }
>     >
>     >      /* If 'b' contains at least 'offset + size' bytes of data,
>     returns
>     >     a pointer to
>     >     - * byte 'offset'.  Otherwise, returns a null pointer. */
>     >     + * byte 'offset'.  Otherwise, returns a null pointer. For DPDK
>     >     packets, this
>     >     + * means the 'offset' + 'size' must fall within the same mbuf
>     (not
>     >     necessarily
>     >     + * the first mbuf), otherwise null is returned */
>     >      static inline void *
>     >      dp_packet_at(const struct dp_packet *b, size_t offset, size_t
>     size)
>     >      {
>     >     @@ -215,18 +237,15 @@ dp_packet_at(const struct dp_packet *b,
>     size_t
>     >     offset, size_t size)
>     >
>     >      #ifdef DPDK_NETDEV
>     >          if (b->source == DPBUF_DPDK) {
>     >     -        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *,
>     &b->mbuf);
>     >     +        const struct rte_mbuf *mbuf =
>     dp_packet_mbuf_from_offset(b,
>     >     &offset);
>     >
>     >     -        while (buf && offset > buf->data_len) {
>     >     -            offset -= buf->data_len;
>     >     -
>     >     -            buf = buf->next;
>     >     +        if (!mbuf || offset + size > mbuf->data_len) {
>     >     +            return NULL;
>     >              }
>     >
>     >     -        return buf ? rte_pktmbuf_mtod_offset(buf, char *,
>     offset) :
>     >     NULL;
>     >     +        return rte_pktmbuf_mtod_offset(mbuf, char *, offset);
>     >          }
>     >      #endif
>     >     -
>     >          return (char *) dp_packet_data(b) + offset;
>     >      }
>     >
>     >     @@ -325,20 +344,24 @@ dp_packet_pull(struct dp_packet *b,
>     size_t size)
>     >          return data;
>     >      }
>     >
>     >     -#ifdef DPDK_NETDEV
>     >      /* Similar to dp_packet_try_pull() but doesn't actually pull any
>     >     data, only
>     >     - * checks if it could and returns true or false accordingly.
>     >     - *
>     >     - * Valid for dp_packets carrying mbufs only. */
>     >     + * checks if it could and returns 'true' or 'false', accordingly.
>     >     For DPDK
>     >     + * packets, 'true' is only returned in case the 'offset' + 'size'
>     >     falls within
>     >     + * the first mbuf, otherwise 'false' is returned */
>     >      static inline bool
>     >     -dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
>     >     -    if (size > b->mbuf.data_len) {
>     >     +dp_packet_may_pull(const struct dp_packet *b, uint16_t offset,
>     >     size_t size)
>     >     +{
>     >     +    if (offset == UINT16_MAX) {
>     >     +        return false;
>     >     +    }
>     >     +#ifdef DPDK_NETDEV
>     >     +    /* Offset needs to be within the first mbuf */
>     >     +    if (offset + size > b->mbuf.data_len) {
>     >              return false;
>     >          }
>     >     -
>     >     -    return true;
>     >     -}
>     >      #endif
>     >     +    return (offset + size > dp_packet_size(b)) ? false : true;
>     >     +}
>     >
>     >      /* If 'b' has at least 'size' bytes of data, removes that many
>     >     bytes from the
>     >       * head end of 'b' and returns the first byte removed. 
>     Otherwise,
>     >     returns a
>     >     @@ -347,7 +370,7 @@ static inline void *
>     >      dp_packet_try_pull(struct dp_packet *b, size_t size)
>     >      {
>     >      #ifdef DPDK_NETDEV
>     >     -    if (!dp_packet_mbuf_may_pull(b, size)) {
>     >     +    if (!dp_packet_may_pull(b, 0, size)) {
>     >              return NULL;
>     >          }
>     >      #endif
>     >     @@ -356,6 +379,39 @@ dp_packet_try_pull(struct dp_packet *b,
>     size_t
>     >     size)
>     >              ? dp_packet_pull(b, size) : NULL;
>     >      }
>     >
>     >     +/* Reads 'size' bytes from 'offset' in 'b', linearly, to
>     'ptr', if
>     >     'buf' is
>     >     + * NULL. Otherwise, if a 'buf' is provided, it must have 'size'
>     >     bytes, and the
>     >     + * data will be copied there, iff it is found to be
>     non-linear. */
>     >     +static inline ssize_t
>     >     +dp_packet_read_data(const struct dp_packet *b, size_t offset,
>     >     size_t size,
>     >     +                    void **ptr, void *buf) {
>     >     +    /* Zero copy */
>     >     +    if ((*ptr = dp_packet_at(b, offset, size)) != NULL) {
>     >     +        return 0;
>     >     +    }
>     >     +
>     >     +    /* Copy available linear data */
>     >     +    if (buf == NULL) {
>     >     +#ifdef DPDK_NETDEV
>     >     +        size_t mofs = offset;
>     >     +        const struct rte_mbuf *mbuf =
>     dp_packet_mbuf_from_offset(b,
>     >     &mofs);
>     >     +        *ptr = dp_packet_at(b, offset, mbuf->data_len - mofs);
>     >     +
>     >     +        return size - (mbuf->data_len - mofs);
>     >     +#else
>     >     +        /* Non-DPDK dp_packets should always hit the above
>     condition */
>     >     +        ovs_assert(1);
>     >     +#endif
>     >     +    }
>     >     +
>     >     +    /* Copy all data */
>     >     +
>     >     +    *ptr = buf;
>     >     +    dp_packet_copy_from_offset(b, offset, size, buf);
>     >     +
>     >     +    return 0;
>     >     +}
>     >     +
>     >      static inline bool
>     >      dp_packet_is_eth(const struct dp_packet *b)
>     >      {
>     >     @@ -398,12 +454,6 @@ dp_packet_set_l2_pad_size(struct
>     dp_packet *b,
>     >     uint8_t pad_size)
>     >      static inline void *
>     >      dp_packet_l2_5(const struct dp_packet *b)
>     >      {
>     >     -#ifdef DPDK_NETDEV
>     >     -    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
>     >     -        return NULL;
>     >     -    }
>     >     -#endif
>     >     -
>     >          return b->l2_5_ofs != UINT16_MAX
>     >                 ? (char *) dp_packet_data(b) + b->l2_5_ofs
>     >                 : NULL;
>     >     @@ -420,12 +470,6 @@ dp_packet_set_l2_5(struct dp_packet *b,
>     void *l2_5)
>     >      static inline void *
>     >      dp_packet_l3(const struct dp_packet *b)
>     >      {
>     >     -#ifdef DPDK_NETDEV
>     >     -    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
>     >     -        return NULL;
>     >     -    }
>     >     -#endif
>     >     -
>     >          return b->l3_ofs != UINT16_MAX
>     >                 ? (char *) dp_packet_data(b) + b->l3_ofs
>     >                 : NULL;
>     >     @@ -437,15 +481,29 @@ dp_packet_set_l3(struct dp_packet *b,
>     void *l3)
>     >          b->l3_ofs = l3 ? (char *) l3 - (char *) dp_packet_data(b) :
>     >     UINT16_MAX;
>     >      }
>     >
>     >     -static inline void *
>     >     -dp_packet_l4(const struct dp_packet *b)
>     >     +/* Returns the size of the l3 header. Caller must make sure both
>     >     l3_ofs and
>     >     + * l4_ofs are set*/
>     >     +static inline size_t
>     >     +dp_packet_l3h_size(const struct dp_packet *b)
>     >      {
>     >     -#ifdef DPDK_NETDEV
>     >     -    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>     >     -        return NULL;
>     >     +    return b->l4_ofs - b->l3_ofs;
>     >     +}
>     >     +
>     >     +static inline size_t
>     >     +dp_packet_l3_size(const struct dp_packet *b)
>     >     +{
>     >     +    if (!dp_packet_may_pull(b, b->l3_ofs, 0)) {
>     >     +        return 0;
>     >          }
>     >     -#endif
>     >
>     >     +    size_t l3_size = dp_packet_size(b) - b->l3_ofs;
>     >     +
>     >     +    return l3_size - dp_packet_l2_pad_size(b);
>     >     +}
>     >     +
>     >     +static inline void *
>     >     +dp_packet_l4(const struct dp_packet *b)
>     >     +{
>     >          return b->l4_ofs != UINT16_MAX
>     >                 ? (char *) dp_packet_data(b) + b->l4_ofs
>     >                 : NULL;
>     >     @@ -460,31 +518,13 @@ dp_packet_set_l4(struct dp_packet *b,
>     void *l4)
>     >      static inline size_t
>     >      dp_packet_l4_size(const struct dp_packet *b)
>     >      {
>     >     -#ifdef DPDK_NETDEV
>     >     -    if (b->source == DPBUF_DPDK) {
>     >     -        if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>     >     -            return 0;
>     >     -        }
>     >     -
>     >     -        struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *,
>     >     &b->mbuf);
>     >     -        size_t l4_size = mbuf->data_len - b->l4_ofs;
>     >     -
>     >     -        mbuf = mbuf->next;
>     >     -        while (mbuf) {
>     >     -            l4_size += mbuf->data_len;
>     >     -
>     >     -            mbuf = mbuf->next;
>     >     -        }
>     >     +    if (!dp_packet_may_pull(b, b->l4_ofs, 0)) {
>     >     +        return 0;
>     >     +    }
>     >
>     >     -        l4_size -= dp_packet_l2_pad_size(b);
>     >     +    size_t l4_size = dp_packet_size(b) - b->l4_ofs;
>     >
>     >     -        return l4_size;
>     >     -    }
>     >     -#endif
>     >     -    return b->l4_ofs != UINT16_MAX
>     >     -        ? (const char *)dp_packet_tail(b) - (const char
>     >     *)dp_packet_l4(b)
>     >     -        - dp_packet_l2_pad_size(b)
>     >     -        : 0;
>     >     +    return l4_size - dp_packet_l2_pad_size(b);
>     >      }
>     >
>     >      static inline const void *
>     >     @@ -497,7 +537,8 @@ dp_packet_get_tcp_payload(const struct
>     dp_packet *b)
>     >              int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
>     >
>     >              if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <=
>     >     l4_size)) {
>     >     -            return (const char *)tcp + tcp_len;
>     >     +            tcp = dp_packet_at(b, b->l4_ofs, tcp_len);
>     >     +            return (tcp == NULL) ? NULL : tcp + tcp_len;
>     >              }
>     >          }
>     >          return NULL;
>     >     @@ -507,21 +548,24 @@ static inline const void *
>     >      dp_packet_get_udp_payload(const struct dp_packet *b)
>     >      {
>     >          return OVS_LIKELY(dp_packet_l4_size(b) >= UDP_HEADER_LEN)
>     >     -        ? (const char *)dp_packet_l4(b) + UDP_HEADER_LEN : NULL;
>     >     +        ? (const char *) dp_packet_l4(b) + UDP_HEADER_LEN
>     >     +        : NULL;
>     >      }
>     >
>     >      static inline const void *
>     >      dp_packet_get_sctp_payload(const struct dp_packet *b)
>     >      {
>     >          return OVS_LIKELY(dp_packet_l4_size(b) >= SCTP_HEADER_LEN)
>     >     -        ? (const char *)dp_packet_l4(b) + SCTP_HEADER_LEN : NULL;
>     >     +        ? (const char *) dp_packet_l4(b) + SCTP_HEADER_LEN
>     >     +        : NULL;
>     >      }
>     >
>     >      static inline const void *
>     >      dp_packet_get_icmp_payload(const struct dp_packet *b)
>     >      {
>     >          return OVS_LIKELY(dp_packet_l4_size(b) >= ICMP_HEADER_LEN)
>     >     -        ? (const char *)dp_packet_l4(b) + ICMP_HEADER_LEN : NULL;
>     >     +        ? (const char *) dp_packet_l4(b) + ICMP_HEADER_LEN
>     >     +        : NULL;
>     >      }
>     >
>     >      static inline const void *
>     >     @@ -769,6 +813,7 @@ dp_packet_mbuf_init(struct dp_packet *p)
>     >          p->mbuf.ol_flags = p->mbuf.tx_offload =
>     p->mbuf.packet_type = 0;
>     >          p->mbuf.nb_segs = 1;
>     >          p->mbuf.next = NULL;
>     >     +    p->mstate = NULL;
>     >      }
>     >
>     >      static inline bool
>     >     @@ -817,6 +862,66 @@ dp_packet_has_flow_mark(struct dp_packet *p,
>     >     uint32_t *mark)
>     >          return false;
>     >      }
>     >
>     >     +static inline void
>     >     +dp_packet_copy_from_offset(const struct dp_packet *b, size_t
>     offset,
>     >     +                           size_t size, void *buf) {
>     >     +    if (dp_packet_is_linear(b)) {
>     >     +        memcpy(buf, (char *)dp_packet_data(b) + offset, size);
>     >     +    } else {
>     >     +        const struct rte_mbuf *mbuf =
>     dp_packet_mbuf_from_offset(b,
>     >     &offset);
>     >     +        rte_pktmbuf_read(mbuf, offset, size, buf);
>     >     +    }
>     >     +}
>     >     +
>     >     +static inline bool
>     >     +dp_packet_is_linear(const struct dp_packet *b)
>     >     +{
>     >     +    if (b->source == DPBUF_DPDK) {
>     >     +        return rte_pktmbuf_is_contiguous(&b->mbuf);
>     >
>     >
>     > check seems lightweight when  RTE_LIBRTE_MBUF_DEBUG is off (by
>     default)
>     >
>     > static inline int rte_pktmbuf_is_contiguous
>     >
>     
> <https://doc.dpdk.org/api/rte__mbuf_8h.html#a511a116ae4822037d4f1fb561aa4ffcf>(const
>     > struct rte_mbuf <https://doc.dpdk.org/api/structrte__mbuf.html> *m)
>     > 2100 {
>     > 2101  __rte_mbuf_sanity_check
>     >
>     
> <https://doc.dpdk.org/api/rte__mbuf_8h.html#a93da89e4fcf908c0615bcc71c2998413>(m,
>     > 1);
>     > 2102  return !!(m->nb_segs
>     >
>     
> <https://doc.dpdk.org/api/structrte__mbuf.html#a44fb29a9283000c42a77b76e9ef0b070>
>     > == 1);
>     > 2103 }
>     >
>     >
>     > #ifdef RTE_LIBRTE_MBUF_DEBUG
>     > 867 
>     > 869 #define __rte_mbuf_sanity_check(m, is_h)
>     rte_mbuf_sanity_check(m, is_h)
>     > 870 
>     > 871 #else /* RTE_LIBRTE_MBUF_DEBUG */
>     > 872 
>     > 874
>     >
>     
> <https://doc.dpdk.org/api/rte__mbuf_8h.html#a93da89e4fcf908c0615bcc71c2998413>
>  #define
>     > __rte_mbuf_sanity_check(m, is_h) do { } while (0)
>     >
>     >
>     >
>     > /* do some sanity checks on a mbuf: panic if it fails */ void
>     > rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header) { const
>     > struct rte_mbuf *m_seg; unsigned int nb_segs; if (m == NULL)
>     > rte_panic("mbuf is NULL\n"); /* generic checks */ if (m->pool == NULL)
>     > rte_panic("bad mbuf pool\n"); if (m->buf_iova == 0) rte_panic("bad IO
>     > addr\n"); if (m->buf_addr == NULL) rte_panic("bad virt addr\n");
>     > uint16_t cnt = rte_mbuf_refcnt_read(m); if ((cnt == 0) || (cnt ==
>     > UINT16_MAX)) rte_panic("bad ref cnt\n"); /* nothing to check for
>     > sub-segments */ if (is_header == 0) return; nb_segs = m->nb_segs;
>     m_seg
>     > = m; while (m_seg && nb_segs != 0) { m_seg = m_seg->next; nb_segs--; }
>     > if (nb_segs != 0) rte_panic("bad nb_segs\n"); }
>     >
>     >  
>     >
>     >     +    }
>     >     +
>     >     +    return true;
>     >     +}
>     >     +
>     >     +/* Linearizes the data on packet 'b', by copying the data into
>     >     system's memory.
>     >     + * After this the packet is effectively a DPBUF_MALLOC
>     packet. The
>     >     caller is
>     >     + * responsible * for ensuring 'b' needs linearization, by calling
>     >     + * dp_packet_is_linear().
>     >     + *
>     >     + * This is an expensive operation which should only be
>     performed as
>     >     a last
>     >     + * resort, when multi-segments are under use but data must be
>     accessed
>     >     + * linearly. */
>     >     +static inline void
>     >     +dp_packet_linearize(struct dp_packet *b)
>     >     +{
>     >     +    struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *,
>     &b->mbuf);
>     >     +    struct dp_packet *pkt = CONST_CAST(struct dp_packet *, b);
>     >     +    uint32_t pkt_len = dp_packet_size(pkt);
>     >     +    struct mbuf_state *mstate = NULL;
>     >     +    void *dst = xmalloc(pkt_len);
>     >     +
>     >     +    /* Copy packet's data to system's memory */
>     >     +    if (!rte_pktmbuf_read(mbuf, 0, pkt_len, dst)) {
>     >     +        return;
>     >     +    }
>     >     +
>     >     +    /* Free all mbufs except for the first */
>     >     +    dp_packet_clear(pkt);
>     >     +
>     >     +    /* Save mbuf's buf_addr to restore later */
>     >     +    mstate = xmalloc(sizeof(*mstate));
>     >     +    mstate->addr = pkt->mbuf.buf_addr;
>     >     +    mstate->len = pkt->mbuf.buf_len;
>     >     +    mstate->off = pkt->mbuf.data_off;
>     >     +    pkt->mstate = mstate;
>     >     +
>     >     +    /* Tranform DPBUF_DPDK packet into a DPBUF_MALLOC packet */
>     >     +    pkt->source = DPBUF_MALLOC;
>     >     +    pkt->mbuf.buf_addr = dst;
>     >     +    pkt->mbuf.buf_len = pkt_len;
>     >     +    pkt->mbuf.data_off = 0;
>     >     +    dp_packet_set_size(pkt, pkt_len);
>     >     +}
>     >      #else /* DPDK_NETDEV */
>     >      static inline bool
>     >      dp_packet_equal(const struct dp_packet *a, const struct
>     dp_packet *b)
>     >     @@ -945,6 +1050,24 @@ dp_packet_has_flow_mark(struct dp_packet *p
>     >     OVS_UNUSED,
>     >      {
>     >          return false;
>     >      }
>     >     +
>     >     +static inline void
>     >     +dp_packet_copy_from_offset(const struct dp_packet *b, size_t
>     offset,
>     >     +                           size_t size, void *buf)
>     >     +{
>     >     +    memcpy(buf, (char *)dp_packet_data(b) + offset, size);
>     >     +}
>     >     +
>     >     +static inline bool
>     >     +dp_packet_is_linear(const struct dp_packet *b OVS_UNUSED)
>     >     +{
>     >     +    return true;
>     >     +}
>     >     +
>     >     +static inline void
>     >     +dp_packet_linearize(struct dp_packet *b OVS_UNUSED)
>     >     +{
>     >     +}
>     >      #endif /* DPDK_NETDEV */
>     >
>     >      static inline void
>     >     diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>     >     index 1564db9..1f1188d 100644
>     >     --- a/lib/dpif-netdev.c
>     >     +++ b/lib/dpif-netdev.c
>     >     @@ -5710,6 +5710,11 @@ dp_netdev_upcall(struct
>     dp_netdev_pmd_thread
>     >     *pmd, struct dp_packet *packet_,
>     >                  .support = dp_netdev_support,
>     >              };
>     >
>     >     +        /* Gather the whole data for printing the packet (if
>     debug
>     >     enabled) */
>     >     +        if (!dp_packet_is_linear(packet_)) {
>     >     +            dp_packet_linearize(packet_);
>     >     +        }
>     >     +
>     >              ofpbuf_init(&key, 0);
>     >              odp_flow_key_from_flow(&odp_parms, &key);
>     >              packet_str = ofp_dp_packet_to_string(packet_);
>     >     @@ -5954,6 +5959,7 @@ dfc_processing(struct
>     dp_netdev_pmd_thread *pmd,
>     >          bool smc_enable_db;
>     >          size_t map_cnt = 0;
>     >          bool batch_enable = true;
>     >     +    int error;
>     >
>     >          atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
>     >          atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
>     >     @@ -6001,7 +6007,12 @@ dfc_processing(struct
>     dp_netdev_pmd_thread *pmd,
>     >                  }
>     >              }
>     >
>     >     -        miniflow_extract(packet, &key->mf);
>     >     +        error = miniflow_extract(packet, &key->mf);
>     >     +        if (OVS_UNLIKELY(error)) {
>     >     +            dp_packet_delete(packet);
>     >     +            continue;
>     >     +        }
>     >     +
>     >              key->len = 0; /* Not computed yet. */
>     >              /* If EMC and SMC disabled skip hash computation */
>     >              if (smc_enable_db == true || cur_min != 0) {
>     >     diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>     >     index 0347060..54f60db 100644
>     >     --- a/lib/dpif-netlink.c
>     >     +++ b/lib/dpif-netlink.c
>     >     @@ -1849,6 +1849,11 @@ dpif_netlink_operate__(struct
>     dpif_netlink *dpif,
>     >                      }
>     >                      n_ops = i;
>     >                  } else {
>     >     +                /* We will need to pass the whole to encode the
>     >     message */
>     >     +                if (!dp_packet_is_linear(op->execute.packet)) {
>     >     +                    dp_packet_linearize(op->execute.packet);
>     >     +                }
>     >     +
>     >                      dpif_netlink_encode_execute(dpif->dp_ifindex,
>     >     &op->execute,
>     >                                                  &aux->request);
>     >                  }
>     >     diff --git a/lib/dpif.c b/lib/dpif.c
>     >     index e35f111..b04c4a0 100644
>     >     --- a/lib/dpif.c
>     >     +++ b/lib/dpif.c
>     >     @@ -1244,6 +1244,7 @@ dpif_execute_helper_cb(void *aux_, struct
>     >     dp_packet_batch *packets_,
>     >              execute.probe = false;
>     >              execute.mtu = 0;
>     >              aux->error = dpif_execute(aux->dpif, &execute);
>     >     +
>     >              log_execute_message(aux->dpif, &this_module, &execute,
>     >                                  true, aux->error);
>     >
>     >     @@ -1407,6 +1408,7 @@ dpif_operate(struct dpif *dpif, struct
>     dpif_op
>     >     **ops, size_t n_ops,
>     >
>     >                      case DPIF_OP_EXECUTE:
>     >                          COVERAGE_INC(dpif_execute);
>     >     +
>     >                          log_execute_message(dpif, &this_module,
>     >     &op->execute,
>     >                                              false, error);
>     >                          break;
>     >     @@ -1834,6 +1836,13 @@ log_execute_message(const struct dpif
>     *dpif,
>     >              uint64_t stub[1024 / 8];
>     >              struct ofpbuf md = OFPBUF_STUB_INITIALIZER(stub);
>     >
>     >     +        /* We will need the whole data for logging */
>     >     +        struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     >     +                                         execute->packet);
>     >     +        if (!dp_packet_is_linear(p)) {
>     >     +            dp_packet_linearize(p);
>     >     +        }
>     >     +
>     >              packet =
>     ofp_packet_to_string(dp_packet_data(execute->packet),
>     >                                          
>      dp_packet_size(execute->packet),
>     >                                          
>      execute->packet->packet_type);
>     >     diff --git a/lib/flow.c b/lib/flow.c
>     >     index c60446f..9d3e788 100644
>     >     --- a/lib/flow.c
>     >     +++ b/lib/flow.c
>     >     @@ -694,7 +694,7 @@ ipv6_sanity_check(const struct
>     >     ovs_16aligned_ip6_hdr *nh, size_t size)
>     >
>     >      /* Caller is responsible for initializing 'dst' with enough
>     storage for
>     >       * FLOW_U64S * 8 bytes. */
>     >     -void
>     >     +int
>     >      miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>     >      {
>     >          const struct pkt_metadata *md = &packet->md;
>     >     @@ -816,6 +816,13 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >              int ip_len;
>     >              uint16_t tot_len;
>     >
>     >     +        /* Check if header is in first mbuf, otherwise return
>     error */
>     >     +        if (!dp_packet_is_linear(packet)) {
>     >     +            if (!dp_packet_may_pull(packet, packet->l3_ofs,
>     sizeof
>     >     *nh)) {
>     >     +                return -1;
>     >     +            }
>     >     +        }
>     >     +
>     >              if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len,
>     >     &tot_len))) {
>     >                  goto out;
>     >              }
>     >     @@ -846,6 +853,12 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >              ovs_be32 tc_flow;
>     >              uint16_t plen;
>     >
>     >     +        if (!dp_packet_is_linear(packet)) {
>     >     +            if (!dp_packet_may_pull(packet, packet->l3_ofs,
>     sizeof
>     >     *nh)) {
>     >     +                return -1;
>     >     +            }
>     >     +        }
>     >     +
>     >              if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
>     >                  goto out;
>     >              }
>     >     @@ -889,6 +902,14 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >              if (dl_type == htons(ETH_TYPE_ARP) ||
>     >                  dl_type == htons(ETH_TYPE_RARP)) {
>     >                  struct eth_addr arp_buf[2];
>     >     +
>     >     +            if (!dp_packet_is_linear(packet)) {
>     >     +                if (!dp_packet_may_pull(packet, packet->l3_ofs,
>     >     +                                        ARP_ETH_HEADER_LEN)) {
>     >     +                    return -1;
>     >     +                }
>     >     +            }
>     >     +
>     >                  const struct arp_eth_header *arp = (const struct
>     >     arp_eth_header *)
>     >                      data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);
>     >
>     >     @@ -936,6 +957,13 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >                  if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
>     >                      const struct tcp_header *tcp = data;
>     >
>     >     +                if (!dp_packet_is_linear(packet)) {
>     >     +                    if (!dp_packet_may_pull(packet,
>     packet->l4_ofs,
>     >     +                                            TCP_HEADER_LEN)) {
>     >     +                        return -1;
>     >     +                    }
>     >     +                }
>     >     +
>     >                      miniflow_push_be32(mf, arp_tha.ea[2], 0);
>     >                      miniflow_push_be32(mf, tcp_flags,
>     >                                         TCP_FLAGS_BE32(tcp->tcp_ctl));
>     >     @@ -948,6 +976,13 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >                  if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
>     >                      const struct udp_header *udp = data;
>     >
>     >     +                if (!dp_packet_is_linear(packet)) {
>     >     +                    if (!dp_packet_may_pull(packet,
>     packet->l4_ofs,
>     >     +                                            UDP_HEADER_LEN)) {
>     >     +                        return -1;
>     >     +                    }
>     >     +                }
>     >     +
>     >                      miniflow_push_be16(mf, tp_src, udp->udp_src);
>     >                      miniflow_push_be16(mf, tp_dst, udp->udp_dst);
>     >                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     >     @@ -957,6 +992,13 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >                  if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
>     >                      const struct sctp_header *sctp = data;
>     >
>     >     +                if (!dp_packet_is_linear(packet)) {
>     >     +                    if (!dp_packet_may_pull(packet,
>     packet->l4_ofs,
>     >     +                                            SCTP_HEADER_LEN)) {
>     >     +                        return -1;
>     >     +                    }
>     >     +                }
>     >     +
>     >                      miniflow_push_be16(mf, tp_src, sctp->sctp_src);
>     >                      miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
>     >                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     >     @@ -966,6 +1008,13 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >                  if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
>     >                      const struct icmp_header *icmp = data;
>     >
>     >     +                if (!dp_packet_is_linear(packet)) {
>     >     +                    if (!dp_packet_may_pull(packet,
>     packet->l4_ofs,
>     >     +                                            ICMP_HEADER_LEN)) {
>     >     +                        return -1;
>     >     +                    }
>     >     +                }
>     >     +
>     >                      miniflow_push_be16(mf, tp_src,
>     htons(icmp->icmp_type));
>     >                      miniflow_push_be16(mf, tp_dst,
>     htons(icmp->icmp_code));
>     >                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     >     @@ -975,6 +1024,13 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >                  if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
>     >                      const struct igmp_header *igmp = data;
>     >
>     >     +                if (!dp_packet_is_linear(packet)) {
>     >     +                    if (!dp_packet_may_pull(packet,
>     packet->l4_ofs,
>     >     +                                            IGMP_HEADER_LEN)) {
>     >     +                        return -1;
>     >     +                    }
>     >     +                }
>     >     +
>     >                      miniflow_push_be16(mf, tp_src,
>     htons(igmp->igmp_type));
>     >                      miniflow_push_be16(mf, tp_dst,
>     htons(igmp->igmp_code));
>     >                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     >     @@ -987,8 +1043,18 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >                  if (OVS_LIKELY(size >= sizeof(struct icmp6_hdr))) {
>     >                      const struct in6_addr *nd_target;
>     >                      struct eth_addr arp_buf[2];
>     >     -                const struct icmp6_hdr *icmp =
>     data_pull(&data, &size,
>     >     -                                                       
>      sizeof *icmp);
>     >     +                const struct icmp6_hdr *icmp;
>     >     +
>     >     +                if (!dp_packet_is_linear(packet)) {
>     >     +                    if (!dp_packet_may_pull(packet,
>     packet->l4_ofs,
>     >     +                                            sizeof *icmp)) {
>     >     +                        return -1;
>     >     +                    }
>     >     +                }
>     >     +
>     >     +                icmp = data_pull(&data, &size, sizeof *icmp);
>     >     +
>     >     +
>     >                      if (parse_icmpv6(&data, &size, icmp, &nd_target,
>     >     arp_buf)) {
>     >                          if (nd_target) {
>     >                              miniflow_push_words(mf, nd_target,
>     nd_target,
>     >     @@ -1011,6 +1077,7 @@ miniflow_extract(struct dp_packet *packet,
>     >     struct miniflow *dst)
>     >          }
>     >       out:
>     >          dst->map = mf.map;
>     >     +    return 0;
>     >      }
>     >
>     >      ovs_be16
>     >     @@ -3007,7 +3074,8 @@ static void
>     >      flow_compose_l4_csum(struct dp_packet *p, const struct flow
>     *flow,
>     >                           uint32_t pseudo_hdr_csum)
>     >      {
>     >     -    size_t l4_len = (char *) dp_packet_tail(p) - (char *)
>     >     dp_packet_l4(p);
>     >     +    //size_t l4_len = (char *) dp_packet_tail(p) - (char *)
>     >     dp_packet_l4(p);
>     >     +    size_t l4_len = dp_packet_l4_size(p);
>     >
>     >          if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
>     >              || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
>     >     @@ -3015,30 +3083,31 @@ flow_compose_l4_csum(struct dp_packet *p,
>     >     const struct flow *flow,
>     >                  struct tcp_header *tcp = dp_packet_l4(p);
>     >
>     >                  tcp->tcp_csum = 0;
>     >     -            tcp->tcp_csum =
>     csum_finish(csum_continue(pseudo_hdr_csum,
>     >     -                                                      tcp,
>     l4_len));
>     >     +            tcp->tcp_csum = csum_finish(
>     >     +                packet_csum_continue(p, pseudo_hdr_csum,
>     p->l4_ofs,
>     >     l4_len));
>     >              } else if (flow->nw_proto == IPPROTO_UDP) {
>     >                  struct udp_header *udp = dp_packet_l4(p);
>     >
>     >                  udp->udp_csum = 0;
>     >     -            udp->udp_csum =
>     csum_finish(csum_continue(pseudo_hdr_csum,
>     >     -                                                      udp,
>     l4_len));
>     >     +            udp->udp_csum = csum_finish(
>     >     +                packet_csum_continue(p, pseudo_hdr_csum,
>     p->l4_ofs,
>     >     l4_len));
>     >              } else if (flow->nw_proto == IPPROTO_ICMP) {
>     >                  struct icmp_header *icmp = dp_packet_l4(p);
>     >
>     >                  icmp->icmp_csum = 0;
>     >     -            icmp->icmp_csum = csum(icmp, l4_len);
>     >     +            icmp->icmp_csum = packet_csum(p, p->l4_ofs, l4_len);
>     >              } else if (flow->nw_proto == IPPROTO_IGMP) {
>     >                  struct igmp_header *igmp = dp_packet_l4(p);
>     >
>     >                  igmp->igmp_csum = 0;
>     >     -            igmp->igmp_csum = csum(igmp, l4_len);
>     >     +            igmp->igmp_csum = packet_csum(p, p->l4_ofs, l4_len);
>     >              } else if (flow->nw_proto == IPPROTO_ICMPV6) {
>     >                  struct icmp6_hdr *icmp = dp_packet_l4(p);
>     >
>     >                  icmp->icmp6_cksum = 0;
>     >                  icmp->icmp6_cksum = (OVS_FORCE uint16_t)
>     >     -                csum_finish(csum_continue(pseudo_hdr_csum, icmp,
>     >     l4_len));
>     >     +                csum_finish(packet_csum_continue(p,
>     >     pseudo_hdr_csum, p->l4_ofs,
>     >     +                            l4_len));
>     >              }
>     >          }
>     >      }
>     >     @@ -3064,12 +3133,12 @@ packet_expand(struct dp_packet *p, const
>     >     struct flow *flow, size_t size)
>     >              eth->eth_type = htons(dp_packet_size(p));
>     >          } else if (dl_type_is_ip_any(flow->dl_type)) {
>     >              uint32_t pseudo_hdr_csum;
>     >     -        size_t l4_len = (char *) dp_packet_tail(p) - (char *)
>     >     dp_packet_l4(p);
>     >     +        size_t l4_len = dp_packet_l4_size(p);
>     >
>     >              if (flow->dl_type == htons(ETH_TYPE_IP)) {
>     >                  struct ip_header *ip = dp_packet_l3(p);
>     >
>     >     -            ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs +
>     l4_len);
>     >     +            ip->ip_tot_len = htons(dp_packet_l3_size(p));
>     >                  ip->ip_csum = 0;
>     >                  ip->ip_csum = csum(ip, sizeof *ip);
>     >
>     >     @@ -3153,7 +3222,7 @@ flow_compose(struct dp_packet *p, const
>     struct
>     >     flow *flow,
>     >              l4_len = flow_compose_l4(p, flow, l7, l7_len);
>     >
>     >              ip = dp_packet_l3(p);
>     >     -        ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
>     >     +        ip->ip_tot_len = htons(dp_packet_l3_size(p));
>     >              /* Checksum has already been zeroed by put_zeros call. */
>     >              ip->ip_csum = csum(ip, sizeof *ip);
>     >
>     >     diff --git a/lib/flow.h b/lib/flow.h
>     >     index 5ebdb1f..75b0062 100644
>     >     --- a/lib/flow.h
>     >     +++ b/lib/flow.h
>     >     @@ -539,7 +539,7 @@ struct pkt_metadata;
>     >      /* The 'dst' must follow with buffer space for FLOW_U64S
>     64-bit units.
>     >       * 'dst->map' is ignored on input and set on output to indicate
>     >     which fields
>     >       * were extracted. */
>     >     -void miniflow_extract(struct dp_packet *packet, struct
>     miniflow *dst);
>     >     +int miniflow_extract(struct dp_packet *packet, struct
>     miniflow *dst);
>     >      void miniflow_map_init(struct miniflow *, const struct flow *);
>     >      void flow_wc_map(const struct flow *, struct flowmap *);
>     >      size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
>     >     diff --git a/lib/mcast-snooping.c b/lib/mcast-snooping.c
>     >     index 6730301..875b7a1 100644
>     >     --- a/lib/mcast-snooping.c
>     >     +++ b/lib/mcast-snooping.c
>     >     @@ -455,6 +455,7 @@ mcast_snooping_add_report(struct
>     mcast_snooping *ms,
>     >          if (!igmpv3) {
>     >              return 0;
>     >          }
>     >     +    offset = (char *) igmpv3 - (char *) dp_packet_data(p);
>     >          ngrp = ntohs(igmpv3->ngrp);
>     >          offset += IGMPV3_HEADER_LEN;
>     >          while (ngrp--) {
>     >     @@ -507,6 +508,7 @@ mcast_snooping_add_mld(struct
>     mcast_snooping *ms,
>     >          if (!mld) {
>     >              return 0;
>     >          }
>     >     +    offset = (char *) mld - (char *) dp_packet_data(p);
>     >          ngrp = ntohs(mld->ngrp);
>     >          offset += MLD_HEADER_LEN;
>     >          addr = dp_packet_at(p, offset, sizeof(struct in6_addr));
>     >     diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
>     >     index 46698d5..278c8a9 100644
>     >     --- a/lib/netdev-bsd.c
>     >     +++ b/lib/netdev-bsd.c
>     >     @@ -700,6 +700,11 @@ netdev_bsd_send(struct netdev *netdev_,
>     int qid
>     >     OVS_UNUSED,
>     >          }
>     >
>     >          DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>     >     +        /* We need the whole data to send the packet on the
>     device */
>     >     +        if (!dp_packet_is_linear(packet)) {
>     >     +            dp_packet_linearize(packet);
>     >     +        }
>     >     +
>     >              const void *data = dp_packet_data(packet);
>     >              size_t size = dp_packet_size(packet);
>     >
>     >     diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
>     >     index 72b4f7a..c56c86b 100644
>     >     --- a/lib/netdev-dummy.c
>     >     +++ b/lib/netdev-dummy.c
>     >     @@ -233,7 +233,13 @@ dummy_packet_stream_run(struct netdev_dummy
>     >     *dev, struct dummy_packet_stream *s)
>     >
>     >              ASSIGN_CONTAINER(txbuf_node, ovs_list_front(&s->txq),
>     >     list_node);
>     >              txbuf = txbuf_node->pkt;
>     >     -        retval = stream_send(s->stream, dp_packet_data(txbuf),
>     >     dp_packet_size(txbuf));
>     >     +
>     >     +        if (!dp_packet_is_linear(txbuf)) {
>     >     +            dp_packet_linearize(txbuf);
>     >     +        }
>     >     +
>     >     +        retval = stream_send(s->stream, dp_packet_data(txbuf),
>     >     +                             dp_packet_size(txbuf));
>     >
>     >              if (retval > 0) {
>     >                  dp_packet_pull(txbuf, retval);
>     >     @@ -1087,6 +1093,11 @@ netdev_dummy_send(struct netdev
>     *netdev, int
>     >     qid OVS_UNUSED,
>     >
>     >          struct dp_packet *packet;
>     >          DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
>     >     +        /* We need the whole data to send the packet on the
>     device */
>     >     +        if (!dp_packet_is_linear(packet)) {
>     >     +            dp_packet_linearize(packet);
>     >     +        }
>     >     +
>     >              const void *buffer = dp_packet_data(packet);
>     >              size_t size = dp_packet_size(packet);
>     >
>     >     diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>     >     index f86dcd0..fa79b2a 100644
>     >     --- a/lib/netdev-linux.c
>     >     +++ b/lib/netdev-linux.c
>     >     @@ -1379,6 +1379,11 @@ netdev_linux_sock_batch_send(int sock, int
>     >     ifindex,
>     >
>     >          struct dp_packet *packet;
>     >          DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>     >     +        /* We need the whole data to send the packet on the
>     device */
>     >     +        if (!dp_packet_is_linear(packet)) {
>     >     +            dp_packet_linearize(packet);
>     >     +        }
>     >     +
>     >              iov[i].iov_base = dp_packet_data(packet);
>     >              iov[i].iov_len = dp_packet_size(packet);
>     >              mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
>     >     @@ -1432,8 +1437,14 @@ netdev_linux_tap_batch_send(struct netdev
>     >     *netdev_,
>     >              ssize_t retval;
>     >              int error;
>     >
>     >     +        /* We need the whole data to send the packet on the
>     device */
>     >     +        if (!dp_packet_is_linear(packet)) {
>     >     +            dp_packet_linearize(packet);
>     >     +        }
>     >     +
>     >              do {
>     >     -            retval = write(netdev->tap_fd,
>     dp_packet_data(packet),
>     >     size);
>     >     +            retval = write(netdev->tap_fd,
>     dp_packet_data(packet),
>     >     +                           size);
>     >                  error = retval < 0 ? errno : 0;
>     >              } while (error == EINTR);
>     >
>     >     diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
>     >     index 56baaa2..285b927 100644
>     >     --- a/lib/netdev-native-tnl.c
>     >     +++ b/lib/netdev-native-tnl.c
>     >     @@ -65,7 +65,7 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet
>     >     *packet, struct flow_tnl *tnl,
>     >          void *nh;
>     >          struct ip_header *ip;
>     >          struct ovs_16aligned_ip6_hdr *ip6;
>     >     -    void *l4;
>     >     +    char *l4;
>     >          int l3_size;
>     >
>     >          nh = dp_packet_l3(packet);
>     >     @@ -79,15 +79,15 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet
>     >     *packet, struct flow_tnl *tnl,
>     >
>     >          *hlen = sizeof(struct eth_header);
>     >
>     >     -    l3_size = dp_packet_size(packet) -
>     >     -              ((char *)nh - (char *)dp_packet_data(packet));
>     >     +    l3_size = dp_packet_l3_size(packet);
>     >
>     >          if (IP_VER(ip->ip_ihl_ver) == 4) {
>     >
>     >              ovs_be32 ip_src, ip_dst;
>     >
>     >              if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet))) {
>     >     -            if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
>     >     +            if (packet_csum(packet, packet->l3_ofs,
>     >     +                            IP_IHL(ip->ip_ihl_ver) * 4)) {
>     >                      VLOG_WARN_RL(&err_rl, "ip packet has invalid
>     >     checksum");
>     >                      return NULL;
>     >                  }
>     >     @@ -196,10 +196,8 @@ udp_extract_tnl_md(struct dp_packet *packet,
>     >     struct flow_tnl *tnl,
>     >                      csum =
>     packet_csum_pseudoheader(dp_packet_l3(packet));
>     >                  }
>     >
>     >     -            csum = csum_continue(csum, udp,
>     dp_packet_size(packet) -
>     >     -                                 ((const unsigned char *)udp -
>     >     -                                  (const unsigned char
>     >     *)dp_packet_eth(packet)
>     >     -                                 ));
>     >     +            csum = packet_csum_continue(packet, csum,
>     packet->l4_ofs,
>     >     +                                       
>     dp_packet_l4_size(packet));
>     >                  if (csum_finish(csum)) {
>     >                      return NULL;
>     >                  }
>     >     @@ -236,7 +234,7 @@ netdev_tnl_push_udp_header(const struct netdev
>     >     *netdev OVS_UNUSED,
>     >                  csum =
>     >   
>      packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet)));
>     >              }
>     >
>     >     -        csum = csum_continue(csum, udp, ip_tot_size);
>     >     +        csum = packet_csum_continue(packet, csum, packet->l4_ofs,
>     >     ip_tot_size);
>     >              udp->udp_csum = csum_finish(csum);
>     >
>     >              if (!udp->udp_csum) {
>     >     @@ -373,9 +371,8 @@ parse_gre_header(struct dp_packet *packet,
>     >          if (greh->flags & htons(GRE_CSUM)) {
>     >              ovs_be16 pkt_csum;
>     >
>     >     -        pkt_csum = csum(greh, dp_packet_size(packet) -
>     >     -                              ((const unsigned char *)greh -
>     >     -                               (const unsigned char
>     >     *)dp_packet_eth(packet)));
>     >     +        pkt_csum = packet_csum(packet, packet->l4_ofs,
>     >     +                               dp_packet_l4_size(packet));
>     >              if (pkt_csum) {
>     >                  return -EINVAL;
>     >              }
>     >     @@ -448,8 +445,9 @@ netdev_gre_push_header(const struct netdev
>     *netdev,
>     >          greh = netdev_tnl_push_ip_header(packet, data->header,
>     >     data->header_len, &ip_tot_size);
>     >
>     >          if (greh->flags & htons(GRE_CSUM)) {
>     >     -        ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
>     >     -        *csum_opt = csum(greh, ip_tot_size);
>     >     +        greh = dp_packet_l4(packet);
>     >     +        ovs_be16 *csum_opt = (ovs_be16 *) greh;
>     >     +        *csum_opt = packet_csum(packet, packet->l4_ofs,
>     ip_tot_size);
>     >          }
>     >
>     >          if (greh->flags & htons(GRE_SEQ)) {
>     >     diff --git a/lib/odp-execute.c b/lib/odp-execute.c
>     >     index 3b6890e..2c33d6a 100644
>     >     --- a/lib/odp-execute.c
>     >     +++ b/lib/odp-execute.c
>     >     @@ -231,8 +231,16 @@ static void
>     >      odp_set_nd(struct dp_packet *packet, const struct ovs_key_nd
>     *key,
>     >                 const struct ovs_key_nd *mask)
>     >      {
>     >     -    const struct ovs_nd_msg *ns = dp_packet_l4(packet);
>     >     -    const struct ovs_nd_lla_opt *lla_opt =
>     >     dp_packet_get_nd_payload(packet);
>     >     +    const struct ovs_nd_msg *ns;
>     >     +    const struct ovs_nd_lla_opt *lla_opt;
>     >     +
>     >     +    /* To orocess neighbor discovery options, we need the whole
>     >     packet */
>     >     +    if (!dp_packet_is_linear(packet)) {
>     >     +        dp_packet_linearize(packet);
>     >     +    }
>     >     +
>     >     +    ns = dp_packet_l4(packet);
>     >     +    lla_opt = dp_packet_get_nd_payload(packet);
>     >
>     >          if (OVS_LIKELY(ns && lla_opt)) {
>     >              int bytes_remain = dp_packet_l4_size(packet) -
>     sizeof(*ns);
>     >     diff --git a/lib/ovs-lldp.c b/lib/ovs-lldp.c
>     >     index 05c1dd4..39d677a 100644
>     >     --- a/lib/ovs-lldp.c
>     >     +++ b/lib/ovs-lldp.c
>     >     @@ -668,7 +668,8 @@ lldp_process_packet(struct lldp *lldp, const
>     >     struct dp_packet *p)
>     >      {
>     >          if (lldp) {
>     >              lldpd_recv(lldp->lldpd,
>     lldpd_first_hardware(lldp->lldpd),
>     >     -                   (char *) dp_packet_data(p),
>     dp_packet_size(p));
>     >     +                   (char *) dp_packet_data(p),
>     >     +                   dp_packet_size(p));
>     >          }
>     >      }
>     >
>     >     diff --git a/lib/packets.c b/lib/packets.c
>     >     index 2d6f77b..d95c8cc 100644
>     >     --- a/lib/packets.c
>     >     +++ b/lib/packets.c
>     >     @@ -1007,12 +1007,20 @@ packet_rh_present(struct dp_packet
>     *packet,
>     >     uint8_t *nexthdr)
>     >          const struct ovs_16aligned_ip6_hdr *nh;
>     >          size_t len;
>     >          size_t remaining;
>     >     -    uint8_t *data = dp_packet_l3(packet);
>     >     +    uint8_t *data;
>     >
>     >     -    remaining = packet->l4_ofs - packet->l3_ofs;
>     >     +    remaining = dp_packet_l3h_size(packet);
>     >          if (remaining < sizeof *nh) {
>     >              return false;
>     >          }
>     >     +
>     >     +    /* We will need the whole data for processing the headers
>     below */
>     >     +    if (!dp_packet_is_linear(packet)) {
>     >     +        dp_packet_linearize(packet);
>     >
>     >
>     > dp_packet_linearize() could check for dp_packet_is_linear() and if
>     > linear return early without doing anything
> 
>     Sure, that would be another way to go. It has been mentioned before, and
>     this has become the final form because, 1. `dp_packet_linearize()`
>     changes the address space of the packet and would be easy to misuse it,
> 
> 
> That does not make sense; dp_packet_linearize() will do nothing if the
> packet
> is already linear. Presumably the comment would make that clear.
> Would it help, it I wrote the function ?
> 
>  
> 
>     2. I feel this gives a little more context, instead of a single
>     `dp_packet_linearize()` on its own.
> 
> 
> you are adding 4 lines of code in many files all over the place; this
> should not be so intrusive.
> I am trying to find a way this makes sense, but it is getting harder.
> The above comment about consolidating the check in less places applies
> as well.
> 
> 
>     >
>     >  
>     >
>     >     +    }
>     >     +
>     >     +    data = dp_packet_l3(packet);
>     >     +
>     >          nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data);
>     >          data += sizeof *nh;
>     >          remaining -= sizeof *nh;
>     >     @@ -1254,12 +1262,12 @@ packet_set_sctp_port(struct dp_packet
>     >     *packet, ovs_be16 src, ovs_be16 dst)
>     >
>     >          old_csum = get_16aligned_be32(&sh->sctp_csum);
>     >          put_16aligned_be32(&sh->sctp_csum, 0);
>     >     -    old_correct_csum = crc32c((void *)sh, tp_len);
>     >     +    old_correct_csum = packet_crc32c(packet, packet->l4_ofs,
>     tp_len);
>     >
>     >          sh->sctp_src = src;
>     >          sh->sctp_dst = dst;
>     >
>     >     -    new_csum = crc32c((void *)sh, tp_len);
>     >     +    new_csum = packet_crc32c(packet, packet->l4_ofs, tp_len);
>     >          put_16aligned_be32(&sh->sctp_csum, old_csum ^
>     old_correct_csum
>     >     ^ new_csum);
>     >      }
>     >
>     >     @@ -1293,6 +1301,11 @@ packet_set_nd(struct dp_packet *packet,
>     const
>     >     struct in6_addr *target,
>     >              return;
>     >          }
>     >
>     >     +    /* To process neighbor discovery options, we need the whole
>     >     packet */
>     >     +    if (!dp_packet_is_linear(packet)) {
>     >     +        dp_packet_linearize(packet);
>     >     +    }
>     >     +
>     >          ns = dp_packet_l4(packet);
>     >          opt = &ns->options[0];
>     >          bytes_remain -= sizeof(*ns);
>     >     @@ -1515,8 +1528,8 @@ compose_nd_ns(struct dp_packet *b, const
>     >     struct eth_addr eth_src,
>     >
>     >          ns->icmph.icmp6_cksum = 0;
>     >          icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
>     >     -    ns->icmph.icmp6_cksum = csum_finish(
>     >     -        csum_continue(icmp_csum, ns, ND_MSG_LEN +
>     ND_LLA_OPT_LEN));
>     >     +    ns->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     >     +        b, icmp_csum, b->l4_ofs, ND_MSG_LEN + ND_LLA_OPT_LEN));
>     >      }
>     >
>     >      /* Compose an IPv6 Neighbor Discovery Neighbor Advertisement
>     >     message. */
>     >     @@ -1546,8 +1559,8 @@ compose_nd_na(struct dp_packet *b,
>     >
>     >          na->icmph.icmp6_cksum = 0;
>     >          icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
>     >     -    na->icmph.icmp6_cksum = csum_finish(csum_continue(
>     >     -        icmp_csum, na, ND_MSG_LEN + ND_LLA_OPT_LEN));
>     >     +    na->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     >     +        b, icmp_csum, b->l4_ofs, ND_MSG_LEN + ND_LLA_OPT_LEN));
>     >      }
>     >
>     >      /* Compose an IPv6 Neighbor Discovery Router Advertisement
>     message with
>     >     @@ -1597,8 +1610,8 @@ compose_nd_ra(struct dp_packet *b,
>     >
>     >          ra->icmph.icmp6_cksum = 0;
>     >          uint32_t icmp_csum =
>     packet_csum_pseudoheader6(dp_packet_l3(b));
>     >     -    ra->icmph.icmp6_cksum = csum_finish(csum_continue(
>     >     -        icmp_csum, ra, RA_MSG_LEN + ND_LLA_OPT_LEN +
>     mtu_opt_len));
>     >     +    ra->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     >     +        b, icmp_csum, b->l4_ofs, RA_MSG_LEN + ND_LLA_OPT_LEN +
>     >     mtu_opt_len));
>     >      }
>     >
>     >      /* Append an IPv6 Neighbor Discovery Prefix Information
>     option to a
>     >     @@ -1627,8 +1640,8 @@ packet_put_ra_prefix_opt(struct
>     dp_packet *b,
>     >          struct ovs_ra_msg *ra = dp_packet_l4(b);
>     >          ra->icmph.icmp6_cksum = 0;
>     >          uint32_t icmp_csum =
>     packet_csum_pseudoheader6(dp_packet_l3(b));
>     >     -    ra->icmph.icmp6_cksum = csum_finish(csum_continue(
>     >     -        icmp_csum, ra, prev_l4_size + ND_PREFIX_OPT_LEN));
>     >     +    ra->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     >     +        b, icmp_csum, b->l4_ofs, prev_l4_size +
>     ND_PREFIX_OPT_LEN));
>     >      }
>     >
>     >      uint32_t
>     >     @@ -1680,6 +1693,54 @@ packet_csum_upperlayer6(const struct
>     >     ovs_16aligned_ip6_hdr *ip6,
>     >      }
>     >      #endif
>     >
>     >
>     > It is a proposed 'general' API; a function header is needed
>     > same for other proposed apis
> 
>     It is there in packets.h, or is that not what you're referring to?
> 
> 
> 
> This is what I see in packet.h ....
> 
> index 09a0ac3..9e7f5a1 100644
> 
> --- a/lib/packets.h
> 
> +++ b/lib/packets.h
> 
> @@ -1573,6 +1573,13 @@ void packet_put_ra_prefix_opt(struct dp_packet *,
> 
>                                ovs_be32 preferred_lifetime,
> 
>                                const ovs_be128 router_prefix);
> 
> uint32_t packet_csum_pseudoheader(const struct ip_header *);
> 
> +uint32_t
> 
> +packet_csum_continue(const struct dp_packet *b, uint32_t partial,
> 
> +                     uint16_t offset, size_t n);
> 
> 
> >
> 
>     Tiago.
> 
>     >
>     >  
>     >
>     >     +uint32_t
>     >     +packet_csum_continue(const struct dp_packet *b, uint32_t partial,
>     >     +                     uint16_t offset, size_t n)
>     >     +{
>     >     +    char *ptr = NULL;
>     >     +    size_t rem = 0;
>     >     +    size_t size = 0;
>     >     +
>     >     +    while (n > 1) {
>     >     +        rem = dp_packet_read_data(b, offset, n, (void *)&ptr,
>     NULL);
>     >     +
>     >     +        size = n - rem;
>     >     +        partial = csum_continue(partial, ptr, size);
>     >     +
>     >     +        offset += size;
>     >     +        n = rem;
>     >     +    }
>     >     +
>     >     +    return partial;
>     >     +}
>     >     +
>     >     +ovs_be16
>     >     +packet_csum(const struct dp_packet *b, uint16_t offset, size_t n)
>     >     +{
>     >     +    return csum_finish(packet_csum_continue(b, 0, offset, n));
>     >     +}
>     >     +
>     >     +ovs_be32
>     >     +packet_crc32c(const struct dp_packet *b, uint16_t offset,
>     size_t n)
>     >     +{
>     >     +    char *ptr = NULL;
>     >     +    size_t rem = 0;
>     >     +    size_t size = 0;
>     >     +    uint32_t partial = 0xffffffffL;
>     >     +
>     >     +    while (n > 1) {
>     >     +        rem = dp_packet_read_data(b, offset, n, (void *)&ptr,
>     NULL);
>     >     +
>     >     +        size = n - rem;
>     >     +        partial = crc32c_continue(partial, (uint8_t *) ptr,
>     size);
>     >     +
>     >     +        offset += size;
>     >     +        n = rem;
>     >     +    }
>     >     +
>     >     +    return crc32c_finish(partial);
>     >     +}
>     >     +
>     >      void
>     >      IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
>     >      {
>     >     diff --git a/lib/packets.h b/lib/packets.h
>     >     index 09a0ac3..9e7f5a1 100644
>     >     --- a/lib/packets.h
>     >     +++ b/lib/packets.h
>     >     @@ -1573,6 +1573,13 @@ void packet_put_ra_prefix_opt(struct
>     dp_packet *,
>     >                                    ovs_be32 preferred_lifetime,
>     >                                    const ovs_be128 router_prefix);
>     >      uint32_t packet_csum_pseudoheader(const struct ip_header *);
>     >     +uint32_t
>     >     +packet_csum_continue(const struct dp_packet *b, uint32_t partial,
>     >     +                     uint16_t offset, size_t n);
>     >     +ovs_be16
>     >     +packet_csum(const struct dp_packet *b, uint16_t offset,
>     size_t n);
>     >     +ovs_be32
>     >     +packet_crc32c(const struct dp_packet *b, uint16_t offset,
>     size_t n);
>     >      void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
>     >
>     >      #define DNS_HEADER_LEN 12
>     >     diff --git a/ofproto/ofproto-dpif-upcall.c
>     >     b/ofproto/ofproto-dpif-upcall.c
>     >     index dc30824..ed1c1ac 100644
>     >     --- a/ofproto/ofproto-dpif-upcall.c
>     >     +++ b/ofproto/ofproto-dpif-upcall.c
>     >     @@ -1417,12 +1417,18 @@ process_upcall(struct udpif *udpif, struct
>     >     upcall *upcall,
>     >          case SFLOW_UPCALL:
>     >              if (upcall->sflow) {
>     >                  struct dpif_sflow_actions sflow_actions;
>     >     +            struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     >     packet);
>     >
>     >                  memset(&sflow_actions, 0, sizeof sflow_actions);
>     >
>     >                  actions_len = dpif_read_actions(udpif, upcall, flow,
>     >                                                  upcall->type,
>     >     &sflow_actions);
>     >     -            dpif_sflow_received(upcall->sflow, packet, flow,
>     >     +            /* Gather the whole data */
>     >     +            if (!dp_packet_is_linear(p)) {
>     >     +                dp_packet_linearize(p);
>     >     +            }
>     >     +
>     >     +            dpif_sflow_received(upcall->sflow, p, flow,
>     >                                      flow->in_port.odp_port,
>     >     &upcall->cookie,
>     >                                      actions_len > 0 ?
>     &sflow_actions :
>     >     NULL);
>     >              }
>     >     @@ -1483,6 +1489,12 @@ process_upcall(struct udpif *udpif, struct
>     >     upcall *upcall,
>     >
>     >                  const struct frozen_state *state =
>     &recirc_node->state;
>     >
>     >     +            /* Gather the whole data */
>     >     +            struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     >     packet);
>     >     +            if (!dp_packet_is_linear(p)) {
>     >     +                dp_packet_linearize(p);
>     >     +            }
>     >     +
>     >                  struct ofproto_async_msg *am = xmalloc(sizeof *am);
>     >                  *am = (struct ofproto_async_msg) {
>     >                      .controller_id =
>     cookie->controller.controller_id,
>     >     @@ -1490,9 +1502,9 @@ process_upcall(struct udpif *udpif, struct
>     >     upcall *upcall,
>     >                      .pin = {
>     >                          .up = {
>     >                              .base = {
>     >     -                            .packet =
>     xmemdup(dp_packet_data(packet),
>     >     -                                             
>     dp_packet_size(packet)),
>     >     -                            .packet_len = dp_packet_size(packet),
>     >     +                            .packet = xmemdup(dp_packet_data(p),
>     >     +                                              dp_packet_size(p)),
>     >     +                            .packet_len = dp_packet_size(p),
>     >                                  .reason = cookie->controller.reason,
>     >                                  .table_id = state->table_id,
>     >                                  .cookie = get_32aligned_be64(
>     >     diff --git a/ofproto/ofproto-dpif-xlate.c
>     b/ofproto/ofproto-dpif-xlate.c
>     >     index 839fddd..b2c31b7 100644
>     >     --- a/ofproto/ofproto-dpif-xlate.c
>     >     +++ b/ofproto/ofproto-dpif-xlate.c
>     >     @@ -1737,7 +1737,8 @@ stp_process_packet(const struct xport
>     *xport,
>     >     const struct dp_packet *packet)
>     >          }
>     >
>     >          if (dp_packet_try_pull(&payload, ETH_HEADER_LEN +
>     >     LLC_HEADER_LEN)) {
>     >     -        stp_received_bpdu(sp, dp_packet_data(&payload),
>     >     dp_packet_size(&payload));
>     >     +        stp_received_bpdu(sp, dp_packet_data(&payload),
>     >     +                          dp_packet_size(&payload));
>     >          }
>     >      }
>     >
>     >     @@ -1788,7 +1789,8 @@ rstp_process_packet(const struct xport
>     *xport,
>     >     const struct dp_packet *packet)
>     >          }
>     >
>     >          if (dp_packet_try_pull(&payload, ETH_HEADER_LEN +
>     >     LLC_HEADER_LEN)) {
>     >     -        rstp_port_received_bpdu(xport->rstp_port,
>     >     dp_packet_data(&payload),
>     >     +        rstp_port_received_bpdu(xport->rstp_port,
>     >     +                                dp_packet_data(&payload),
>     >                                      dp_packet_size(&payload));
>     >          }
>     >      }
>     >     @@ -2563,6 +2565,7 @@ update_mcast_snooping_table4__(const struct
>     >     xlate_ctx *ctx,
>     >
>     >          offset = (char *) dp_packet_l4(packet) - (char *)
>     >     dp_packet_data(packet);
>     >          igmp = dp_packet_at(packet, offset, IGMP_HEADER_LEN);
>     >     +
>     >          if (!igmp || csum(igmp, dp_packet_l4_size(packet)) != 0) {
>     >              xlate_report_debug(ctx, OFT_DETAIL,
>     >                                 "multicast snooping received bad
>     IGMP "
>     >     @@ -2975,6 +2978,13 @@ xlate_normal(struct xlate_ctx *ctx)
>     >              && is_ip_any(flow)) {
>     >              struct mcast_snooping *ms = ctx->xbridge->ms;
>     >              struct mcast_group *grp = NULL;
>     >     +        struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     >     +                                         ctx->xin->packet);
>     >     +
>     >     +        /* We will need the whole data for processing the packet
>     >     below */
>     >     +        if (p && !dp_packet_is_linear(p)) {
>     >     +            dp_packet_linearize(p);
>     >     +        }
>     >
>     >              if (is_igmp(flow, wc)) {
>     >                  /*
>     >     @@ -3279,7 +3289,8 @@ process_special(struct xlate_ctx *ctx, const
>     >     struct xport *xport)
>     >          const struct flow *flow = &ctx->xin->flow;
>     >          struct flow_wildcards *wc = ctx->wc;
>     >          const struct xbridge *xbridge = ctx->xbridge;
>     >     -    const struct dp_packet *packet = ctx->xin->packet;
>     >     +    struct dp_packet *packet = CONST_CAST(struct dp_packet *,
>     >     +                                          ctx->xin->packet);
>     >          enum slow_path_reason slow;
>     >
>     >          if (!xport) {
>     >     @@ -3291,6 +3302,11 @@ process_special(struct xlate_ctx *ctx,
>     const
>     >     struct xport *xport)
>     >              slow = SLOW_CFM;
>     >          } else if (xport->bfd && bfd_should_process_flow(xport->bfd,
>     >     flow, wc)) {
>     >              if (packet) {
>     >     +            /* Gather the whole data for further processing */
>     >     +            if (!dp_packet_is_linear(packet)) {
>     >     +                dp_packet_linearize(packet);
>     >     +            }
>     >     +
>     >                  bfd_process_packet(xport->bfd, flow, packet);
>     >                  /* If POLL received, immediately sends FINAL back. */
>     >                  if (bfd_should_send_packet(xport->bfd)) {
>     >     @@ -3307,6 +3323,11 @@ process_special(struct xlate_ctx *ctx,
>     const
>     >     struct xport *xport)
>     >          } else if ((xbridge->stp || xbridge->rstp) &&
>     >                     stp_should_process_flow(flow, wc)) {
>     >              if (packet) {
>     >     +            /* Gather the whole data for further processing */
>     >     +            if (!dp_packet_is_linear(packet)) {
>     >     +                dp_packet_linearize(packet);
>     >     +            }
>     >     +
>     >                  xbridge->stp
>     >                      ? stp_process_packet(xport, packet)
>     >                      : rstp_process_packet(xport, packet);
>     >     @@ -3314,6 +3335,11 @@ process_special(struct xlate_ctx *ctx,
>     const
>     >     struct xport *xport)
>     >              slow = SLOW_STP;
>     >          } else if (xport->lldp &&
>     lldp_should_process_flow(xport->lldp,
>     >     flow)) {
>     >              if (packet) {
>     >     +            /* Gather the whole data for further processing */
>     >     +            if (!dp_packet_is_linear(packet)) {
>     >     +                dp_packet_linearize(packet);
>     >     +            }
>     >     +
>     >                  lldp_process_packet(xport->lldp, packet);
>     >              }
>     >              slow = SLOW_LLDP;
>     >     diff --git a/tests/test-rstp.c b/tests/test-rstp.c
>     >     index 01aeaf8..48f1cfa 100644
>     >     --- a/tests/test-rstp.c
>     >     +++ b/tests/test-rstp.c
>     >     @@ -86,8 +86,12 @@ send_bpdu(struct dp_packet *pkt, void *port_,
>     >     void *b_)
>     >          assert(port_no < b->n_ports);
>     >          lan = b->ports[port_no];
>     >          if (lan) {
>     >     -        const void *data = dp_packet_l3(pkt);
>     >     -        size_t size = (char *) dp_packet_tail(pkt) - (char *)
>     data;
>     >     +        if (!dp_packet_is_linear(pkt)) {
>     >     +            dp_packet_linearize(pkt);
>     >     +        }
>     >     +
>     >     +        const char *data = dp_packet_l3(pkt);
>     >     +        size_t size = dp_packet_size(pkt) - pkt->l3_ofs;
>     >              int i;
>     >
>     >              for (i = 0; i < lan->n_conns; i++) {
>     >     diff --git a/tests/test-stp.c b/tests/test-stp.c
>     >     index c85c99d..fd5bfad 100644
>     >     --- a/tests/test-stp.c
>     >     +++ b/tests/test-stp.c
>     >     @@ -94,8 +94,12 @@ send_bpdu(struct dp_packet *pkt, int port_no,
>     >     void *b_)
>     >          assert(port_no < b->n_ports);
>     >          lan = b->ports[port_no];
>     >          if (lan) {
>     >     -        const void *data = dp_packet_l3(pkt);
>     >     -        size_t size = (char *) dp_packet_tail(pkt) - (char *)
>     data;
>     >     +        if (!dp_packet_is_linear(pkt)) {
>     >     +            dp_packet_linearize(pkt);
>     >     +        }
>     >     +
>     >     +        const char *data = dp_packet_l3(pkt);
>     >     +        size_t size = dp_packet_size(pkt) - pkt->l3_ofs;
>     >              int i;
>     >
>     >              for (i = 0; i < lan->n_conns; i++) {
>     >     --
>     >     2.7.4
>     >
> 
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [PATCH v13 06/11] dp-packet: Add support for data "linearization".

Reply via email to