On 10/31/23 20:51, Mike Pattrick wrote:
> From: Flavio Leitner <[email protected]>
>
> Currently OVS will calculate the segment size based on the
> MTU of the egress port. That usually happens to be correct
> when the ports share the same MTU, but that is not always true.
>
> Therefore, if the segment size is provided, then use that and
> make sure the over sized packets are dropped.
>
> Signed-off-by: Flavio Leitner <[email protected]>
> Co-authored-by: Mike Pattrick <[email protected]>
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
> lib/dp-packet.c | 3 ++
> lib/dp-packet.h | 26 ++++++++++++++++
> lib/netdev-dpdk.c | 12 +++++++-
> lib/netdev-linux.c | 76 ++++++++++++++++++++++++++++++++--------------
> 4 files changed, 94 insertions(+), 23 deletions(-)
>
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index ed004c3b9..920402369 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -34,6 +34,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated,
> enum dp_packet_source so
> pkt_metadata_init(&b->md, 0);
> dp_packet_reset_cutlen(b);
> dp_packet_reset_offload(b);
> + dp_packet_set_tso_segsz(b, 0);
> /* Initialize implementation-specific fields of dp_packet. */
> dp_packet_init_specific(b);
> /* By default assume the packet type to be Ethernet. */
> @@ -203,6 +204,8 @@ dp_packet_clone_with_headroom(const struct dp_packet
> *buffer, size_t headroom)
> *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer);
> *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK;
>
> + dp_packet_set_tso_segsz(new_buffer, dp_packet_get_tso_segsz(buffer));
> +
> if (dp_packet_rss_valid(buffer)) {
> dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer));
> }
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 70ddf8aa4..6a924f3ff 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -126,6 +126,7 @@ struct dp_packet {
> uint32_t ol_flags; /* Offloading flags. */
> uint32_t rss_hash; /* Packet hash. */
> uint32_t flow_mark; /* Packet flow mark. */
> + uint16_t tso_segsz; /* TCP TSO segment size. */
> #endif
> enum dp_packet_source source; /* Source of memory allocated as 'base'.
> */
>
> @@ -166,6 +167,9 @@ static inline void dp_packet_set_size(struct dp_packet *,
> uint32_t);
> static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
> static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
>
> +static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *);
> +static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t);
> +
> void *dp_packet_resize_l2(struct dp_packet *, int increment);
> void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
> static inline void *dp_packet_eth(const struct dp_packet *);
> @@ -644,6 +648,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
> b->mbuf.buf_len = s;
> }
>
> +static inline uint16_t
> +dp_packet_get_tso_segsz(const struct dp_packet *p)
> +{
> + return p->mbuf.tso_segsz;
> +}
> +
> +static inline void
> +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
> +{
> + p->mbuf.tso_segsz = s;
> +}
> #else /* DPDK_NETDEV */
>
> static inline void
> @@ -700,6 +715,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
> b->allocated_ = s;
> }
>
> +static inline uint16_t
> +dp_packet_get_tso_segsz(const struct dp_packet *p)
> +{
> + return p->tso_segsz;
> +}
> +
> +static inline void
> +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
> +{
> + p->tso_segsz = s;
> +}
> #endif /* DPDK_NETDEV */
>
> static inline void
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 55700250d..9f20cc689 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2453,6 +2453,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
>
> if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
> struct tcp_header *th = dp_packet_l4(pkt);
> + int hdr_len;
>
> if (!th) {
> VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
> @@ -2462,7 +2463,15 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev,
> struct rte_mbuf *mbuf)
>
> mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
> mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
> + hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
> mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
> + if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len))
> {
> + VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. "
> + "hdr: %"PRIu32", gso: %"PRIu32", max len:
> %"PRIu32"",
> + dev->up.name, hdr_len, mbuf->tso_segsz,
> + dev->max_packet_len);
> + return false;
> + }
>
> if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
> mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
> @@ -2737,7 +2746,8 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev,
> struct rte_mbuf **pkts,
> int cnt = 0;
> struct rte_mbuf *pkt;
>
> - /* Filter oversized packets, unless are marked for TSO. */
> + /* Filter oversized packets. The TSO packets are filtered out
> + * during the offloading preparation for performance reasons. */
> for (i = 0; i < pkt_cnt; i++) {
> pkt = pkts[i];
> if (OVS_UNLIKELY((pkt->pkt_len > dev->max_packet_len)
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index a46f5127f..4d91391fb 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -539,7 +539,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
> static bool tap_supports_vnet_hdr = true;
>
> static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
> -static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> +static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
> int cmd, const char *cmd_name);
> static int get_flags(const struct netdev *, unsigned int *flags);
> @@ -1593,9 +1593,10 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
> }
>
> static int
> -netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
> - struct dp_packet_batch *batch)
> +netdev_linux_sock_batch_send(struct netdev *netdev_, int sock, int ifindex,
> + bool tso, int mtu, struct dp_packet_batch
> *batch)
> {
> + struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> const size_t size = dp_packet_batch_size(batch);
> /* We don't bother setting most fields in sockaddr_ll because the
> * kernel ignores them for SOCK_RAW. */
> @@ -1604,26 +1605,35 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
> bool tso, int mtu,
>
> struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size);
> struct iovec *iov = xmalloc(sizeof(*iov) * size);
> -
> struct dp_packet *packet;
> + int cnt = 0;
> +
> DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> if (tso) {
> - netdev_linux_prepend_vnet_hdr(packet, mtu);
> - }
> + int ret = netdev_linux_prepend_vnet_hdr(packet, mtu);
> +
> + if (OVS_UNLIKELY(ret)) {
> + netdev->tx_dropped += 1;
> + VLOG_WARN_RL(&rl, "%s: Packet dropped. %s",
Nit: maybe make this error message the same as for tap?
It's missing some context.
> + netdev_get_name(netdev_), ovs_strerror(ret));
> + continue;
> + }
> + }
>
> - iov[i].iov_base = dp_packet_data(packet);
> - iov[i].iov_len = dp_packet_size(packet);
> - mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
> - .msg_namelen = sizeof sll,
> - .msg_iov = &iov[i],
> - .msg_iovlen = 1 };
> + iov[cnt].iov_base = dp_packet_data(packet);
> + iov[cnt].iov_len = dp_packet_size(packet);
> + mmsg[cnt].msg_hdr = (struct msghdr) { .msg_name = &sll,
> + .msg_namelen = sizeof sll,
> + .msg_iov = &iov[cnt],
> + .msg_iovlen = 1 };
> + cnt++;
> }
>
> int error = 0;
> - for (uint32_t ofs = 0; ofs < size; ) {
> + for (uint32_t ofs = 0; ofs < cnt;) {
> ssize_t retval;
> do {
> - retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0);
> + retval = sendmmsg(sock, mmsg + ofs, cnt - ofs, 0);
> error = retval < 0 ? errno : 0;
> } while (error == EINTR);
> if (error) {
> @@ -1665,7 +1675,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
> int mtu,
> int error;
>
> if (OVS_LIKELY(tap_supports_vnet_hdr)) {
> - netdev_linux_prepend_vnet_hdr(packet, mtu);
> + error = netdev_linux_prepend_vnet_hdr(packet, mtu);
> + if (OVS_UNLIKELY(error)) {
> + netdev->tx_dropped++;
> + VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet "
> + "dropped. %s", netdev_get_name(netdev_),
> + ovs_strerror(error));
> + continue;
> + }
> }
>
> size = dp_packet_size(packet);
> @@ -1795,7 +1812,8 @@ netdev_linux_send(struct netdev *netdev_, int qid
> OVS_UNUSED,
> goto free_batch;
> }
>
> - error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
> + error = netdev_linux_sock_batch_send(netdev_, sock, ifindex, tso,
> mtu,
> + batch);
> } else {
> error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
> }
> @@ -7089,8 +7107,7 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
> switch (vnet->gso_type) {
> case VIRTIO_NET_HDR_GSO_TCPV4:
> case VIRTIO_NET_HDR_GSO_TCPV6:
> - /* FIXME: The packet has offloaded TCP segmentation. The gso_size
> - * is given and needs to be respected. */
> + dp_packet_set_tso_segsz(b, (OVS_FORCE uint16_t) vnet->gso_size);
> dp_packet_hwol_set_tcp_seg(b);
> break;
>
> @@ -7112,18 +7129,32 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
> return ret;
> }
>
> -static void
> +/* Prepends struct virtio_net_hdr to packet 'b'.
> + * Returns 0 if successful, otherwise a positive errno value.
> + * Returns EMSGSIZE if the packet 'b' cannot be sent over MTU 'mtu'. */
> +static int
> netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
> {
> struct virtio_net_hdr v;
> struct virtio_net_hdr *vnet = &v;
>
> if (dp_packet_hwol_is_tso(b)) {
> - uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char
> *)dp_packet_eth(b))
> - + TCP_HEADER_LEN;
> + uint16_t tso_segsz = dp_packet_get_tso_segsz(b);
> + struct tcp_header *tcp = dp_packet_l4(b);
> + int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
> + int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b))
> + + tcp_hdr_len;
> + int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN;
> +
> + if (OVS_UNLIKELY((hdr_len + tso_segsz) > max_packet_len)) {
> + VLOG_WARN_RL(&rl, "Oversized TSO packet. hdr_len: %"PRIu32", "
> + "gso: %"PRIu16", max length: %"PRIu32".", hdr_len,
> + tso_segsz, max_packet_len);
> + return EMSGSIZE;
> + }
>
> vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len;
> - vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
> + vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz);
> if (dp_packet_hwol_is_ipv4(b)) {
> vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
> } else if (dp_packet_hwol_tx_ipv6(b)) {
> @@ -7213,4 +7244,5 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int
> mtu)
> }
>
> dp_packet_push(b, vnet, sizeof *vnet);
> + return 0;
> }
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev