Hi Ian,

Agreed with all that's said below and will work on the changes, just a
small comment in-line.

On 11/01/2019 12:26, Ian Stokes wrote:
> On 1/10/2019 4:58 PM, Tiago Lam wrote:
>> Previously, TSO was being explicity disabled on vhost interfaces,
>> meaning the guests wouldn't have TSO support negotiated in. With TSO
>> negotiated and enabled, packets are now marked for TSO, through the
>> PKT_TX_TCP_SEG flag.
>>
>> In order to deal with this type of packets, a new function,
>> netdev_dpdk_prep_tso_packet(), has been introduced, with the main
>> purpose of setting correctly the l2, l3 and l4 length members of the
>> mbuf struct, and the appropriate ol_flags. This function supports TSO
>> both in IPv4 and IPv6.
>>
>> netdev_dpdk_prep_tso_packet() is then only called when packets are
>> marked with the PKT_TX_TCP_SEG flag, meaning they have been marked for
>> TSO, and when the packet will be traversing the NIC.
>>
>> Additionally, if a packet is marked for TSO but the egress netdev
>> doesn't support it, the packet is dropped.
>>
> 
> Hi Tiago,
> 
> a high level first pass, I still need to test this in more detail so not 
> a full review yet but some minor issues to be addressed below.
> 
>> Co-authored-by: Mark Kavanagh <[email protected]>
>>
>> Signed-off-by: Mark Kavanagh <[email protected]>
>> Signed-off-by: Tiago Lam <[email protected]>
>> ---
>>   lib/dp-packet.h    |  14 +++++++
>>   lib/netdev-bsd.c   |  11 ++++-
>>   lib/netdev-dpdk.c  | 121 
>> ++++++++++++++++++++++++++++++++++++++++++-----------
>>   lib/netdev-dummy.c |  11 ++++-
>>   lib/netdev-linux.c |  15 +++++++
>>   5 files changed, 146 insertions(+), 26 deletions(-)
>>
>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>> index 970aaf2..c384416 100644
>> --- a/lib/dp-packet.h
>> +++ b/lib/dp-packet.h
>> @@ -104,6 +104,8 @@ static inline void dp_packet_set_size(struct dp_packet 
>> *, uint32_t);
>>   static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
>>   static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
>>   
>> +static inline bool dp_packet_is_tso(struct dp_packet *b);
>> +
>>   void *dp_packet_resize_l2(struct dp_packet *, int increment);
>>   void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
>>   static inline void *dp_packet_eth(const struct dp_packet *);
>> @@ -761,6 +763,12 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
>>       b->mbuf.buf_len = s;
>>   }
>>   
>> +static inline bool
>> +dp_packet_is_tso(struct dp_packet *b)
>> +{
>> +    return (b->mbuf.ol_flags & PKT_TX_TCP_SEG) ? true : false;
>> +}
>> +
>>   static inline void
>>   dp_packet_copy_mbuf_flags(struct dp_packet *dst, const struct dp_packet 
>> *src)
>>   {
>> @@ -972,6 +980,12 @@ dp_packet_get_allocated(const struct dp_packet *b)
>>       return b->allocated_;
>>   }
>>   
>> +static inline bool
>> +dp_packet_is_tso(struct dp_packet *b)
>> +{
> This will cause a unused parameter warning. Need to pass OVS_UNUSED 
> above also.
> 
>> +    return false;
>> +}
>> +
>>   static inline void
>>   dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
>>   {
>> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
>> index 278c8a9..5e8c5cc 100644
>> --- a/lib/netdev-bsd.c
>> +++ b/lib/netdev-bsd.c
>> @@ -700,13 +700,22 @@ netdev_bsd_send(struct netdev *netdev_, int qid 
>> OVS_UNUSED,
>>       }
>>   
>>       DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>> +        size_t size = dp_packet_size(packet);
>> +
>> +        /* TSO not supported in BSD netdev */
>> +        if (dp_packet_is_tso(packet)) {
>> +            VLOG_WARN_RL(&rl, "%s: No TSO enabled on port, TSO packet 
>> dropped "
>> +                         "%" PRIu32 " ", name, size);
>> +
>> +            continue;
>> +        }
>> +
>>           /* We need the whole data to send the packet on the device */
>>           if (!dp_packet_is_linear(packet)) {
>>               dp_packet_linearize(packet);
>>           }
>>   
>>           const void *data = dp_packet_data(packet);
>> -        size_t size = dp_packet_size(packet);
>>   
>>           while (!error) {
>>               ssize_t retval;
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 77d04fc..ad7223a 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1375,14 +1375,16 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
>>           goto out;
>>       }
>>   
>> -    err = rte_vhost_driver_disable_features(dev->vhost_id,
>> -                                1ULL << VIRTIO_NET_F_HOST_TSO4
>> -                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
>> -                                | 1ULL << VIRTIO_NET_F_CSUM);
>> -    if (err) {
>> -        VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
>> -                 "port: %s\n", name);
>> -        goto out;
>> +    if (!dpdk_multi_segment_mbufs) {
>> +        err = rte_vhost_driver_disable_features(dev->vhost_id,
>> +                                    1ULL << VIRTIO_NET_F_HOST_TSO4
>> +                                    | 1ULL << VIRTIO_NET_F_HOST_TSO6
>> +                                    | 1ULL << VIRTIO_NET_F_CSUM);
>> +        if (err) {
>> +            VLOG_ERR("rte_vhost_driver_disable_features failed for vhost 
>> user "
>> +                     "client port: %s\n", dev->up.name);
>> +            goto out;
>> +        }
>>       }
>>   
>>       err = rte_vhost_driver_start(dev->vhost_id);
>> @@ -2027,6 +2029,44 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
>>       rte_free(rx);
>>   }
>>   
>> +/* Should only be called if PKT_TX_TCP_SEG is set in ol_flags.
>> + * Furthermore, it also sets the PKT_TX_TCP_CKSUM and PKT_TX_IP_CKSUM flags,
>> + * and PKT_TX_IPV4 and PKT_TX_IPV6 in case the packet is IPv4 or IPv6,
>> + * respectiveoly. */
> Minor typo above 'respectively'.
>> +static void
>> +netdev_dpdk_prep_tso_packet(struct rte_mbuf *mbuf, int mtu)
>> +{
>> +    struct dp_packet *pkt;
>> +    struct tcp_header *th;
>> +
>> +    pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
>> +    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
>> +    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
>> +    th = dp_packet_l4(pkt);
>> +    /* There's no layer 4 in the packet */
> Minor: period at the end of a comment, for comments below also..
>> +    if (!th) {
>> +        return;
>> +    }
>> +    mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
>> +    mbuf->outer_l2_len = 0;
>> +    mbuf->outer_l3_len = 0;
>> +
>> +    /* Reset packet RX RSS flag to reuse in egress * > +    
>> dp_packet_mbuf_rss_flag_reset(pkt);
>> +
>> +    if (!(mbuf->ol_flags & PKT_TX_TCP_SEG)) {
>> +        return;
>> +    }
>> +
>> +    /* Prepare packet for egress. */
>> +    mbuf->ol_flags |= PKT_TX_TCP_SEG;
>> +    mbuf->ol_flags |= PKT_TX_TCP_CKSUM;
>> +    mbuf->ol_flags |= PKT_TX_IP_CKSUM;
>> +
>> +    /* Set the size of each TCP segment, based on the MTU of the device */
>> +    mbuf->tso_segsz = mtu - mbuf->l3_len - mbuf->l4_len;
> 
> Should it be the mtu of the device or the rounded up mtu of the mbufs 
> used above?

It should be the MTU of the device, which is what's being passed by the
callers of `netdev_dpdk_prep_tso_packet()` when using `dev->mtu`. Or
maybe I didn't get your point entirely?

Tiago.

>> +}
>> +
>>   /* Tries to transmit 'pkts' to txq 'qid' of device 'dev'.  Takes ownership 
>> of
>>    * 'pkts', even in case of failure.
>>    * In case multi-segment mbufs / TSO is being used, it also prepares. In 
>> such
>> @@ -2328,13 +2368,29 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk 
>> *dev, struct rte_mbuf **pkts,
>>       int cnt = 0;
>>       struct rte_mbuf *pkt;
>>   
>> +    /* Filter oversized packets, unless are marked for TSO. */
>>       for (i = 0; i < pkt_cnt; i++) {
>>           pkt = pkts[i];
>> +
>>           if (OVS_UNLIKELY(pkt->pkt_len > dev->max_packet_len)) {
>> -            VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " max_packet_len 
>> %d",
>> -                         dev->up.name, pkt->pkt_len, dev->max_packet_len);
>> -            rte_pktmbuf_free(pkt);
>> -            continue;
>> +            if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
>> +                VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " "
>> +                             "max_packet_len %d",
>> +                             dev->up.name, pkt->pkt_len, 
>> dev->max_packet_len);
>> +                rte_pktmbuf_free(pkt);
>> +                continue;
>> +            } else {
>> +                if (dev->type != DPDK_DEV_VHOST) {
>> +                    netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
>> +                }
>> +
>> +                /* Else the frames will not actually traverse the NIC, but
>> +                 * rather travel between VMs on the same host. */
> The comment and whitspace seems a bit odd here in placement. Could you 
> move it to above the if statement and modify to explain whats happening 
> from that point.
>> +            }
>> +        } else {
>> +            if (dev->type != DPDK_DEV_VHOST) {
>> +                netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
>> +            }
>>           }
>>   
>>           if (OVS_UNLIKELY(i != cnt)) {
>> @@ -2465,6 +2521,12 @@ dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, 
>> struct rte_mbuf **head,
>>       fmbuf->nb_segs = nb_segs;
>>       fmbuf->pkt_len = size;
>>   
>> +    struct dp_packet *pkt = CONTAINER_OF(fmbuf, struct dp_packet, mbuf);
> Can you declare *pkt at the top of the function along with existing 
> pointer declarations.
> 
>> +    pkt->l2_pad_size = packet->l2_pad_size;
>> +    pkt->l2_5_ofs = packet->l2_5_ofs;
>> +    pkt->l3_ofs = packet->l3_ofs;
>> +    pkt->l4_ofs = packet->l4_ofs;
>> +
>>       dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
>>   
>>       return 0;
>> @@ -2499,14 +2561,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, 
>> struct dp_packet_batch *batch)
>>   
>>       for (i = 0; i < cnt; i++) {
>>           struct dp_packet *packet = batch->packets[i];
>> +        struct rte_mbuf *pkt = &batch->packets[i]->mbuf;
>>           uint32_t size = dp_packet_size(packet);
>>           int err = 0;
>>   
>>           if (OVS_UNLIKELY(size > dev->max_packet_len)) {
>> -            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
>> -                         size, dev->max_packet_len);
>> -            dropped++;
>> -            continue;
>> +            if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
>> +                VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
>> +                             size, dev->max_packet_len);
>> +                dropped++;
>> +                continue;
>> +            }
>>           }
>>   
>>           err = dpdk_copy_dp_packet_to_mbuf(packet, &pkts[txcnt],
>> @@ -2522,6 +2587,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, 
>> struct dp_packet_batch *batch)
>>           }
>>           dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
>>   
>> +        if (dev->type != DPDK_DEV_VHOST) {
>> +            /* If packet is non-DPDK, at the very least, we need to update 
>> the
>> +             * mbuf length members, even if TSO is not to be performed. */
>> +            netdev_dpdk_prep_tso_packet(pkts[txcnt], dev->mtu);
>> +        }
>> +
>>           txcnt++;
>>       }
>>   
>> @@ -4263,14 +4334,16 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev 
>> *netdev)
>>               goto unlock;
>>           }
>>   
>> -        err = rte_vhost_driver_disable_features(dev->vhost_id,
>> -                                    1ULL << VIRTIO_NET_F_HOST_TSO4
>> -                                    | 1ULL << VIRTIO_NET_F_HOST_TSO6
>> -                                    | 1ULL << VIRTIO_NET_F_CSUM);
>> -        if (err) {
>> -            VLOG_ERR("rte_vhost_driver_disable_features failed for vhost 
>> user "
>> -                     "client port: %s\n", dev->up.name);
>> -            goto unlock;
>> +        if (!dpdk_multi_segment_mbufs) {
>> +            err = rte_vhost_driver_disable_features(dev->vhost_id,
>> +                                        1ULL << VIRTIO_NET_F_HOST_TSO4
>> +                                        | 1ULL << VIRTIO_NET_F_HOST_TSO6
>> +                                        | 1ULL << VIRTIO_NET_F_CSUM);
>> +            if (err) {
>> +                VLOG_ERR("rte_vhost_driver_disable_features failed for 
>> vhost "
>> +                         "user client port: %s\n", dev->up.name);
>> +                goto unlock;
>> +            }
>>           }
>>   
>>           err = rte_vhost_driver_start(dev->vhost_id);
>> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
>> index c56c86b..8452ab6 100644
>> --- a/lib/netdev-dummy.c
>> +++ b/lib/netdev-dummy.c
>> @@ -1093,13 +1093,22 @@ netdev_dummy_send(struct netdev *netdev, int qid 
>> OVS_UNUSED,
>>   
>>       struct dp_packet *packet;
>>       DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
>> +        size_t size = dp_packet_size(packet);
>> +
>> +        /* TSO not supported in Dummy netdev */
>> +        if (dp_packet_is_tso(packet)) {
>> +            VLOG_WARN("%s: No TSO enabled on port, TSO packet dropped %ld",
> Is enabled the wrong word here, I would think 'supported' as per the 
> comment? Same for the logs below in netdev_linux.
>> +                      netdev_get_name(netdev), size);
>> +
>> +            continue;
>> +        }
>> +
>>           /* We need the whole data to send the packet on the device */
>>           if (!dp_packet_is_linear(packet)) {
>>               dp_packet_linearize(packet);
>>           }
>>   
>>           const void *buffer = dp_packet_data(packet);
>> -        size_t size = dp_packet_size(packet);
>>   
>>           if (packet->packet_type != htonl(PT_ETH)) {
>>               error = EPFNOSUPPORT;
>> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>> index fa79b2a..1476096 100644
>> --- a/lib/netdev-linux.c
>> +++ b/lib/netdev-linux.c
>> @@ -1379,6 +1379,13 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
>>   
>>       struct dp_packet *packet;
>>       DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>> +        /* TSO not supported in Linux netdev */
>> +        if (dp_packet_is_tso(packet)) {
>> +            VLOG_WARN_RL(&rl, "%d: No TSO enabled on port, TSO packet 
>> dropped "
>> +                         "%ld", sock, size);
>> +            continue;
>> +        }
>> +
>>           /* We need the whole data to send the packet on the device */
>>           if (!dp_packet_is_linear(packet)) {
>>               dp_packet_linearize(packet);
>> @@ -1437,6 +1444,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
>>           ssize_t retval;
>>           int error;
>>   
>> +        /* TSO not supported in Linux netdev */
>> +        if (dp_packet_is_tso(packet)) {
>> +            VLOG_WARN_RL(&rl, "%s: No TSO enabled on port, TSO packet 
>> dropped "
>> +                         "%ld", netdev_get_name(netdev_), size);
>> +
>> +            continue;
>> +        }
>> +
>>           /* We need the whole data to send the packet on the device */
>>           if (!dp_packet_is_linear(packet)) {
>>               dp_packet_linearize(packet);
>>
> 
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to