Not a full review.
Comments inline.

> Add netdev_txq_flush(), that flush packets on a queue. This is needed
> to transmit packets on the intermediate queue.
> 
> This commit also implements netdev_dpdk_txq_flush() function. If there
> are any packets waiting in the queue, they are transmitted instantly
> using the rte_eth_tx_burst function. In XPS enabled case, lock is
> taken on the tx queue before flushing the queue.
> 
> Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
> Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com>
> Co-authored-by: Antonio Fischetti <antonio.fischetti at intel.com>
> Signed-off-by: Markus Magnusson <markus.magnusson at ericsson.com>
> Co-authored-by: Markus Magnusson <markus.magnusson at ericsson.com>
> Acked-by: Eelco Chaudron <echaudro at redhat.com>
> ---
>  lib/netdev-bsd.c      |  1 +
>  lib/netdev-dpdk.c     | 52 
> ++++++++++++++++++++++++++++++++++++++++++++++-----
>  lib/netdev-dummy.c    |  1 +
>  lib/netdev-linux.c    |  1 +
>  lib/netdev-provider.h |  8 ++++++++
>  lib/netdev-vport.c    |  2 +-
>  lib/netdev.c          |  9 +++++++++
>  lib/netdev.h          |  1 +
>  8 files changed, 69 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
> index 8a4cdb3..75483ad 100644
> --- a/lib/netdev-bsd.c
> +++ b/lib/netdev-bsd.c
> @@ -1546,6 +1546,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum 
> netdev_flags off,
>      netdev_bsd_rxq_recv,                             \
>      netdev_bsd_rxq_wait,                             \
>      netdev_bsd_rxq_drain,                            \
> +    NULL,                                            \
>                                                       \
>      NO_OFFLOAD_API                                   \
>  }
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 1d82bca..50d6b29 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -313,6 +313,11 @@ struct dpdk_mp {
>      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
>  };
>  
> +/* Queue 'INTERIM_QUEUE_BURST_THRESHOLD' packets before transmitting.
> + * Defaults to 'NETDEV_MAX_BURST'(32) packets.
> + */
> +#define INTERIM_QUEUE_BURST_THRESHOLD NETDEV_MAX_BURST
> +
>  /* There should be one 'struct dpdk_tx_queue' created for
>   * each cpu core. */
>  struct dpdk_tx_queue {
> @@ -322,6 +327,12 @@ struct dpdk_tx_queue {
>                                      * pmd threads (see 'concurrent_txq'). */
>      int map;                       /* Mapping of configured vhost-user queues
>                                      * to enabled by guest. */
> +    int dpdk_pkt_cnt;              /* Number of buffered packets waiting to
> +                                      be sent on DPDK tx queue. */
> +    struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
> +                                   /* Intermediate queue where packets can
> +                                    * be buffered to amortize the cost of 
> MMIO
> +                                    * writes. */
>  };
>  
>  /* dpdk has no way to remove dpdk ring ethernet devices
> @@ -1931,6 +1942,32 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>      }
>  }
>  
> +/* Flush tx queues.
> + * This is done periodically to empty the intermediate queue in case of
> + * fewer packets (< INTERIM_QUEUE_BURST_THRESHOLD) buffered in the queue.
> + */
> +static int
> +netdev_dpdk_txq_flush(struct netdev *netdev, int qid , bool concurrent_txq)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
> +
> +    if (OVS_LIKELY(txq->dpdk_pkt_cnt)) {
> +        if (OVS_UNLIKELY(concurrent_txq)) {
> +            qid = qid % dev->up.n_txq;
> +            rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> +        }
> +
> +        netdev_dpdk_eth_tx_burst(dev, qid, txq->dpdk_burst_pkts,
> +                                 txq->dpdk_pkt_cnt);

The queue whose buffered packets are sent ('txq', taken from the original
qid) and the locked one are different, because you're remapping the qid
before taking the spinlock.

I suspect that we're always using the right queue numbers in the current
implementation of dpif-netdev, but I need to recheck to be sure.
Anyway, the logic of this function is completely broken.

> +
> +        if (OVS_UNLIKELY(concurrent_txq)) {
> +            rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> +        }
> +    }
> +    return 0;
> +}
> +
>  static int
>  netdev_dpdk_eth_send(struct netdev *netdev, int qid,
>                       struct dp_packet_batch *batch, bool may_steal,
> @@ -3313,7 +3350,7 @@ unlock:
>                            SET_CONFIG, SET_TX_MULTIQ, SEND,    \
>                            GET_CARRIER, GET_STATS,             \
>                            GET_FEATURES, GET_STATUS,           \
> -                          RECONFIGURE, RXQ_RECV)              \
> +                          RECONFIGURE, RXQ_RECV, TXQ_FLUSH)   \
>  {                                                             \
>      NAME,                                                     \
>      true,                       /* is_pmd */                  \
> @@ -3381,6 +3418,7 @@ unlock:
>      RXQ_RECV,                                                 \
>      NULL,                       /* rx_wait */                 \
>      NULL,                       /* rxq_drain */               \
> +    TXQ_FLUSH,                  /* txq_flush */               \
>      NO_OFFLOAD_API                                            \
>  }
>  
> @@ -3398,7 +3436,8 @@ static const struct netdev_class dpdk_class =
>          netdev_dpdk_get_features,
>          netdev_dpdk_get_status,
>          netdev_dpdk_reconfigure,
> -        netdev_dpdk_rxq_recv);
> +        netdev_dpdk_rxq_recv,
> +        netdev_dpdk_txq_flush);
>  
>  static const struct netdev_class dpdk_ring_class =
>      NETDEV_DPDK_CLASS(
> @@ -3414,7 +3453,8 @@ static const struct netdev_class dpdk_ring_class =
>          netdev_dpdk_get_features,
>          netdev_dpdk_get_status,
>          netdev_dpdk_reconfigure,
> -        netdev_dpdk_rxq_recv);
> +        netdev_dpdk_rxq_recv,
> +        NULL);
>  
>  static const struct netdev_class dpdk_vhost_class =
>      NETDEV_DPDK_CLASS(
> @@ -3430,7 +3470,8 @@ static const struct netdev_class dpdk_vhost_class =
>          NULL,
>          NULL,
>          netdev_dpdk_vhost_reconfigure,
> -        netdev_dpdk_vhost_rxq_recv);
> +        netdev_dpdk_vhost_rxq_recv,
> +        NULL);
>  static const struct netdev_class dpdk_vhost_client_class =
>      NETDEV_DPDK_CLASS(
>          "dpdkvhostuserclient",
> @@ -3445,7 +3486,8 @@ static const struct netdev_class 
> dpdk_vhost_client_class =
>          NULL,
>          NULL,
>          netdev_dpdk_vhost_client_reconfigure,
> -        netdev_dpdk_vhost_rxq_recv);
> +        netdev_dpdk_vhost_rxq_recv,
> +        NULL);
>  
>  void
>  netdev_dpdk_register(void)
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 752f157..86ec902 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -1413,6 +1413,7 @@ netdev_dummy_update_flags(struct netdev *netdev_,
>      netdev_dummy_rxq_recv,                                      \
>      netdev_dummy_rxq_wait,                                      \
>      netdev_dummy_rxq_drain,                                     \
> +    NULL,                                                       \
>                                                                  \
>      NO_OFFLOAD_API                                              \
>  }
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 98820ed..992f887 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -2888,6 +2888,7 @@ netdev_linux_update_flags(struct netdev *netdev_, enum 
> netdev_flags off,
>      netdev_linux_rxq_recv,                                      \
>      netdev_linux_rxq_wait,                                      \
>      netdev_linux_rxq_drain,                                     \
> +    NULL,                                                       \
>                                                                  \
>      FLOW_OFFLOAD_API                                            \
>  }
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index b3c57d5..9c47fdf 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -347,6 +347,11 @@ struct netdev_class {
>       * If the function returns a non-zero value, some of the packets might 
> have
>       * been sent anyway.
>       *
> +     * Some netdev provider - like in case of 'dpdk' - may buffer the batch
> +     * of packets into an intermediate queue.  Buffered packets shall be
> +     * transmitted when the packet count exceeds a threshold (or) by the
> +     * periodic call to the flush function.
> +     *
>       * If 'may_steal' is false, the caller retains ownership of all the
>       * packets.  If 'may_steal' is true, the caller transfers ownership of 
> all
>       * the packets to the network device, regardless of success.
> @@ -788,6 +793,9 @@ struct netdev_class {
>      /* Discards all packets waiting to be received from 'rx'. */
>      int (*rxq_drain)(struct netdev_rxq *rx);
>  
> +    /* Flush all packets waiting to be sent on 'qid' queue. */
> +    int (*txq_flush)(struct netdev *netdev, int qid, bool concurrent_txq);
> +
>      /* ## -------------------------------- ## */
>      /* ## netdev flow offloading functions ## */
>      /* ## -------------------------------- ## */
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index 64a3ba3..3c5eacf 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -951,10 +951,10 @@ netdev_vport_get_ifindex(const struct netdev *netdev_)
>      NULL,                   /* rx_recv */                   \
>      NULL,                   /* rx_wait */                   \
>      NULL,                   /* rx_drain */                  \
> +    NULL,                   /* tx_flush */                  \
>                                                              \
>      NETDEV_FLOW_OFFLOAD_API
>  
> -
>  #define TUNNEL_CLASS(NAME, DPIF_PORT, BUILD_HEADER, PUSH_HEADER, POP_HEADER, 
>   \
>                       GET_IFINDEX)                                            
>   \
>      { DPIF_PORT,                                                             
>   \
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 7e9896b..8250396 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -724,6 +724,15 @@ netdev_rxq_drain(struct netdev_rxq *rx)
>              : 0);
>  }
>  
> +/* Flush packets on the 'qid' queue. */
> +int
> +netdev_txq_flush(struct netdev *netdev, int qid, bool netdev_txq_flush)
> +{
> +    return (netdev->netdev_class->txq_flush
> +            ? netdev->netdev_class->txq_flush(netdev, qid, netdev_txq_flush)
> +            : EOPNOTSUPP);
> +}
> +
>  /* Configures the number of tx queues of 'netdev'. Returns 0 if successful,
>   * otherwise a positive errno value.
>   *
> diff --git a/lib/netdev.h b/lib/netdev.h
> index f8482f7..328a158 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -183,6 +183,7 @@ int netdev_rxq_drain(struct netdev_rxq *);
>  int netdev_send(struct netdev *, int qid, struct dp_packet_batch *,
>                  bool may_steal, bool concurrent_txq);
>  void netdev_send_wait(struct netdev *, int qid);
> +int netdev_txq_flush(struct netdev *, int qid, bool concurrent_txq);
>  
>  /* Flow offloading. */
>  struct offload_info {
> -- 
> 2.4.11

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to