For several f_tx_bulk functions in rte_port_{ethdev,ring,sched}.c,
it appears that the intent of the bsz_mask logic is to test whether
pkts_mask contains a full burst (i.e., the <tx_burst_sz> least
significant bits are set).

There are two problems with the bsz_mask code:
1) The mask is copied into a local variable declared as uint32_t,
   which truncates it whenever tx_burst_sz > 32.
2) Oversized bursts may be passed to the underlying ethdev/ring/sched.
   For example, with tx_burst_sz=16, bsz_mask=0x8000 and
   pkts_mask=0x1ffff (17 packets), expr evaluates to 0, so we send a
   burst that is larger than desired (and not a power of 2) to the
   underlying tx burst interface.

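We propose to set bsz_mask = (1 << tx_burst_sz) - 1, i.e., the mask
value of a full burst. It is computed as
UINT64_MAX >> (64 - tx_burst_sz) so that tx_burst_sz=64 does not
overflow the shift (the expression is well defined for
1 <= tx_burst_sz <= 64). The mask is cached when the port is created,
and each f_tx_bulk then does a simple equality compare with pkts_mask.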

Signed-off-by: Robert Sanford <rsanford at akamai.com>
---
 lib/librte_port/rte_port_ethdev.c |   15 ++++-----------
 lib/librte_port/rte_port_ring.c   |   16 ++++------------
 lib/librte_port/rte_port_sched.c  |    7 ++-----
 3 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/lib/librte_port/rte_port_ethdev.c b/lib/librte_port/rte_port_ethdev.c
index 1c34602..3fb4947 100644
--- a/lib/librte_port/rte_port_ethdev.c
+++ b/lib/librte_port/rte_port_ethdev.c
@@ -188,7 +188,7 @@ rte_port_ethdev_writer_create(void *params, int socket_id)
        port->queue_id = conf->queue_id;
        port->tx_burst_sz = conf->tx_burst_sz;
        port->tx_buf_count = 0;
-       port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+       port->bsz_mask = UINT64_MAX >> (64 - conf->tx_burst_sz);

        return port;
 }
@@ -229,12 +229,9 @@ rte_port_ethdev_writer_tx_bulk(void *port,
 {
        struct rte_port_ethdev_writer *p =
                (struct rte_port_ethdev_writer *) port;
-       uint32_t bsz_mask = p->bsz_mask;
        uint32_t tx_buf_count = p->tx_buf_count;
-       uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
-                       ((pkts_mask & bsz_mask) ^ bsz_mask);

-       if (expr == 0) {
+       if (pkts_mask == p->bsz_mask) {
                uint64_t n_pkts = __builtin_popcountll(pkts_mask);
                uint32_t n_pkts_ok;

@@ -369,7 +366,7 @@ rte_port_ethdev_writer_nodrop_create(void *params, int socket_id)
        port->queue_id = conf->queue_id;
        port->tx_burst_sz = conf->tx_burst_sz;
        port->tx_buf_count = 0;
-       port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+       port->bsz_mask = UINT64_MAX >> (64 - conf->tx_burst_sz);

        /*
         * When n_retries is 0 it means that we should wait for every packet to
@@ -435,13 +432,9 @@ rte_port_ethdev_writer_nodrop_tx_bulk(void *port,
 {
        struct rte_port_ethdev_writer_nodrop *p =
                (struct rte_port_ethdev_writer_nodrop *) port;
-
-       uint32_t bsz_mask = p->bsz_mask;
        uint32_t tx_buf_count = p->tx_buf_count;
-       uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
-                       ((pkts_mask & bsz_mask) ^ bsz_mask);

-       if (expr == 0) {
+       if (pkts_mask == p->bsz_mask) {
                uint64_t n_pkts = __builtin_popcountll(pkts_mask);
                uint32_t n_pkts_ok;

diff --git a/lib/librte_port/rte_port_ring.c b/lib/librte_port/rte_port_ring.c
index 765ecc5..b36e4ce 100644
--- a/lib/librte_port/rte_port_ring.c
+++ b/lib/librte_port/rte_port_ring.c
@@ -217,7 +217,7 @@ rte_port_ring_writer_create_internal(void *params, int socket_id,
        port->ring = conf->ring;
        port->tx_burst_sz = conf->tx_burst_sz;
        port->tx_buf_count = 0;
-       port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+       port->bsz_mask = UINT64_MAX >> (64 - conf->tx_burst_sz);
        port->is_multi = is_multi;

        return port;
@@ -299,13 +299,9 @@ rte_port_ring_writer_tx_bulk_internal(void *port,
 {
        struct rte_port_ring_writer *p =
                (struct rte_port_ring_writer *) port;
-
-       uint32_t bsz_mask = p->bsz_mask;
        uint32_t tx_buf_count = p->tx_buf_count;
-       uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
-                       ((pkts_mask & bsz_mask) ^ bsz_mask);

-       if (expr == 0) {
+       if (pkts_mask == p->bsz_mask) {
                uint64_t n_pkts = __builtin_popcountll(pkts_mask);
                uint32_t n_pkts_ok;

@@ -486,7 +482,7 @@ rte_port_ring_writer_nodrop_create_internal(void *params, int socket_id,
        port->ring = conf->ring;
        port->tx_burst_sz = conf->tx_burst_sz;
        port->tx_buf_count = 0;
-       port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+       port->bsz_mask = UINT64_MAX >> (64 - conf->tx_burst_sz);
        port->is_multi = is_multi;

        /*
@@ -613,13 +609,9 @@ rte_port_ring_writer_nodrop_tx_bulk_internal(void *port,
 {
        struct rte_port_ring_writer_nodrop *p =
                (struct rte_port_ring_writer_nodrop *) port;
-
-       uint32_t bsz_mask = p->bsz_mask;
        uint32_t tx_buf_count = p->tx_buf_count;
-       uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
-                       ((pkts_mask & bsz_mask) ^ bsz_mask);

-       if (expr == 0) {
+       if (pkts_mask == p->bsz_mask) {
                uint64_t n_pkts = __builtin_popcountll(pkts_mask);
                uint32_t n_pkts_ok;

diff --git a/lib/librte_port/rte_port_sched.c b/lib/librte_port/rte_port_sched.c
index c5ff8ab..5b6afc4 100644
--- a/lib/librte_port/rte_port_sched.c
+++ b/lib/librte_port/rte_port_sched.c
@@ -185,7 +185,7 @@ rte_port_sched_writer_create(void *params, int socket_id)
        port->sched = conf->sched;
        port->tx_burst_sz = conf->tx_burst_sz;
        port->tx_buf_count = 0;
-       port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+       port->bsz_mask = UINT64_MAX >> (64 - conf->tx_burst_sz);

        return port;
 }
@@ -214,12 +214,9 @@ rte_port_sched_writer_tx_bulk(void *port,
                uint64_t pkts_mask)
 {
        struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port;
-       uint32_t bsz_mask = p->bsz_mask;
        uint32_t tx_buf_count = p->tx_buf_count;
-       uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
-                       ((pkts_mask & bsz_mask) ^ bsz_mask);

-       if (expr == 0) {
+       if (pkts_mask == p->bsz_mask) {
                __rte_unused uint32_t nb_tx;
                uint64_t n_pkts = __builtin_popcountll(pkts_mask);

-- 
1.7.1

Reply via email to