Sturart, test case looks like valuable. Do you plan to update that patch? I think it's better to have test case in separate commit.

Maxim.

On 04/02/15 14:07, Stuart Haslam wrote:
The linux-generic pktio implementations don't correctly handle socket
errors which occur while sending packets via odp_pktio_send(). The
behaviour is also not consistent across the three implementations.

The problems being addressed are;

  - calls may block indefinitely in certain error conditions
  - packets may be freed even though they weren't sent
  - return value doesn't accurately reflect number of packets sent
  - inconsistent use of __odp_errno

Signed-off-by: Stuart Haslam <[email protected]>
---
  platform/linux-generic/odp_packet_io.c     |   2 +-
  platform/linux-generic/odp_packet_socket.c | 157 +++++++++++++--------------
  test/validation/odp_pktio.c                | 163 +++++++++++++++++++++++++----
  3 files changed, 224 insertions(+), 98 deletions(-)

diff --git a/platform/linux-generic/odp_packet_io.c 
b/platform/linux-generic/odp_packet_io.c
index b04ce8b..0c7f2dd 100644
--- a/platform/linux-generic/odp_packet_io.c
+++ b/platform/linux-generic/odp_packet_io.c
@@ -26,7 +26,7 @@
  #include <errno.h>
/* MTU to be reported for the "loop" interface */
-#define PKTIO_LOOP_MTU 1500
+#define PKTIO_LOOP_MTU INT_MAX
  /* MAC address for the "loop" interface */
  static const char pktio_loop_mac[] = {0x02, 0xe9, 0x34, 0x80, 0x73, 0x01};
diff --git a/platform/linux-generic/odp_packet_socket.c b/platform/linux-generic/odp_packet_socket.c
index 2802c43..f6d7330 100644
--- a/platform/linux-generic/odp_packet_socket.c
+++ b/platform/linux-generic/odp_packet_socket.c
@@ -43,6 +43,9 @@
  #include <odp/helper/eth.h>
  #include <odp/helper/ip.h>
+/** determine if a socket read/write error should be reported */
+#define SOCK_ERR_REPORT(e) (e != EAGAIN && e != EWOULDBLOCK && e != EINTR)
+
  /** Provide a sendmmsg wrapper for systems with no libc or kernel support.
   *  As it is implemented as a weak symbol, it has zero effect on systems
   *  with both.
@@ -270,41 +273,34 @@ int recv_pkt_sock_basic(pkt_sock_t *const pkt_sock,
  int send_pkt_sock_basic(pkt_sock_t *const pkt_sock,
                        odp_packet_t pkt_table[], unsigned len)
  {
-       odp_packet_t pkt;
        uint8_t *frame;
        uint32_t frame_len;
-       unsigned i;
-       unsigned flags;
        int sockfd;
-       int nb_tx;
        int ret;
+       unsigned i, n;
sockfd = pkt_sock->sockfd;
-       flags = MSG_DONTWAIT;
        i = 0;
        while (i < len) {
-               pkt = pkt_table[i];
+               frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
- frame = odp_packet_l2_ptr(pkt, &frame_len);
-
-               ret = send(sockfd, frame, frame_len, flags);
+               ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
                if (odp_unlikely(ret == -1)) {
-                       if (odp_likely(errno == EAGAIN)) {
-                               flags = 0;      /* blocking for next rounds */
-                               continue;       /* resend buffer */
-                       } else {
-                               break;
+                       if (i == 0 && SOCK_ERR_REPORT(errno)) {
+                               __odp_errno = errno;
+                               ODP_ERR("send(basic): %s\n", strerror(errno));
+                               return -1;
                        }
+                       break;
                }
i++;
-       }                       /* end while */
-       nb_tx = i;
+       }
- for (i = 0; i < len; i++)
-               odp_packet_free(pkt_table[i]);
+       for (n = 0; n < i; ++n)
+               odp_packet_free(pkt_table[n]);
- return nb_tx;
+       return i;
  }
/*
@@ -383,9 +379,7 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
        struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
        int ret;
        int sockfd;
-       unsigned i;
-       unsigned sent_msgs = 0;
-       unsigned flags;
+       unsigned i, n;
if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
                return -1;
@@ -401,17 +395,24 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
                msgvec[i].msg_hdr.msg_iovlen = 1;
        }
- flags = MSG_DONTWAIT;
-       for (i = 0; i < len; i += sent_msgs) {
-               ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
-               sent_msgs = ret > 0 ? (unsigned)ret : 0;
-               flags = 0;      /* blocking for next rounds */
+       for (i = 0; i < len; ) {
+               ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
+               if (odp_unlikely(ret == -1)) {
+                       if (i == 0 && SOCK_ERR_REPORT(errno)) {
+                               __odp_errno = errno;
+                               ODP_ERR("sendmmsg(): %s\n", strerror(errno));
+                               return -1;
+                       }
+                       break;
+               }
+
+               i += (unsigned)ret;
        }
- for (i = 0; i < len; i++)
-               odp_packet_free(pkt_table[i]);
+       for (n = 0; n < i; ++n)
+               odp_packet_free(pkt_table[n]);
- return len;
+       return i;
  }
/*
@@ -538,49 +539,69 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct 
ring *ring,
  {
        union frame_map ppd;
        uint32_t pkt_len;
-       unsigned frame_num, next_frame_num;
+       unsigned first_frame_num, frame_num, next_frame_num;
        int ret;
-       unsigned i = 0;
+       unsigned n, i = 0;
+       unsigned nb_tx = 0;
+       int send_errno;
- frame_num = ring->frame_num;
+       first_frame_num = ring->frame_num;
+       frame_num = first_frame_num;
while (i < len) {
-               if (mmap_tx_kernel_ready(ring->rd[frame_num].iov_base)) {
-                       ppd.raw = ring->rd[frame_num].iov_base;
+               ppd.raw = ring->rd[frame_num].iov_base;
- next_frame_num = (frame_num + 1) % ring->rd_num;
+               if (!odp_unlikely(mmap_tx_kernel_ready(ppd.raw)))
+                       break;
- pkt_len = odp_packet_len(pkt_table[i]);
-                       ppd.v2->tp_h.tp_snaplen = pkt_len;
-                       ppd.v2->tp_h.tp_len = pkt_len;
+               next_frame_num = (frame_num + 1) % ring->rd_num;
- odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
-                                               (uint8_t *)ppd.raw +
-                                               TPACKET2_HDRLEN -
-                                               sizeof(struct sockaddr_ll));
+               pkt_len = odp_packet_len(pkt_table[i]);
+               ppd.v2->tp_h.tp_snaplen = pkt_len;
+               ppd.v2->tp_h.tp_len = pkt_len;
- mmap_tx_user_ready(ppd.raw);
+               odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
+                                       (uint8_t *)ppd.raw +
+                                       TPACKET2_HDRLEN -
+                                       sizeof(struct sockaddr_ll));
- odp_packet_free(pkt_table[i]);
-                       frame_num = next_frame_num;
-                       i++;
-               } else {
+               mmap_tx_user_ready(ppd.raw);
+
+               frame_num = next_frame_num;
+               i++;
+       }
+
+       ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
+       send_errno = errno;
+
+       /* On success, the return value indicates the number of bytes sent. On
+        * failure a value of -1 is returned, even if the failure occurred
+        * after some of the packets in the ring have already been sent, so we
+        * need to inspect the packet status to determine which were sent. */
+       for (n = first_frame_num; n < first_frame_num+i; ++n) {
+               struct tpacket2_hdr *hdr = ring->rd[n].iov_base;
+               if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) {
+                       nb_tx++;
+               } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) {
+                       /* status will be cleared on the next send request */
                        break;
                }
        }
- ring->frame_num = frame_num;
+       ring->frame_num += nb_tx;
- ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
-       if (ret == -1) {
-               if (errno != EAGAIN) {
-                       __odp_errno = errno;
-                       ODP_ERR("sendto(pkt mmap): %s\n", strerror(errno));
-                       return -1;
-               }
+       if (odp_unlikely(ret == -1 &&
+                        nb_tx == 0 &&
+                        SOCK_ERR_REPORT(send_errno))) {
+               __odp_errno = send_errno;
+               ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno));
+               return -1;
        }
- return i;
+       for (i = 0; i < nb_tx; ++i)
+               odp_packet_free(pkt_table[i]);
+
+       return nb_tx;
  }
static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
@@ -624,21 +645,6 @@ static void mmap_fill_ring(struct ring *ring, odp_pool_t 
pool_hdl, int fanout)
        ring->flen = ring->req.tp_frame_size;
  }
-static int mmap_set_packet_loss_discard(int sock)
-{
-       int ret, discard = 1;
-
-       ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *)&discard,
-                        sizeof(discard));
-       if (ret == -1) {
-               __odp_errno = errno;
-               ODP_ERR("setsockopt(PACKET_LOSS): %s\n", strerror(errno));
-               return -1;
-       }
-
-       return 0;
-}
-
  static int mmap_setup_ring(int sock, struct ring *ring, int type,
                           odp_pool_t pool_hdl, int fanout)
  {
@@ -648,12 +654,6 @@ static int mmap_setup_ring(int sock, struct ring *ring, 
int type,
        ring->type = type;
        ring->version = TPACKET_V2;
- if (type == PACKET_TX_RING) {
-               ret = mmap_set_packet_loss_discard(sock);
-               if (ret != 0)
-                       return -1;
-       }
-
        mmap_fill_ring(ring, pool_hdl, fanout);
ret = setsockopt(sock, SOL_PACKET, type, &ring->req, sizeof(ring->req));
@@ -747,6 +747,7 @@ static int mmap_bind_sock(pkt_sock_mmap_t *pkt_sock, const 
char *netdev)
        return 0;
  }
+
  static int mmap_store_hw_addr(pkt_sock_mmap_t *const pkt_sock,
                              const char *netdev)
  {
diff --git a/test/validation/odp_pktio.c b/test/validation/odp_pktio.c
index a078efe..55df12e 100644
--- a/test/validation/odp_pktio.c
+++ b/test/validation/odp_pktio.c
@@ -21,6 +21,11 @@
  #define MAX_NUM_IFACES         2
  #define TEST_SEQ_INVALID       ((uint32_t)~0)
  #define TEST_SEQ_MAGIC         0x92749451
+#define TX_BATCH_LEN           4
+
+/** Maximum reported MTU size at which the transmit failure test will
+ *  attempt to send oversized packets. */
+#define TEST_MTU_OVERSIZE_LIMIT 65536
/** interface names used for testing */
  static const char *iface_name[MAX_NUM_IFACES];
@@ -34,6 +39,7 @@ typedef struct {
        odp_pktio_t id;
        odp_queue_t outq;
        odp_queue_t inq;
+       enum { PKTIN_MODE_RECV, PKTIN_MODE_POLL, PKTIN_MODE_SCHED } mode;
  } pktio_info_t;
/** magic number and sequence at start of UDP payload */
@@ -323,30 +329,40 @@ static odp_event_t queue_deq_wait_time(odp_queue_t queue, 
uint64_t ns)
        return ODP_EVENT_INVALID;
  }
-static odp_packet_t wait_for_packet(odp_queue_t queue,
+static odp_packet_t wait_for_packet(pktio_info_t *pktio_rx,
                                    uint32_t seq, uint64_t ns)
  {
        uint64_t start, now, diff;
        odp_event_t ev;
-       odp_packet_t pkt = ODP_PACKET_INVALID;
+       odp_packet_t pkt;
start = odp_time_cycles(); do {
-               if (queue != ODP_QUEUE_INVALID)
-                       ev = queue_deq_wait_time(queue, ns);
-               else
-                       ev  = odp_schedule(NULL, ns);
-
-               if (ev != ODP_EVENT_INVALID) {
-                       if (odp_event_type(ev) == ODP_EVENT_PACKET) {
-                               pkt = odp_packet_from_event(ev);
-                               if (pktio_pkt_seq(pkt) == seq)
-                                       return pkt;
+               pkt = ODP_PACKET_INVALID;
+
+               if (pktio_rx->mode == PKTIN_MODE_RECV) {
+                       odp_pktio_recv(pktio_rx->id, &pkt, 1);
+               } else {
+                       if (pktio_rx->mode == PKTIN_MODE_POLL)
+                               ev = queue_deq_wait_time(pktio_rx->inq, ns);
+                       else
+                               ev = odp_schedule(NULL, ns);
+
+                       if (ev != ODP_EVENT_INVALID) {
+                               if (odp_event_type(ev) == ODP_EVENT_PACKET)
+                                       pkt = odp_packet_from_event(ev);
+                               else
+                                       odp_buffer_free(
+                                               odp_buffer_from_event(ev));
                        }
+               }
- /* not interested in this event */
-                       odp_buffer_free(odp_buffer_from_event(ev));
+               if (pkt != ODP_PACKET_INVALID) {
+                       if (pktio_pkt_seq(pkt) == seq)
+                               return pkt;
+
+                       odp_packet_free(pkt);
                }
now = odp_time_cycles();
@@ -406,7 +422,7 @@ static void pktio_txrx_multi(pktio_info_t *pktio_a, 
pktio_info_t *pktio_b,
/* and wait for them to arrive back */
        for (i = 0; i < num_pkts; ++i) {
-               rx_pkt = wait_for_packet(pktio_b->inq, tx_seq[i], ODP_TIME_SEC);
+               rx_pkt = wait_for_packet(pktio_b, tx_seq[i], ODP_TIME_SEC);
if (rx_pkt == ODP_PACKET_INVALID)
                        break;
@@ -436,10 +452,13 @@ static void pktio_test_txrx(odp_queue_type_t q_type, int 
num_pkts)
                }
                create_inq(io->id);
                io->outq = odp_pktio_outq_getdef(io->id);
-               if (q_type == ODP_QUEUE_TYPE_POLL)
+               if (q_type == ODP_QUEUE_TYPE_POLL) {
+                       io->mode = PKTIN_MODE_POLL;
                        io->inq = odp_pktio_inq_getdef(io->id);
-               else
+               } else {
+                       io->mode = PKTIN_MODE_SCHED;
                        io->inq = ODP_QUEUE_INVALID;
+               }
        }
/* if we have two interfaces then send through one and receive on
@@ -461,7 +480,7 @@ static void test_odp_pktio_poll_queue(void)
static void test_odp_pktio_poll_multi(void)
  {
-       pktio_test_txrx(ODP_QUEUE_TYPE_POLL, 4);
+       pktio_test_txrx(ODP_QUEUE_TYPE_POLL, TX_BATCH_LEN);
  }
static void test_odp_pktio_sched_queue(void)
@@ -471,7 +490,7 @@ static void test_odp_pktio_sched_queue(void)
static void test_odp_pktio_sched_multi(void)
  {
-       pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, 4);
+       pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, TX_BATCH_LEN);
  }
static void test_odp_pktio_jumbo(void)
@@ -635,6 +654,111 @@ static void test_odp_pktio_close(void)
        CU_ASSERT_EQUAL(res, -1);
  }
+
+static void test_odp_pktio_send_failure(void)
+{
+       odp_pktio_t pktio_tx, pktio_rx;
+       odp_packet_t pkt_tbl[TX_BATCH_LEN];
+       uint32_t pkt_seq[TX_BATCH_LEN];
+       int ret, mtu, i = 0;
+       odp_pool_param_t pool_params;
+       odp_pool_t pkt_pool;
+       int long_pkt_idx = TX_BATCH_LEN/2;
+       pktio_info_t info_rx;
+
+       pktio_tx = create_pktio(iface_name[0]);
+       if (pktio_tx == ODP_PKTIO_INVALID) {
+               CU_FAIL("failed to open pktio");
+               return;
+       }
+
+       /* read the MTU from the transmit interface */
+       mtu = odp_pktio_mtu(pktio_tx);
+
+       if (mtu > TEST_MTU_OVERSIZE_LIMIT) {
+               CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
+               return;
+       }
+
+       /* configure the pool so that we can generate test packets larger
+        * than the interface MTU */
+       memset(&pool_params, 0, sizeof(pool_params));
+       pool_params.pkt.len     = mtu + 32;
+       pool_params.pkt.seg_len = pool_params.pkt.len;
+       pool_params.pkt.num     = TX_BATCH_LEN+1;
+       pool_params.type        = ODP_POOL_PACKET;
+       pkt_pool = odp_pool_create("pkt_pool_oversize",
+                                  ODP_SHM_NULL, &pool_params);
+       CU_ASSERT_FATAL(pkt_pool != ODP_POOL_INVALID);
+
+       if (num_ifaces > 1)
+               pktio_rx = create_pktio(iface_name[1]);
+       else
+               pktio_rx = pktio_tx;
+
+       /* generate a batch of packets with a single overly long packet
+        * in the middle */
+       for (i = 0; i < TX_BATCH_LEN; ++i) {
+               uint32_t pkt_len;
+               if (i == long_pkt_idx)
+                       pkt_len = pool_params.pkt.len;
+               else
+                       pkt_len = PKT_LEN_NORMAL;
+
+               pkt_tbl[i] = odp_packet_alloc(pkt_pool, pkt_len);
+               if (pkt_tbl[i] == ODP_PACKET_INVALID)
+                       break;
+
+               pkt_seq[i] = pktio_init_packet(pkt_tbl[i]);
+               if (pkt_seq[i] == TEST_SEQ_INVALID)
+                       break;
+       }
+
+       if (i != TX_BATCH_LEN) {
+               CU_FAIL("failed to generate test packets\n");
+               return;
+       }
+
+       /* try to send the batch with the long packet in the middle, the
+        * initial short packets should be sent */
+       odp_errno_zero();
+       ret = odp_pktio_send(pktio_tx, pkt_tbl, TX_BATCH_LEN);
+       CU_ASSERT(ret == long_pkt_idx);
+       CU_ASSERT(odp_errno() == 0);
+
+       info_rx.id   = pktio_rx;
+       info_rx.outq = ODP_QUEUE_INVALID;
+       info_rx.inq  = ODP_QUEUE_INVALID;
+       info_rx.mode = PKTIN_MODE_RECV;
+
+       for (i = 0; i < ret; ++i) {
+               pkt_tbl[i] = wait_for_packet(&info_rx,
+                                            pkt_seq[i], ODP_TIME_SEC);
+               if (pkt_tbl[i] == ODP_PACKET_INVALID)
+                       break;
+       }
+       CU_ASSERT(i == ret);
+
+       /* now try to send starting with the too-long packet and verify
+        * it fails */
+       odp_errno_zero();
+       ret = odp_pktio_send(pktio_tx,
+                            &pkt_tbl[long_pkt_idx],
+                            TX_BATCH_LEN-long_pkt_idx);
+       CU_ASSERT(ret == -1);
+       CU_ASSERT(odp_errno() != 0);
+
+       for (i = 0; i < TX_BATCH_LEN; ++i) {
+               if (pkt_tbl[i] != ODP_PACKET_INVALID)
+                       odp_packet_free(pkt_tbl[i]);
+       }
+
+       if (pktio_rx != pktio_tx)
+               CU_ASSERT(odp_pktio_close(pktio_rx) == 0);
+       CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
+       CU_ASSERT(odp_pool_destroy(pkt_pool) == 0);
+}
+
  static int init_pktio_suite(void)
  {
        iface_name[0] = getenv("ODP_PKTIO_IF0");
@@ -704,6 +828,7 @@ CU_TestInfo pktio_tests[] = {
        {"pktio promisc mode",        test_odp_pktio_promisc},
        {"pktio mac",         test_odp_pktio_mac},
        {"pktio inq_remdef",  test_odp_pktio_inq_remdef},
+       {"pktio send failure",        test_odp_pktio_send_failure},
        CU_TEST_INFO_NULL
  };

_______________________________________________
lng-odp mailing list
[email protected]
https://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to