The linux-generic pktio implementations don't correctly handle socket
errors which occur while sending packets via odp_pktio_send(). The
behaviour is also not consistent across the three implementations.
The problems being addressed are:
- calls may block indefinitely in certain error conditions
- packets may be freed even though they weren't sent
- return value doesn't accurately reflect number of packets sent
- inconsistent use of __odp_errno
Signed-off-by: Stuart Haslam <[email protected]>
---
platform/linux-generic/odp_packet_io.c | 2 +-
platform/linux-generic/odp_packet_socket.c | 157 +++++++++++++--------------
test/validation/odp_pktio.c | 163 +++++++++++++++++++++++++----
3 files changed, 224 insertions(+), 98 deletions(-)
diff --git a/platform/linux-generic/odp_packet_io.c
b/platform/linux-generic/odp_packet_io.c
index b04ce8b..0c7f2dd 100644
--- a/platform/linux-generic/odp_packet_io.c
+++ b/platform/linux-generic/odp_packet_io.c
@@ -26,7 +26,7 @@
#include <errno.h>
/* MTU to be reported for the "loop" interface */
-#define PKTIO_LOOP_MTU 1500
+#define PKTIO_LOOP_MTU INT_MAX
/* MAC address for the "loop" interface */
static const char pktio_loop_mac[] = {0x02, 0xe9, 0x34, 0x80, 0x73, 0x01};
diff --git a/platform/linux-generic/odp_packet_socket.c b/platform/linux-generic/odp_packet_socket.c
index 2802c43..f6d7330 100644
--- a/platform/linux-generic/odp_packet_socket.c
+++ b/platform/linux-generic/odp_packet_socket.c
@@ -43,6 +43,9 @@
#include <odp/helper/eth.h>
#include <odp/helper/ip.h>
+/** determine if a socket read/write error should be reported */
+#define SOCK_ERR_REPORT(e) (e != EAGAIN && e != EWOULDBLOCK && e != EINTR)
+
/** Provide a sendmmsg wrapper for systems with no libc or kernel support.
* As it is implemented as a weak symbol, it has zero effect on systems
* with both.
@@ -270,41 +273,34 @@ int recv_pkt_sock_basic(pkt_sock_t *const pkt_sock,
int send_pkt_sock_basic(pkt_sock_t *const pkt_sock,
odp_packet_t pkt_table[], unsigned len)
{
- odp_packet_t pkt;
uint8_t *frame;
uint32_t frame_len;
- unsigned i;
- unsigned flags;
int sockfd;
- int nb_tx;
int ret;
+ unsigned i, n;
sockfd = pkt_sock->sockfd;
- flags = MSG_DONTWAIT;
i = 0;
while (i < len) {
- pkt = pkt_table[i];
+ frame = odp_packet_l2_ptr(pkt_table[i], &frame_len);
- frame = odp_packet_l2_ptr(pkt, &frame_len);
-
- ret = send(sockfd, frame, frame_len, flags);
+ ret = send(sockfd, frame, frame_len, MSG_DONTWAIT);
if (odp_unlikely(ret == -1)) {
- if (odp_likely(errno == EAGAIN)) {
- flags = 0; /* blocking for next rounds */
- continue; /* resend buffer */
- } else {
- break;
+ if (i == 0 && SOCK_ERR_REPORT(errno)) {
+ __odp_errno = errno;
+ ODP_ERR("send(basic): %s\n", strerror(errno));
+ return -1;
}
+ break;
}
i++;
- } /* end while */
- nb_tx = i;
+ }
- for (i = 0; i < len; i++)
- odp_packet_free(pkt_table[i]);
+ for (n = 0; n < i; ++n)
+ odp_packet_free(pkt_table[n]);
- return nb_tx;
+ return i;
}
/*
@@ -383,9 +379,7 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
struct iovec iovecs[ODP_PACKET_SOCKET_MAX_BURST_TX];
int ret;
int sockfd;
- unsigned i;
- unsigned sent_msgs = 0;
- unsigned flags;
+ unsigned i, n;
if (odp_unlikely(len > ODP_PACKET_SOCKET_MAX_BURST_TX))
return -1;
@@ -401,17 +395,24 @@ int send_pkt_sock_mmsg(pkt_sock_t *const pkt_sock,
msgvec[i].msg_hdr.msg_iovlen = 1;
}
- flags = MSG_DONTWAIT;
- for (i = 0; i < len; i += sent_msgs) {
- ret = sendmmsg(sockfd, &msgvec[i], len - i, flags);
- sent_msgs = ret > 0 ? (unsigned)ret : 0;
- flags = 0; /* blocking for next rounds */
+ for (i = 0; i < len; ) {
+ ret = sendmmsg(sockfd, &msgvec[i], len - i, MSG_DONTWAIT);
+ if (odp_unlikely(ret == -1)) {
+ if (i == 0 && SOCK_ERR_REPORT(errno)) {
+ __odp_errno = errno;
+ ODP_ERR("sendmmsg(): %s\n", strerror(errno));
+ return -1;
+ }
+ break;
+ }
+
+ i += (unsigned)ret;
}
- for (i = 0; i < len; i++)
- odp_packet_free(pkt_table[i]);
+ for (n = 0; n < i; ++n)
+ odp_packet_free(pkt_table[n]);
- return len;
+ return i;
}
/*
@@ -538,49 +539,69 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct
ring *ring,
{
union frame_map ppd;
uint32_t pkt_len;
- unsigned frame_num, next_frame_num;
+ unsigned first_frame_num, frame_num, next_frame_num;
int ret;
- unsigned i = 0;
+ unsigned n, i = 0;
+ unsigned nb_tx = 0;
+ int send_errno;
- frame_num = ring->frame_num;
+ first_frame_num = ring->frame_num;
+ frame_num = first_frame_num;
while (i < len) {
- if (mmap_tx_kernel_ready(ring->rd[frame_num].iov_base)) {
- ppd.raw = ring->rd[frame_num].iov_base;
+ ppd.raw = ring->rd[frame_num].iov_base;
- next_frame_num = (frame_num + 1) % ring->rd_num;
+ if (!odp_unlikely(mmap_tx_kernel_ready(ppd.raw)))
+ break;
- pkt_len = odp_packet_len(pkt_table[i]);
- ppd.v2->tp_h.tp_snaplen = pkt_len;
- ppd.v2->tp_h.tp_len = pkt_len;
+ next_frame_num = (frame_num + 1) % ring->rd_num;
- odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
- (uint8_t *)ppd.raw +
- TPACKET2_HDRLEN -
- sizeof(struct sockaddr_ll));
+ pkt_len = odp_packet_len(pkt_table[i]);
+ ppd.v2->tp_h.tp_snaplen = pkt_len;
+ ppd.v2->tp_h.tp_len = pkt_len;
- mmap_tx_user_ready(ppd.raw);
+ odp_packet_copydata_out(pkt_table[i], 0, pkt_len,
+ (uint8_t *)ppd.raw +
+ TPACKET2_HDRLEN -
+ sizeof(struct sockaddr_ll));
- odp_packet_free(pkt_table[i]);
- frame_num = next_frame_num;
- i++;
- } else {
+ mmap_tx_user_ready(ppd.raw);
+
+ frame_num = next_frame_num;
+ i++;
+ }
+
+ ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
+ send_errno = errno;
+
+ /* On success, the return value indicates the number of bytes sent. On
+ * failure a value of -1 is returned, even if the failure occurred
+ * after some of the packets in the ring have already been sent, so we
+ * need to inspect the packet status to determine which were sent. */
+ for (n = first_frame_num; n < first_frame_num+i; ++n) {
+ struct tpacket2_hdr *hdr = ring->rd[n].iov_base;
+ if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) {
+ nb_tx++;
+ } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) {
+ /* status will be cleared on the next send request */
break;
}
}
- ring->frame_num = frame_num;
+ ring->frame_num += nb_tx;
- ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret == -1) {
- if (errno != EAGAIN) {
- __odp_errno = errno;
- ODP_ERR("sendto(pkt mmap): %s\n", strerror(errno));
- return -1;
- }
+ if (odp_unlikely(ret == -1 &&
+ nb_tx == 0 &&
+ SOCK_ERR_REPORT(send_errno))) {
+ __odp_errno = send_errno;
+ ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno));
+ return -1;
}
- return i;
+ for (i = 0; i < nb_tx; ++i)
+ odp_packet_free(pkt_table[i]);
+
+ return nb_tx;
}
static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout)
@@ -624,21 +645,6 @@ static void mmap_fill_ring(struct ring *ring, odp_pool_t
pool_hdl, int fanout)
ring->flen = ring->req.tp_frame_size;
}
-static int mmap_set_packet_loss_discard(int sock)
-{
- int ret, discard = 1;
-
- ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *)&discard,
- sizeof(discard));
- if (ret == -1) {
- __odp_errno = errno;
- ODP_ERR("setsockopt(PACKET_LOSS): %s\n", strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
static int mmap_setup_ring(int sock, struct ring *ring, int type,
odp_pool_t pool_hdl, int fanout)
{
@@ -648,12 +654,6 @@ static int mmap_setup_ring(int sock, struct ring *ring,
int type,
ring->type = type;
ring->version = TPACKET_V2;
- if (type == PACKET_TX_RING) {
- ret = mmap_set_packet_loss_discard(sock);
- if (ret != 0)
- return -1;
- }
-
mmap_fill_ring(ring, pool_hdl, fanout);
ret = setsockopt(sock, SOL_PACKET, type, &ring->req, sizeof(ring->req));
@@ -747,6 +747,7 @@ static int mmap_bind_sock(pkt_sock_mmap_t *pkt_sock, const
char *netdev)
return 0;
}
+
static int mmap_store_hw_addr(pkt_sock_mmap_t *const pkt_sock,
const char *netdev)
{
diff --git a/test/validation/odp_pktio.c b/test/validation/odp_pktio.c
index a078efe..55df12e 100644
--- a/test/validation/odp_pktio.c
+++ b/test/validation/odp_pktio.c
@@ -21,6 +21,11 @@
#define MAX_NUM_IFACES 2
#define TEST_SEQ_INVALID ((uint32_t)~0)
#define TEST_SEQ_MAGIC 0x92749451
+#define TX_BATCH_LEN 4
+
+/** Maximum reported MTU size at which the transmit failure test will
+ * attempt to send oversized packets. */
+#define TEST_MTU_OVERSIZE_LIMIT 65536
/** interface names used for testing */
static const char *iface_name[MAX_NUM_IFACES];
@@ -34,6 +39,7 @@ typedef struct {
odp_pktio_t id;
odp_queue_t outq;
odp_queue_t inq;
+ enum { PKTIN_MODE_RECV, PKTIN_MODE_POLL, PKTIN_MODE_SCHED } mode;
} pktio_info_t;
/** magic number and sequence at start of UDP payload */
@@ -323,30 +329,40 @@ static odp_event_t queue_deq_wait_time(odp_queue_t queue,
uint64_t ns)
return ODP_EVENT_INVALID;
}
-static odp_packet_t wait_for_packet(odp_queue_t queue,
+static odp_packet_t wait_for_packet(pktio_info_t *pktio_rx,
uint32_t seq, uint64_t ns)
{
uint64_t start, now, diff;
odp_event_t ev;
- odp_packet_t pkt = ODP_PACKET_INVALID;
+ odp_packet_t pkt;
start = odp_time_cycles();
do {
- if (queue != ODP_QUEUE_INVALID)
- ev = queue_deq_wait_time(queue, ns);
- else
- ev = odp_schedule(NULL, ns);
-
- if (ev != ODP_EVENT_INVALID) {
- if (odp_event_type(ev) == ODP_EVENT_PACKET) {
- pkt = odp_packet_from_event(ev);
- if (pktio_pkt_seq(pkt) == seq)
- return pkt;
+ pkt = ODP_PACKET_INVALID;
+
+ if (pktio_rx->mode == PKTIN_MODE_RECV) {
+ odp_pktio_recv(pktio_rx->id, &pkt, 1);
+ } else {
+ if (pktio_rx->mode == PKTIN_MODE_POLL)
+ ev = queue_deq_wait_time(pktio_rx->inq, ns);
+ else
+ ev = odp_schedule(NULL, ns);
+
+ if (ev != ODP_EVENT_INVALID) {
+ if (odp_event_type(ev) == ODP_EVENT_PACKET)
+ pkt = odp_packet_from_event(ev);
+ else
+ odp_buffer_free(
+ odp_buffer_from_event(ev));
}
+ }
- /* not interested in this event */
- odp_buffer_free(odp_buffer_from_event(ev));
+ if (pkt != ODP_PACKET_INVALID) {
+ if (pktio_pkt_seq(pkt) == seq)
+ return pkt;
+
+ odp_packet_free(pkt);
}
now = odp_time_cycles();
@@ -406,7 +422,7 @@ static void pktio_txrx_multi(pktio_info_t *pktio_a,
pktio_info_t *pktio_b,
/* and wait for them to arrive back */
for (i = 0; i < num_pkts; ++i) {
- rx_pkt = wait_for_packet(pktio_b->inq, tx_seq[i], ODP_TIME_SEC);
+ rx_pkt = wait_for_packet(pktio_b, tx_seq[i], ODP_TIME_SEC);
if (rx_pkt == ODP_PACKET_INVALID)
break;
@@ -436,10 +452,13 @@ static void pktio_test_txrx(odp_queue_type_t q_type, int
num_pkts)
}
create_inq(io->id);
io->outq = odp_pktio_outq_getdef(io->id);
- if (q_type == ODP_QUEUE_TYPE_POLL)
+ if (q_type == ODP_QUEUE_TYPE_POLL) {
+ io->mode = PKTIN_MODE_POLL;
io->inq = odp_pktio_inq_getdef(io->id);
- else
+ } else {
+ io->mode = PKTIN_MODE_SCHED;
io->inq = ODP_QUEUE_INVALID;
+ }
}
/* if we have two interfaces then send through one and receive on
@@ -461,7 +480,7 @@ static void test_odp_pktio_poll_queue(void)
static void test_odp_pktio_poll_multi(void)
{
- pktio_test_txrx(ODP_QUEUE_TYPE_POLL, 4);
+ pktio_test_txrx(ODP_QUEUE_TYPE_POLL, TX_BATCH_LEN);
}
static void test_odp_pktio_sched_queue(void)
@@ -471,7 +490,7 @@ static void test_odp_pktio_sched_queue(void)
static void test_odp_pktio_sched_multi(void)
{
- pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, 4);
+ pktio_test_txrx(ODP_QUEUE_TYPE_SCHED, TX_BATCH_LEN);
}
static void test_odp_pktio_jumbo(void)
@@ -635,6 +654,111 @@ static void test_odp_pktio_close(void)
CU_ASSERT_EQUAL(res, -1);
}
+
+static void test_odp_pktio_send_failure(void)
+{
+ odp_pktio_t pktio_tx, pktio_rx;
+ odp_packet_t pkt_tbl[TX_BATCH_LEN];
+ uint32_t pkt_seq[TX_BATCH_LEN];
+ int ret, mtu, i = 0;
+ odp_pool_param_t pool_params;
+ odp_pool_t pkt_pool;
+ int long_pkt_idx = TX_BATCH_LEN/2;
+ pktio_info_t info_rx;
+
+ pktio_tx = create_pktio(iface_name[0]);
+ if (pktio_tx == ODP_PKTIO_INVALID) {
+ CU_FAIL("failed to open pktio");
+ return;
+ }
+
+ /* read the MTU from the transmit interface */
+ mtu = odp_pktio_mtu(pktio_tx);
+
+ if (mtu > TEST_MTU_OVERSIZE_LIMIT) {
+ CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
+ return;
+ }
+
+ /* configure the pool so that we can generate test packets larger
+ * than the interface MTU */
+ memset(&pool_params, 0, sizeof(pool_params));
+ pool_params.pkt.len = mtu + 32;
+ pool_params.pkt.seg_len = pool_params.pkt.len;
+ pool_params.pkt.num = TX_BATCH_LEN+1;
+ pool_params.type = ODP_POOL_PACKET;
+ pkt_pool = odp_pool_create("pkt_pool_oversize",
+ ODP_SHM_NULL, &pool_params);
+ CU_ASSERT_FATAL(pkt_pool != ODP_POOL_INVALID);
+
+ if (num_ifaces > 1)
+ pktio_rx = create_pktio(iface_name[1]);
+ else
+ pktio_rx = pktio_tx;
+
+ /* generate a batch of packets with a single overly long packet
+ * in the middle */
+ for (i = 0; i < TX_BATCH_LEN; ++i) {
+ uint32_t pkt_len;
+ if (i == long_pkt_idx)
+ pkt_len = pool_params.pkt.len;
+ else
+ pkt_len = PKT_LEN_NORMAL;
+
+ pkt_tbl[i] = odp_packet_alloc(pkt_pool, pkt_len);
+ if (pkt_tbl[i] == ODP_PACKET_INVALID)
+ break;
+
+ pkt_seq[i] = pktio_init_packet(pkt_tbl[i]);
+ if (pkt_seq[i] == TEST_SEQ_INVALID)
+ break;
+ }
+
+ if (i != TX_BATCH_LEN) {
+ CU_FAIL("failed to generate test packets\n");
+ return;
+ }
+
+ /* try to send the batch with the long packet in the middle, the
+ * initial short packets should be sent */
+ odp_errno_zero();
+ ret = odp_pktio_send(pktio_tx, pkt_tbl, TX_BATCH_LEN);
+ CU_ASSERT(ret == long_pkt_idx);
+ CU_ASSERT(odp_errno() == 0);
+
+ info_rx.id = pktio_rx;
+ info_rx.outq = ODP_QUEUE_INVALID;
+ info_rx.inq = ODP_QUEUE_INVALID;
+ info_rx.mode = PKTIN_MODE_RECV;
+
+ for (i = 0; i < ret; ++i) {
+ pkt_tbl[i] = wait_for_packet(&info_rx,
+ pkt_seq[i], ODP_TIME_SEC);
+ if (pkt_tbl[i] == ODP_PACKET_INVALID)
+ break;
+ }
+ CU_ASSERT(i == ret);
+
+ /* now try to send starting with the too-long packet and verify
+ * it fails */
+ odp_errno_zero();
+ ret = odp_pktio_send(pktio_tx,
+ &pkt_tbl[long_pkt_idx],
+ TX_BATCH_LEN-long_pkt_idx);
+ CU_ASSERT(ret == -1);
+ CU_ASSERT(odp_errno() != 0);
+
+ for (i = 0; i < TX_BATCH_LEN; ++i) {
+ if (pkt_tbl[i] != ODP_PACKET_INVALID)
+ odp_packet_free(pkt_tbl[i]);
+ }
+
+ if (pktio_rx != pktio_tx)
+ CU_ASSERT(odp_pktio_close(pktio_rx) == 0);
+ CU_ASSERT(odp_pktio_close(pktio_tx) == 0);
+ CU_ASSERT(odp_pool_destroy(pkt_pool) == 0);
+}
+
static int init_pktio_suite(void)
{
iface_name[0] = getenv("ODP_PKTIO_IF0");
@@ -704,6 +828,7 @@ CU_TestInfo pktio_tests[] = {
{"pktio promisc mode", test_odp_pktio_promisc},
{"pktio mac", test_odp_pktio_mac},
{"pktio inq_remdef", test_odp_pktio_inq_remdef},
+ {"pktio send failure", test_odp_pktio_send_failure},
CU_TEST_INFO_NULL
};