DPDK can receive multiple packets but current netdev API does not allow that. Following patch allows dpif-netdev receive batch of packet in a rx_recv() call for any netdev port. This will be used by dpdk-netdev.
Signed-off-by: Pravin B Shelar <[email protected]> --- lib/automake.mk | 1 + lib/dpif-netdev.c | 51 ++++++++++++++++++------------------------------- lib/dpif-netdev.h | 40 ++++++++++++++++++++++++++++++++++++++ lib/netdev-dummy.c | 26 +++++++++---------------- lib/netdev-linux.c | 20 ++++++++++++++++--- lib/netdev-provider.h | 24 +++++------------------ lib/netdev.c | 24 +++++------------------ lib/netdev.h | 2 +- lib/packets.c | 9 +++++++++ lib/packets.h | 1 + 10 files changed, 107 insertions(+), 91 deletions(-) create mode 100644 lib/dpif-netdev.h diff --git a/lib/automake.mk b/lib/automake.mk index e22165c..832b7f9 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -52,6 +52,7 @@ lib_libopenvswitch_la_SOURCES = \ lib/dhparams.h \ lib/dirs.h \ lib/dpif-netdev.c \ + lib/dpif-netdev.h \ lib/dpif-provider.h \ lib/dpif.c \ lib/dpif.h \ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 3bbfd2a..d78fb25 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -34,6 +34,7 @@ #include "classifier.h" #include "csum.h" #include "dpif.h" +#include "dpif-netdev.h" #include "dpif-provider.h" #include "dummy.h" #include "dynamic-string.h" @@ -68,10 +69,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); /* Configuration parameters. */ enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ -/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP - * headers to be aligned on a 4-byte boundary. */ -enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN }; - /* Queues. */ enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */ enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */ @@ -1443,6 +1440,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) { struct dp_netdev *dp = get_dp_netdev(dpif); struct pkt_metadata *md = &execute->md; + struct ofpbuf *packet; struct flow key; if (execute->packet->size < ETH_HEADER_LEN || @@ -1453,8 +1451,9 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) /* Extract flow key. */ flow_extract(execute->packet, md, &key); + packet = ofpbuf_clone(execute->packet); ovs_rwlock_rdlock(&dp->port_rwlock); - dp_netdev_execute_actions(dp, &key, execute->packet, md, execute->actions, + dp_netdev_execute_actions(dp, &key, packet, md, execute->actions, execute->actions_len); ovs_rwlock_unlock(&dp->port_rwlock); @@ -1586,12 +1585,10 @@ dp_forwarder_main(void *f_) { struct dp_forwarder *f = f_; struct dp_netdev *dp = f->dp; - struct ofpbuf packet; f->name = xasprintf("forwarder_%u", ovsthread_id_self()); set_subprogram_name("%s", f->name); - ofpbuf_init(&packet, 0); while (!latch_is_set(&dp->exit_latch)) { bool received_anything; int i; @@ -1605,25 +1602,19 @@ dp_forwarder_main(void *f_) if (port->rx && port->node.hash >= f->min_hash && port->node.hash <= f->max_hash) { - int buf_size; + struct ofpbuf *packets[MAX_RX_BATCH]; + int count; int error; - int mtu; - - if (netdev_get_mtu(port->netdev, &mtu)) { - mtu = ETH_PAYLOAD_MAX; - } - buf_size = DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + mtu; - - ofpbuf_clear(&packet); - ofpbuf_reserve_with_tailroom(&packet, DP_NETDEV_HEADROOM, - buf_size); - error = netdev_rx_recv(port->rx, &packet); + error = netdev_rx_recv(port->rx, packets, &count); if (!error) { + int i; struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no); - dp_netdev_port_input(dp, &packet, &md); + for (i = 0; i < count; i++) { + dp_netdev_port_input(dp, packets[i], &md); + } received_anything = true; } else if (error != EAGAIN && error != EOPNOTSUPP) { static struct vlog_rate_limit rl @@ -1659,7 +1650,6 @@ dp_forwarder_main(void *f_) poll_block(); } - ofpbuf_uninit(&packet); free(f->name); @@ -1741,6 +1731,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, } else { ovsthread_counter_inc(dp->n_missed, 1); dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL); + ofpbuf_delete(packet); } } @@ -1767,6 +1758,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, if (userdata) { buf_size += NLA_ALIGN(userdata->nla_len); } + buf_size += packet->size; ofpbuf_init(buf, buf_size); /* Put ODP flow. */ @@ -1780,10 +1772,8 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, NLA_ALIGN(userdata->nla_len)); } - /* Steal packet data. */ - ovs_assert(packet->source == OFPBUF_MALLOC); - upcall->packet = *packet; - ofpbuf_use(packet, NULL, 0); + upcall->packet.data = ofpbuf_put(buf, packet->data, packet->size); + upcall->packet.size = packet->size; seq_change(dp->queue_seq); @@ -1825,15 +1815,8 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); - /* Make a copy if we are not allowed to steal the packet's data. */ - if (!may_steal) { - packet = ofpbuf_clone_with_headroom(packet, DP_NETDEV_HEADROOM); - } dp_netdev_output_userspace(aux->dp, packet, DPIF_UC_ACTION, aux->key, userdata); - if (!may_steal) { - ofpbuf_uninit(packet); - } break; } case OVS_ACTION_ATTR_PUSH_VLAN: @@ -1846,6 +1829,10 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } + + if (may_steal) { + ofpbuf_delete(packet); + } } static void diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h new file mode 100644 index 0000000..64c93ee --- /dev/null +++ b/lib/dpif-netdev.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DPIF_NETDEV_H +#define DPIF_NETDEV_H 1 + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include "openvswitch/types.h" +#include "packets.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP + * headers to be aligned on a 4-byte boundary. */ +enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN }; + +enum { MAX_RX_BATCH = 256 }; /* Maximum number of flows in flow table. */ + +#ifdef __cplusplus +} +#endif + +#endif /* netdev.h */ diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index f23fc9f..27487f9 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -755,12 +755,11 @@ netdev_dummy_rx_dealloc(struct netdev_rx *rx_) } static int -netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer) +netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf **arr, int *c) { struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_); struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev); struct ofpbuf *packet; - int retval; ovs_mutex_lock(&netdev->mutex); if (!list_is_empty(&rx->recv_queue)) { @@ -774,22 +773,15 @@ netdev_dummy_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer) if (!packet) { return EAGAIN; } + ovs_mutex_lock(&netdev->mutex); + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += packet->size; + ovs_mutex_unlock(&netdev->mutex); - if (packet->size <= ofpbuf_tailroom(buffer)) { - memcpy(buffer->data, packet->data, packet->size); - buffer->size += packet->size; - retval = 0; - - ovs_mutex_lock(&netdev->mutex); - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += packet->size; - ovs_mutex_unlock(&netdev->mutex); - } else { - retval = EMSGSIZE; - } - ofpbuf_delete(packet); - - return retval; + packet_set_size(packet, packet->size); + arr[0] = packet; + *c = 1; + return 0; } static void diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 75ce7c6..574a572 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -50,6 +50,7 @@ #include "connectivity.h" #include "coverage.h" #include "dpif-linux.h" +#include "dpif-netdev.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "hash.h" @@ -461,6 +462,7 @@ static int af_packet_sock(void); static bool netdev_linux_miimon_enabled(void); static void netdev_linux_miimon_run(void); static void netdev_linux_miimon_wait(void); +static int netdev_linux_get_mtu__(struct netdev_linux *netdev, int *mtup); static bool is_netdev_linux_class(const struct netdev_class *netdev_class) @@ -984,10 +986,19 @@ netdev_linux_rx_recv_tap(int fd, struct ofpbuf *buffer) } static int -netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer) +netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf **packet, int *c) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); - int retval; + struct netdev *netdev = rx->up.netdev; + struct ofpbuf *buffer; + ssize_t retval; + int mtu; + + if (netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu)) { + mtu = ETH_PAYLOAD_MAX; + } + + buffer = ofpbuf_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu, DP_NETDEV_HEADROOM); retval = (rx->is_tap ? netdev_linux_rx_recv_tap(rx->fd, buffer) @@ -995,8 +1006,11 @@ netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer) if (retval && retval != EAGAIN && retval != EMSGSIZE) { VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s", ovs_strerror(errno), netdev_rx_get_name(rx_)); + } else { + packet_set_size(buffer, buffer->size); + packet[0] = buffer; + *c = 1; } - return retval; } diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 673d3ab..9bacaa0 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -634,28 +634,14 @@ struct netdev_class { void (*rx_destruct)(struct netdev_rx *); void (*rx_dealloc)(struct netdev_rx *); - /* Attempts to receive a packet from 'rx' into the tailroom of 'buffer', - * which should initially be empty. If successful, returns 0 and - * increments 'buffer->size' by the number of bytes in the received packet, - * otherwise a positive errno value. Returns EAGAIN immediately if no - * packet is ready to be received. - * - * Must return EMSGSIZE, and discard the packet, if the received packet - * is longer than 'ofpbuf_tailroom(buffer)'. - * - * Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to - * add a VLAN header which is obtained out-of-band to the packet. If - * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be - * available for the packet, otherwise it may be used for the packet - * itself. - * - * It is advised that the tailroom of 'buffer' should be - * VLAN_HEADER_LEN bytes longer than the MTU to allow space for an - * out-of-band VLAN header to be added to the packet. + /* Attempts to receive batch of packets from 'rx' and place array of pointers + * into '*pkt'. netdev is responsible for allocating buffers. + * '*cnt' points to packet count for given batch. Once packets are returned + * to caller, netdev should give up ownership of ofbpuf data. * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ - int (*rx_recv)(struct netdev_rx *rx, struct ofpbuf *buffer); + int (*rx_recv)(struct netdev_rx *rx, struct ofpbuf **pkt, int *cnt); /* Registers with the poll loop to wake up from the next call to * poll_block() when a packet is ready to be received with netdev_rx_recv() diff --git a/lib/netdev.c b/lib/netdev.c index e9c8d8f..a058742 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -551,10 +551,7 @@ netdev_rx_close(struct netdev_rx *rx) } } -/* Attempts to receive a packet from 'rx' into the tailroom of 'buffer', which - * must initially be empty. If successful, returns 0 and increments - * 'buffer->size' by the number of bytes in the received packet, otherwise a - * positive errno value. +/* Attempts to receive batch packet from 'rx'. * * Returns EAGAIN immediately if no packet is ready to be received. * @@ -562,10 +559,7 @@ netdev_rx_close(struct netdev_rx *rx) * than 'ofpbuf_tailroom(buffer)'. * * Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to - * add a VLAN header which is obtained out-of-band to the packet. If - * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be - * available for the packet, otherwise it may be used for the packet - * itself. + * add a VLAN header which is obtained out-of-band to the packet. * * It is advised that the tailroom of 'buffer' should be * VLAN_HEADER_LEN bytes longer than the MTU to allow space for an @@ -575,23 +569,15 @@ netdev_rx_close(struct netdev_rx *rx) * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ int -netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer) +netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf **buffers, int *cnt) { int retval; - ovs_assert(buffer->size == 0); - ovs_assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN); - - retval = rx->netdev->netdev_class->rx_recv(rx, buffer); + retval = rx->netdev->netdev_class->rx_recv(rx, buffers, cnt); if (!retval) { COVERAGE_INC(netdev_received); - if (buffer->size < ETH_TOTAL_MIN) { - ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size); - } - return 0; - } else { - return retval; } + return retval; } /* Arranges for poll_block() to wake up when a packet is ready to be received diff --git a/lib/netdev.h b/lib/netdev.h index 410c35b..bfd8f91 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -161,7 +161,7 @@ void netdev_rx_close(struct netdev_rx *); const char *netdev_rx_get_name(const struct netdev_rx *); -int netdev_rx_recv(struct netdev_rx *, struct ofpbuf *); +int netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf **buffers, int *cnt); void netdev_rx_wait(struct netdev_rx *); int netdev_rx_drain(struct netdev_rx *); diff --git a/lib/packets.c b/lib/packets.c index 3f7d6eb..6f2fca6 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1012,3 +1012,12 @@ void pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow) pkt_metadata_init(md, &flow->tunnel, flow->skb_priority, flow->pkt_mark, &flow->in_port); } + +void +packet_set_size(struct ofpbuf *b, int size) +{ + b->size = size; + if (b->size < ETH_TOTAL_MIN) { + ofpbuf_put_zeros(b, ETH_TOTAL_MIN - b->size); + } +} diff --git a/lib/packets.h b/lib/packets.h index e6b3303..f41a934 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -671,5 +671,6 @@ void packet_set_sctp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst); uint16_t packet_get_tcp_flags(const struct ofpbuf *, const struct flow *); void packet_format_tcp_flags(struct ds *, uint16_t); const char *packet_tcp_flag_to_string(uint32_t flag); +void packet_set_size(struct ofpbuf *b, int size); #endif /* packets.h */ -- 1.7.9.5 _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
