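Transmit traffic between TIPC nodes that live in different network
namespaces on the same host directly, as if both were on one local node.
This way, throughput between nodes in separate namespaces is as fast as
node-local traffic.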
Test case:
$ ip netns exec 1 benchmark_client -c 100
$ ip netns exec 2 benchmark_server
Before:
+----------+-------+---------+---------+------------------------------------------------+
| Msg Size |   #   | # Msgs/ | Elapsed |                   Throughput                   |
| [octets] | Conns |  Conn   |  [ms]   +---------------+--------------+-----------------+
|          |       |         |         | Total [Msg/s] | Total [Mb/s] | Per Conn [Mb/s] |
+----------+-------+---------+---------+---------------+--------------+-----------------+
|       64 |   100 |   64000 |   13005 |        492103 |          251 |               2 |
|      256 |   100 |   32000 |    4964 |        644627 |         1320 |              13 |
|     1024 |   100 |   16000 |    4524 |        353612 |         2896 |              28 |
|     4096 |   100 |    8000 |    3675 |        217644 |         7131 |              71 |
|    16384 |   100 |    4000 |    7914 |         50540 |         6624 |              66 |
|    65536 |   100 |    2000 |   13000 |         15384 |         8065 |              80 |
+----------+-------+---------+---------+---------------+--------------+-----------------+
After:
+----------+-------+---------+---------+------------------------------------------------+
| Msg Size |   #   | # Msgs/ | Elapsed |                   Throughput                   |
| [octets] | Conns |  Conn   |  [ms]   +---------------+--------------+-----------------+
|          |       |         |         | Total [Msg/s] | Total [Mb/s] | Per Conn [Mb/s] |
+----------+-------+---------+---------+---------------+--------------+-----------------+
|       64 |   100 |   64000 |    7842 |        816090 |          417 |               4 |
|      256 |   100 |   32000 |    3593 |        890469 |         1823 |              18 |
|     1024 |   100 |   16000 |    1835 |        871828 |         7142 |              71 |
|     4096 |   100 |    8000 |    1134 |        704904 |        23098 |             230 |
|    16384 |   100 |    4000 |     878 |        455295 |        59676 |             596 |
|    65536 |   100 |    2000 |    1007 |        198487 |       104064 |            1040 |
+----------+-------+---------+---------+---------------+--------------+-----------------+
Signed-off-by: Hoang Le <[email protected]>
---
net/tipc/discover.c | 6 ++-
net/tipc/msg.h | 10 +++++
net/tipc/name_distr.c | 2 +-
net/tipc/node.c | 94 +++++++++++++++++++++++++++++++++++++++++--
net/tipc/node.h | 4 +-
net/tipc/socket.c | 6 +--
6 files changed, 111 insertions(+), 11 deletions(-)
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..98d4eea97eb7 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -38,6 +38,8 @@
#include "node.h"
#include "discover.h"
+#include <net/netns/hash.h>
+
/* min delay during bearer start up */
#define TIPC_DISC_INIT msecs_to_jiffies(125)
/* max delay if bearer has no links */
@@ -94,6 +96,7 @@ static void tipc_disc_init_msg(struct net *net, struct
sk_buff *skb,
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+ msg_set_peer_net_hash(hdr, net_hash_mix(net));
msg_set_node_id(hdr, tipc_own_id(net));
}
@@ -200,6 +203,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
u8 peer_id[NODE_ID_LEN] = {0,};
u32 dst = msg_dest_domain(hdr);
u32 net_id = msg_bc_netid(hdr);
+ u32 pnet_hash = msg_peer_net_hash(hdr);
struct tipc_media_addr maddr;
u32 src = msg_prevnode(hdr);
u32 mtyp = msg_type(hdr);
@@ -242,7 +246,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
if (!tipc_in_scope(legacy, b->domain, src))
return;
tipc_node_check_dest(net, src, peer_id, b, caps, signature,
- &maddr, &respond, &dupl_addr);
+ pnet_hash, &maddr, &respond, &dupl_addr);
if (dupl_addr)
disc_dupl_alert(b, src, &maddr);
if (!respond)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 0daa6f04ca81..a8d0f28094f2 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -973,6 +973,16 @@ static inline void msg_set_grp_remitted(struct tipc_msg
*m, u16 n)
msg_set_bits(m, 9, 16, 0xffff, n);
}
+static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 9, n);
+}
+
+static inline u32 msg_peer_net_hash(struct tipc_msg *m)
+{
+ return msg_word(m, 9);
+}
+
/* Word 10
*/
static inline u16 msg_grp_evt(struct tipc_msg *m)
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 836e629e8f4a..5feaf3b67380 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -146,7 +146,7 @@ static void named_distribute(struct net *net, struct
sk_buff_head *list,
struct publication *publ;
struct sk_buff *skb = NULL;
struct distr_item *item = NULL;
- u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+ u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c8f6177dd5a2..9a4ffd647701 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -45,6 +45,8 @@
#include "netlink.h"
#include "trace.h"
+#include <net/netns/hash.h>
+
#define INVALID_NODE_SIG 0x10000
#define NODE_CLEANUP_AFTER 300000
@@ -126,6 +128,7 @@ struct tipc_node {
struct timer_list timer;
struct rcu_head rcu;
unsigned long delete_at;
+ struct net *pnet;
};
/* Node FSM states and events:
@@ -184,7 +187,7 @@ static struct tipc_link *node_active_link(struct tipc_node
*n, int sel)
return n->links[bearer_id].link;
}
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected)
{
struct tipc_node *n;
int bearer_id;
@@ -194,6 +197,14 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
if (unlikely(!n))
return mtu;
+ /* Allow MAX_MSG_SIZE when building connection-oriented messages
+ * if the peer node lives in a network namespace on this same host
+ */
+ if (n->pnet && connected) {
+ tipc_node_put(n);
+ return mtu;
+ }
+
bearer_id = n->active_links[sel & 1];
if (likely(bearer_id != INVALID_BEARER_ID))
mtu = n->links[bearer_id].mtu;
@@ -361,11 +372,14 @@ static void tipc_node_write_unlock(struct tipc_node *n)
}
static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
- u8 *peer_id, u16 capabilities)
+ u8 *peer_id, u16 capabilities,
+ u32 signature, u32 pnet_hash)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *n, *temp_node;
+ struct tipc_net *tn_peer;
struct tipc_link *l;
+ struct net *tmp;
int bearer_id;
int i;
@@ -400,6 +414,23 @@ static struct tipc_node *tipc_node_create(struct net *net,
u32 addr,
memcpy(&n->peer_id, peer_id, 16);
n->net = net;
n->capabilities = capabilities;
+ n->pnet = NULL;
+ for_each_net_rcu(tmp) {
+ /* Check whether the peer node really lives in namespace 'tmp' */
+ if (net_hash_mix(tmp) != pnet_hash)
+ continue;
+ tn_peer = net_generic(tmp, tipc_net_id);
+ if (!tn_peer)
+ continue;
+
+ if ((tn_peer->random & 0x7fff) != (signature & 0x7fff))
+ continue;
+
+ if (!memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN)) {
+ n->pnet = tmp;
+ break;
+ }
+ }
kref_init(&n->kref);
rwlock_init(&n->lock);
INIT_HLIST_NODE(&n->hash);
@@ -979,7 +1010,7 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
void tipc_node_check_dest(struct net *net, u32 addr,
u8 *peer_id, struct tipc_bearer *b,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 pnet_hash,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr)
{
@@ -998,7 +1029,8 @@ void tipc_node_check_dest(struct net *net, u32 addr,
*dupl_addr = false;
*respond = false;
- n = tipc_node_create(net, addr, peer_id, capabilities);
+ n = tipc_node_create(net, addr, peer_id, capabilities, signature,
+ pnet_hash);
if (!n)
return;
@@ -1424,6 +1456,49 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg,
struct tipc_node *node)
return -EMSGSIZE;
}
+static void tipc_lxc_xmit(struct net *pnet, struct sk_buff_head *list)
+{
+ struct tipc_msg *hdr = buf_msg(skb_peek(list));
+ struct sk_buff_head inputq;
+
+ switch (msg_user(hdr)) {
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ if (msg_connected(hdr) || msg_named(hdr)) {
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(pnet, list);
+ return;
+ }
+ if (msg_mcast(hdr)) {
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(pnet, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ return;
+ }
+ return;
+ case MSG_FRAGMENTER:
+ if (tipc_msg_assemble(list)) {
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(pnet, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ }
+ return;
+ case LINK_PROTOCOL:
+ case NAME_DISTRIBUTOR:
+ case GROUP_PROTOCOL:
+ case CONN_MANAGER:
+ case TUNNEL_PROTOCOL:
+ case BCAST_PROTOCOL:
+ return;
+ default:
+ return;
+ };
+}
+
/**
* tipc_node_xmit() is the general link level function for message sending
* @net: the applicable net namespace
@@ -1439,6 +1514,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head
*list,
struct tipc_link_entry *le = NULL;
struct tipc_node *n;
struct sk_buff_head xmitq;
+ bool node_up = false;
int bearer_id;
int rc;
@@ -1455,6 +1531,16 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head
*list,
return -EHOSTUNREACH;
}
+ node_up = node_is_up(n);
+ if (node_up && n->pnet && check_net(n->pnet)) {
+ /* Peer is in a namespace on this host: deliver to it directly */
+ tipc_lxc_xmit(n->pnet, list);
+ if (likely(skb_queue_empty(list))) {
+ tipc_node_put(n);
+ return 0;
+ }
+ }
+
tipc_node_read_lock(n);
bearer_id = n->active_links[selector & 1];
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 291d0ecd4101..11eb95ce358b 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -75,7 +75,7 @@ u32 tipc_node_get_addr(struct tipc_node *node);
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_bearer *bearer,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 pnet_hash,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
@@ -92,7 +92,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head
*subscr, u32 addr);
void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
bool tipc_node_is_up(struct net *net, u32 addr);
u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3b9f8cc328f5..fb24df03da6c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -854,7 +854,7 @@ static int tipc_send_group_msg(struct net *net, struct
tipc_sock *tsk,
/* Build message as chain of buffers */
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1388,7 +1388,7 @@ static int __tipc_sendmsg(struct socket *sock, struct
msghdr *m, size_t dlen)
return rc;
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1526,7 +1526,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk,
u32 peer_port,
sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
- tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
+ tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
__skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
--
2.20.1
_______________________________________________
tipc-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tipc-discussion