[tipc-discussion] [PATCH] tipc: fix uninit-value in tipc_nl_node_reset_link_stats
syzbot found the following issue on: == BUG: KMSAN: uninit-value in strlen lib/string.c:495 [inline] BUG: KMSAN: uninit-value in strstr+0xb4/0x2e0 lib/string.c:840 strlen lib/string.c:495 [inline] strstr+0xb4/0x2e0 lib/string.c:840 tipc_nl_node_reset_link_stats+0x41e/0xba0 net/tipc/node.c:2582 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] genl_family_rcv_msg net/netlink/genetlink.c:775 [inline] genl_rcv_msg+0x103f/0x1260 net/netlink/genetlink.c:792 netlink_rcv_skb+0x3a5/0x6c0 net/netlink/af_netlink.c:2501 genl_rcv+0x3c/0x50 net/netlink/genetlink.c:803 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] sys_sendmsg+0xabc/0xe90 net/socket.c:2492 ___sys_sendmsg+0x2a5/0x350 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x367/0x540 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 == This is because link name string is not validated before it's used in calling strstr() and strlen(). 
Reported-by: syzbot+a73d24a22be5f...@syzkaller.appspotmail.com Signed-off-by: Hoang Le --- net/tipc/node.c | 8 1 file changed, 8 insertions(+) diff --git a/net/tipc/node.c b/net/tipc/node.c index b48d97cbbe29..23419a599471 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2561,6 +2561,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct tipc_net *tn = tipc_net(net); struct tipc_link_entry *le; + int len; if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2574,7 +2575,14 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) if (!attrs[TIPC_NLA_LINK_NAME]) return -EINVAL; + len = nla_len(attrs[TIPC_NLA_LINK_NAME]); + if (len <= 0) + return -EINVAL; + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!memchr(link_name, '\0', len)) + return -EINVAL; err = -EINVAL; if (!strcmp(link_name, tipc_bclink_name)) { -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: cleanup unused function
tipc_dest_list_len() is not being called anywhere. Clean it up. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/name_table.c | 11 --- net/tipc/name_table.h | 1 - 2 files changed, 12 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 1d8ba233d047..d1180370fdf4 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l) kfree(dst); } } - -int tipc_dest_list_len(struct list_head *l) -{ - struct tipc_dest *dst; - int i = 0; - - list_for_each_entry(dst, l, list) { - i++; - } - return i; -} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 259f95e3d99c..3bcd9ef8cee3 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port); bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); bool tipc_dest_del(struct list_head *l, u32 node, u32 port); void tipc_dest_list_purge(struct list_head *l); -int tipc_dest_list_len(struct list_head *l); #endif -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: fix use-after-free Read in tipc_named_reinit
syzbot found the following issue on: == BUG: KASAN: use-after-free in tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413 Read of size 8 at addr ffff88805299a000 by task kworker/1:9/23764 CPU: 1 PID: 23764 Comm: kworker/1:9 Not tainted 5.18.0-rc4-syzkaller-00878-g17d49e6e8012 #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x495 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413 tipc_net_finalize+0x234/0x3d0 net/tipc/net.c:138 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 [...] == In commit d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work"), cancel_work_sync() is called only to make sure that any tipc_net_finalize_work() work item executing or pending on any CPU has completed before the tipc namespace is destroyed through tipc_exit_net(). But this call does not guarantee that the work is the last one queued. So, the destroyed instance may be accessed by a work item that is enqueued later. To fix this completely, we re-order the call to cancel_work_sync() so that the last tipc_net_finalize_work() has already been queued by then and is guaranteed to have completed when cancel_work_sync() returns. 
Reported-by: syzbot+47af19f3307fc9c5c...@syzkaller.appspotmail.com Fixes: d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work") Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: Hoang Le --- net/tipc/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 3f4542e0f065..434e70eabe08 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net) struct tipc_net *tn = tipc_net(net); tipc_detach_loopback(net); + tipc_net_stop(net); /* Make sure the tipc_net_finalize_work() finished */ cancel_work_sync(&tn->work); - tipc_net_stop(net); - tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: cleanup unused function
tipc_dest_list_len() is not being called anywhere. Clean it up. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/name_table.c | 11 --- net/tipc/name_table.h | 1 - 2 files changed, 12 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 1d8ba233d047..d1180370fdf4 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l) kfree(dst); } } - -int tipc_dest_list_len(struct list_head *l) -{ - struct tipc_dest *dst; - int i = 0; - - list_for_each_entry(dst, l, list) { - i++; - } - return i; -} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 259f95e3d99c..3bcd9ef8cee3 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port); bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); bool tipc_dest_del(struct list_head *l, u32 node, u32 port); void tipc_dest_list_purge(struct list_head *l); -int tipc_dest_list_len(struct list_head *l); #endif -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH] tipc: cleanup unused function
tipc_dest_list_len() is not being called anywhere. Clean it up. Signed-off-by: Hoang Le --- net/tipc/name_table.c | 11 --- net/tipc/name_table.h | 1 - 2 files changed, 12 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 1d8ba233d047..d1180370fdf4 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l) kfree(dst); } } - -int tipc_dest_list_len(struct list_head *l) -{ - struct tipc_dest *dst; - int i = 0; - - list_for_each_entry(dst, l, list) { - i++; - } - return i; -} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 259f95e3d99c..3bcd9ef8cee3 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port); bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); bool tipc_dest_del(struct list_head *l, u32 node, u32 port); void tipc_dest_list_purge(struct list_head *l); -int tipc_dest_list_len(struct list_head *l); #endif -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH] tipc: fix use-after-free Read in tipc_named_reinit
syzbot found the following issue on: == BUG: KASAN: use-after-free in tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413 Read of size 8 at addr ffff88805299a000 by task kworker/1:9/23764 CPU: 1 PID: 23764 Comm: kworker/1:9 Not tainted 5.18.0-rc4-syzkaller-00878-g17d49e6e8012 #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x495 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413 tipc_net_finalize+0x234/0x3d0 net/tipc/net.c:138 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 [...] == In commit d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work"), cancel_work_sync() is called only to make sure that any tipc_net_finalize_work() work item executing or pending on any CPU has completed before the tipc namespace is destroyed through tipc_exit_net(). But this call does not guarantee that the work is the last one queued. So, the destroyed instance may be accessed by a work item that is enqueued later. To fix this completely, we re-order the call to cancel_work_sync() so that the last tipc_net_finalize_work() has already been queued by then and is guaranteed to have completed when cancel_work_sync() returns. 
Reported-by: syzbot+47af19f3307fc9c5c...@syzkaller.appspotmail.com Fixes: d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work") Signed-off-by: Ying Xue Signed-off-by: Hoang Le --- net/tipc/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 3f4542e0f065..434e70eabe08 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net) struct tipc_net *tn = tipc_net(net); tipc_detach_loopback(net); + tipc_net_stop(net); /* Make sure the tipc_net_finalize_work() finished */ cancel_work_sync(&tn->work); - tipc_net_stop(net); - tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net v3] tipc: check attribute length for bearer name
syzbot reported uninit-value: = BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline] BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725 string_nocheck lib/vsprintf.c:644 [inline] string+0x4f9/0x6f0 lib/vsprintf.c:725 vsnprintf+0x/0x3650 lib/vsprintf.c:2806 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50 _printk+0x18d/0x1cf kernel/printk/printk.c:2293 tipc_enable_bearer net/tipc/bearer.c:371 [inline] __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] - Sanity-check the attribute length for TIPC_NLA_BEARER_NAME. - Do not use 'illegal name' in the printed message. Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com Fixes: cb30a63384bc ("tipc: refactor function tipc_enable_bearer()") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- v3: add Fixes tag in commit message. v2: remove unnecessary sanity check per Jakub's comment. --- net/tipc/bearer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d39ca05f249..932c87b98eca 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net v3] tipc: check attribute length for bearer name
syzbot reported uninit-value: = BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline] BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725 string_nocheck lib/vsprintf.c:644 [inline] string+0x4f9/0x6f0 lib/vsprintf.c:725 vsnprintf+0x/0x3650 lib/vsprintf.c:2806 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50 _printk+0x18d/0x1cf kernel/printk/printk.c:2293 tipc_enable_bearer net/tipc/bearer.c:371 [inline] __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] - Sanity-check the attribute length for TIPC_NLA_BEARER_NAME. - Do not use 'illegal name' in the printed message. v3: add Fixes tag in commit message. v2: remove unnecessary sanity check per Jakub's comment. Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com Fixes: cb30a63384bc ("tipc: refactor function tipc_enable_bearer()") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bearer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d39ca05f249..932c87b98eca 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net v2] tipc: check attribute length for bearer name
syzbot reported uninit-value: = BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline] BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725 string_nocheck lib/vsprintf.c:644 [inline] string+0x4f9/0x6f0 lib/vsprintf.c:725 vsnprintf+0x/0x3650 lib/vsprintf.c:2806 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50 _printk+0x18d/0x1cf kernel/printk/printk.c:2293 tipc_enable_bearer net/tipc/bearer.c:371 [inline] __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] - Sanity-check the attribute length for TIPC_NLA_BEARER_NAME. - Do not use 'illegal name' in the printed message. v2: remove unnecessary sanity check per Jakub's comment. Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bearer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d39ca05f249..932c87b98eca 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: check attribute length for bearer name
syzbot reported uninit-value: = BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline] BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725 string_nocheck lib/vsprintf.c:644 [inline] string+0x4f9/0x6f0 lib/vsprintf.c:725 vsnprintf+0x/0x3650 lib/vsprintf.c:2806 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50 _printk+0x18d/0x1cf kernel/printk/printk.c:2293 tipc_enable_bearer net/tipc/bearer.c:371 [inline] __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] - Sanity-check the attribute length for TIPC_NLA_BEARER_NAME. - Do not use 'illegal name' in the printed message. Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bearer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d39ca05f249..0fd7554c7cde 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -258,10 +258,10 @@ static int tipc_enable_bearer(struct net *net, const char *name, char *errstr = ""; u32 i; - if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; + if (strlen(name) > TIPC_MAX_BEARER_NAME || + !bearer_name_validate(name, &b_names)) { NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: check attribute length for bearer name
syzbot reported uninit-value: = BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline] BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725 string_nocheck lib/vsprintf.c:644 [inline] string+0x4f9/0x6f0 lib/vsprintf.c:725 vsnprintf+0x/0x3650 lib/vsprintf.c:2806 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50 _printk+0x18d/0x1cf kernel/printk/printk.c:2293 tipc_enable_bearer net/tipc/bearer.c:371 [inline] __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] - Sanity-check the attribute length for TIPC_NLA_BEARER_NAME. - Do not use 'illegal name' in the printed message. Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com Signed-off-by: Hoang Le --- net/tipc/bearer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d39ca05f249..0fd7554c7cde 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -258,10 +258,10 @@ static int tipc_enable_bearer(struct net *net, const char *name, char *errstr = ""; u32 i; - if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; + if (strlen(name) > TIPC_MAX_BEARER_NAME || + !bearer_name_validate(name, &b_names)) { NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: fix the timer expires after interval 100ms
In the timer callback function tipc_sk_timeout(), we try to reschedule another timeout to retransmit a setup request if the destination link is congested. But we use the incorrect timeout value (msecs_to_jiffies(100)) instead of (jiffies + msecs_to_jiffies(100)), so the timer expires immediately, which contradicts the intent described in the original commit. In this commit we correct the timeout value passed to sk_reset_timer(). Fixes: 6787927475e5 ("tipc: buffer overflow handling in listener socket") Acked-by: Ying Xue Signed-off-by: Hoang Le --- net/tipc/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7545321c3440..17f8c523e33b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: fix the timer expires after interval 100ms
In the timer callback function tipc_sk_timeout(), we try to reschedule another timeout to retransmit a setup request if the destination link is congested. But we use the incorrect timeout value (msecs_to_jiffies(100)) instead of (jiffies + msecs_to_jiffies(100)), so the timer expires immediately, which contradicts the intent described in the original commit. In this commit we correct the timeout value passed to sk_reset_timer(). Fixes: 6787927475e5 ("tipc: buffer overflow handling in listener socket") Signed-off-by: Hoang Le --- net/tipc/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7545321c3440..17f8c523e33b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] Revert "tipc: use consistent GFP flags"
This reverts commit 86c3a3e964d910a62eeb277d60b2a60ebefa9feb. The tipc_aead_init() function can be called from an interrupt routine. The allocation might sleep with the GFP_KERNEL flag, hence the following BUG is reported. [ 17.657509] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:230 [ 17.660916] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/3 [ 17.664093] preempt_count: 302, expected: 0 [ 17.665619] RCU nest depth: 2, expected: 0 [ 17.667163] Preemption disabled at: [ 17.667165] [<0000000000000000>] 0x0 [ 17.669753] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: GW 5.16.0-rc4+ #1 [ 17.673006] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 17.675540] Call Trace: [ 17.676285] [ 17.676913] dump_stack_lvl+0x34/0x44 [ 17.678033] __might_resched.cold+0xd6/0x10f [ 17.679311] kmem_cache_alloc_trace+0x14d/0x220 [ 17.680663] tipc_crypto_start+0x4a/0x2b0 [tipc] [ 17.682146] ? kmem_cache_alloc_trace+0xd3/0x220 [ 17.683545] tipc_node_create+0x2f0/0x790 [tipc] [ 17.684956] tipc_node_check_dest+0x72/0x680 [tipc] [ 17.686706] ? ___cache_free+0x31/0x350 [ 17.688008] ? skb_release_data+0x128/0x140 [ 17.689431] tipc_disc_rcv+0x479/0x510 [tipc] [ 17.690904] tipc_rcv+0x71c/0x730 [tipc] [ 17.692219] ? __netif_receive_skb_core+0xb7/0xf60 [ 17.693856] tipc_l2_rcv_msg+0x5e/0x90 [tipc] [ 17.695333] __netif_receive_skb_list_core+0x20b/0x260 [ 17.697072] netif_receive_skb_list_internal+0x1bf/0x2e0 [ 17.698870] ? 
dev_gro_receive+0x4c2/0x680 [ 17.700255] napi_complete_done+0x6f/0x180 [ 17.701657] virtnet_poll+0x29c/0x42e [virtio_net] [ 17.703262] __napi_poll+0x2c/0x170 [ 17.704429] net_rx_action+0x22f/0x280 [ 17.705706] __do_softirq+0xfd/0x30a [ 17.706921] common_interrupt+0xa4/0xc0 [ 17.708206] [ 17.708922] [ 17.709651] asm_common_interrupt+0x1e/0x40 [ 17.711078] RIP: 0010:default_idle+0x18/0x20 Fixes: 86c3a3e964d9 ("tipc: use consistent GFP flags") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/crypto.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 81116312b753..9325479295b8 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -524,7 +524,7 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, return -EEXIST; /* Allocate a new AEAD */ - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (unlikely(!tmp)) return -ENOMEM; @@ -1463,7 +1463,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, return -EEXIST; /* Allocate crypto */ - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return -ENOMEM; @@ -1477,7 +1477,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, } /* Allocate statistic structure */ - c->stats = alloc_percpu(struct tipc_crypto_stats); + c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { if (c->wq) destroy_workqueue(c->wq); @@ -2450,7 +2450,7 @@ static void tipc_crypto_work_tx(struct work_struct *work) } /* Lets duplicate it first */ - skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_KERNEL); + skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC); rcu_read_unlock(); /* Now, generate new key, initiate & distribute it */ -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] Revert "tipc: use consistent GFP flags"
This reverts commit 86c3a3e964d910a62eeb277d60b2a60ebefa9feb. The tipc_aead_init() function can be called from an interrupt routine. The allocation might sleep with the GFP_KERNEL flag, hence the following BUG is reported. [ 17.657509] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:230 [ 17.660916] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/3 [ 17.664093] preempt_count: 302, expected: 0 [ 17.665619] RCU nest depth: 2, expected: 0 [ 17.667163] Preemption disabled at: [ 17.667165] [<0000000000000000>] 0x0 [ 17.669753] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: GW 5.16.0-rc4+ #1 [ 17.673006] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 17.675540] Call Trace: [ 17.676285] [ 17.676913] dump_stack_lvl+0x34/0x44 [ 17.678033] __might_resched.cold+0xd6/0x10f [ 17.679311] kmem_cache_alloc_trace+0x14d/0x220 [ 17.680663] tipc_crypto_start+0x4a/0x2b0 [tipc] [ 17.682146] ? kmem_cache_alloc_trace+0xd3/0x220 [ 17.683545] tipc_node_create+0x2f0/0x790 [tipc] [ 17.684956] tipc_node_check_dest+0x72/0x680 [tipc] [ 17.686706] ? ___cache_free+0x31/0x350 [ 17.688008] ? skb_release_data+0x128/0x140 [ 17.689431] tipc_disc_rcv+0x479/0x510 [tipc] [ 17.690904] tipc_rcv+0x71c/0x730 [tipc] [ 17.692219] ? __netif_receive_skb_core+0xb7/0xf60 [ 17.693856] tipc_l2_rcv_msg+0x5e/0x90 [tipc] [ 17.695333] __netif_receive_skb_list_core+0x20b/0x260 [ 17.697072] netif_receive_skb_list_internal+0x1bf/0x2e0 [ 17.698870] ? 
dev_gro_receive+0x4c2/0x680 [ 17.700255] napi_complete_done+0x6f/0x180 [ 17.701657] virtnet_poll+0x29c/0x42e [virtio_net] [ 17.703262] __napi_poll+0x2c/0x170 [ 17.704429] net_rx_action+0x22f/0x280 [ 17.705706] __do_softirq+0xfd/0x30a [ 17.706921] common_interrupt+0xa4/0xc0 [ 17.708206] [ 17.708922] [ 17.709651] asm_common_interrupt+0x1e/0x40 [ 17.711078] RIP: 0010:default_idle+0x18/0x20 Signed-off-by: Hoang Le --- net/tipc/crypto.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 81116312b753..9325479295b8 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -524,7 +524,7 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, return -EEXIST; /* Allocate a new AEAD */ - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (unlikely(!tmp)) return -ENOMEM; @@ -1463,7 +1463,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, return -EEXIST; /* Allocate crypto */ - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return -ENOMEM; @@ -1477,7 +1477,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, } /* Allocate statistic structure */ - c->stats = alloc_percpu(struct tipc_crypto_stats); + c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { if (c->wq) destroy_workqueue(c->wq); @@ -2450,7 +2450,7 @@ static void tipc_crypto_work_tx(struct work_struct *work) } /* Lets duplicate it first */ - skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_KERNEL); + skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC); rcu_read_unlock(); /* Now, generate new key, initiate & distribute it */ -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: increase timeout in tipc_sk_enqueue()
In tipc_sk_enqueue() we use a hardcoded limit of 2 jiffies for moving socket buffers from the generic queue to a particular socket. Two jiffies is too short when other high-priority tasks get CPU cycles across multiple jiffies updates. As a result, no buffer could be enqueued to the particular socket. To solve this, we switch to a constant timeout of 20 msecs. The function will then expire after between 2 jiffies (CONFIG_100HZ) and 20 jiffies (CONFIG_1000HZ). Fixes: c637c1035534 ("tipc: resolve race problem at unicast message reception") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a0a27d87f631..ad570c2450be 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: increase timeout in tipc_sk_enqueue()
In tipc_sk_enqueue() we use a hardcoded limit of 2 jiffies for moving socket buffers from the generic queue to a particular socket. Two jiffies is too short when other high-priority tasks get CPU cycles across multiple jiffies updates. As a result, no buffer could be enqueued to the particular socket. To solve this, we switch to a constant timeout of 20 msecs. The function will then expire after between 2 jiffies (CONFIG_100HZ) and 20 jiffies (CONFIG_1000HZ). Signed-off-by: Hoang Le --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a0a27d87f631..ad570c2450be 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] Revert "tipc: Return the correct errno code"
This reverts commit 0efea3c649f0 because: - Returning the -ENOBUFS error is fine on socket buffer allocation failure. - There is a side effect in the calling path tipc_node_xmit()->tipc_link_xmit() when checking the returned error code. Fixes: 0efea3c649f0 ("tipc: Return the correct errno code") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/link.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index cf586840caeb..1b7a487c8841 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, dnode, l->addr, dport, 0, 0); if (!skb) - return -ENOMEM; + return -ENOBUFS; msg_set_dest_droppable(buf_msg(skb), true); TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); skb_queue_tail(&l->wakeupq, skb); @@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l) * * Consumes the buffer chain. * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted - * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM + * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) @@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (!_skb) { kfree_skb(skb); __skb_queue_purge(list); - return -ENOMEM; + return -ENOBUFS; } __skb_queue_tail(transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] Revert "tipc: Return the correct errno code"
This reverts commit 0efea3c649f0 because: - Returning the -ENOBUFS error is fine on socket buffer allocation failure. - There is a side effect in the calling path tipc_node_xmit()->tipc_link_xmit() when checking the returned error code. Fixes: 0efea3c649f0 ("tipc: Return the correct errno code") Signed-off-by: Hoang Le --- net/tipc/link.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index cf586840caeb..1b7a487c8841 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, dnode, l->addr, dport, 0, 0); if (!skb) - return -ENOMEM; + return -ENOBUFS; msg_set_dest_droppable(buf_msg(skb), true); TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); skb_queue_tail(&l->wakeupq, skb); @@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l) * * Consumes the buffer chain. * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted - * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM + * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) @@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (!_skb) { kfree_skb(skb); __skb_queue_purge(list); - return -ENOMEM; + return -ENOBUFS; } __skb_queue_tail(transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: fix sleeping in tipc accept routine
release_sock() is a blocking function; it may change the state after sleeping. In order to evaluate the stated condition outside the socket lock context, switch to using wait_woken() instead. Fixes: 6398e23cdb1d8 ("tipc: standardize accept routine") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/socket.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9b0b311c7ec1..2c71828b7e5c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2664,7 +2664,7 @@ static int tipc_listen(struct socket *sock, int len) static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only @@ -2673,12 +2673,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * anymore, the common case will execute the loop only once. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -2690,7 +2690,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) if (signal_pending(current)) break; } - finish_wait(sk_sleep(sk), &wait); return err; } -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: fix sleeping in tipc accept routine
release_sock() is a blocking function; it may change the state after sleeping. In order to evaluate the stated condition outside the socket lock context, switch to using wait_woken() instead. Signed-off-by: Hoang Le --- net/tipc/socket.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9b0b311c7ec1..2c71828b7e5c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2664,7 +2664,7 @@ static int tipc_listen(struct socket *sock, int len) static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only @@ -2673,12 +2673,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * anymore, the common case will execute the loop only once. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -2690,7 +2690,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) if (signal_pending(current)) break; } - finish_wait(sk_sleep(sk), &wait); return err; } -- 2.30.2 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"
This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046. The above fix is not correct and caused a memory leak. Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv") Acked-by: Jon Maloy Acked-by: Tung Nguyen Signed-off-by: Hoang Le --- net/tipc/socket.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 58935cd0d068..53af72824c9c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); - __skb_dequeue(arrvq); + /* Decrease the skb's refcnt as increasing in the +* function tipc_skb_peek +*/ + kfree_skb(__skb_dequeue(arrvq)); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] tipc: make node link identity publish thread safe
The use of the node address and node link identity is not thread safe, meaning that two publications may be published with the same values; as a result, one of them will fail because it already exists in the name table. To avoid this we have to use the node address and node link identity values obtained under the node item's write lock protection. Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/node.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 8217905348f4..81af92954c6c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, n->addr, n->capabilities); + tipc_publ_notify(net, publ_list, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, n->addr, n->capabilities); + tipc_named_node_up(net, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, n->addr, bearer_id); - tipc_nametbl_publish(net, &ua, &sk, n->link_id); + tipc_mon_peer_up(net, sk.node, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, sk.ref); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, n->addr, bearer_id); - tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); + tipc_mon_peer_down(net, sk.node, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, sk.ref); } } -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2] tipc: call a sub-routine in separate socket
When receiving a result from the first query to netlink, we may execute another query inside the callback. If this sub-routine is called on the same socket, the result of the previous execution will be discarded. To avoid this, we perform the nested query on a separate socket. Fixes: 202102830663 ("tipc: use the libmnl functions in lib/mnl_utils.c") Signed-off-by: Hoang Le Acked-by: Jon Maloy --- tipc/bearer.c | 50 +- tipc/link.c | 15 +-- tipc/socket.c | 17 +++-- 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 2afc48b9b108..968293bc9160 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -20,7 +20,9 @@ #include #include #include +#include +#include "mnl_utils.h" #include "utils.h" #include "cmdl.h" #include "msg.h" @@ -98,16 +100,28 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void *data) static int generate_multicast(short af, char *buf, int bufsize) { + struct mnlu_gen_socket bearer_nlg; struct nlmsghdr *nlh; int netid; + int err = 0; - nlh = msg_init(TIPC_NL_NET_GET); + err = mnlu_gen_socket_open(&bearer_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + + nlh = mnlu_gen_socket_cmd_prepare(&bearer_nlg, TIPC_NL_NET_GET, + NLM_F_REQUEST | NLM_F_DUMP); if (!nlh) { fprintf(stderr, "error, message initialization failed\n"); + mnlu_gen_socket_close(&bearer_nlg); return -1; } - if (msg_dumpit(nlh, get_netid_cb, &netid)) { + + err = mnlu_gen_socket_sndrcv(&bearer_nlg, nlh, get_netid_cb, &netid); + if (err) { fprintf(stderr, "error, failed to fetch TIPC network id from kernel\n"); + mnlu_gen_socket_close(&bearer_nlg); return -EINVAL; } if (af == AF_INET) @@ -115,6 +129,7 @@ static int generate_multicast(short af, char *buf, int bufsize) else snprintf(buf, bufsize, "ff02::%u", netid); + mnlu_gen_socket_close(&bearer_nlg); return 0; } @@ -794,10 +809,35 @@ static int bearer_get_udp_cb(const struct nlmsghdr *nlh, void *data) if ((cb_data->attr == TIPC_NLA_UDP_REMOTE) && (cb_data->prop == UDP_PROP_IP) && opts[TIPC_NLA_UDP_MULTI_REMOTEIP]) { - struct 
genlmsghdr *genl = mnl_nlmsg_get_payload(cb_data->nlh); + struct mnlu_gen_socket bearer_nlg; + struct nlattr *attr; + struct nlmsghdr *h; + const char *bname; + int err = 0; + + err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + + h = mnlu_gen_socket_cmd_prepare(_nlg, + TIPC_NL_UDP_GET_REMOTEIP, + NLM_F_REQUEST | NLM_F_DUMP); + if (!h) { + fprintf(stderr, "error, message initialization failed\n"); + mnlu_gen_socket_close(_nlg); + return -1; + } - genl->cmd = TIPC_NL_UDP_GET_REMOTEIP; - return msg_dumpit(cb_data->nlh, bearer_dump_udp_cb, NULL); + attr = mnl_attr_nest_start(h, TIPC_NLA_BEARER); + bname = mnl_attr_get_str(attrs[TIPC_NLA_BEARER_NAME]); + mnl_attr_put_strz(h, TIPC_NLA_BEARER_NAME, bname); + mnl_attr_nest_end(h, attr); + + err = mnlu_gen_socket_sndrcv(_nlg, h, +bearer_dump_udp_cb, NULL); + mnlu_gen_socket_close(_nlg); + return err; } addr = mnl_attr_get_payload(opts[cb_data->attr]); diff --git a/tipc/link.c b/tipc/link.c index 2123f109c694..9994ada2a367 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -17,7 +17,9 @@ #include #include #include +#include +#include "mnl_utils.h" #include "cmdl.h" #include "msg.h" #include "link.h" @@ -993,13 +995,20 @@ exit: static int link_mon_peer_list(uint32_t mon_ref) { + struct mnlu_gen_socket link_nlg; struct nlmsghdr *nlh; struct nlattr *nest; int err = 0; - nlh = msg_init(TIPC_NL_MON_PEER_GET); + err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_MON_PEER_GET, + NLM_F_REQUEST | NLM_F_DUMP); if (!nlh) { fprintf
[tipc-discussion] [iproute2-next v2] tipc: call a sub-routine in separate socket
When receiving a result from the first query to netlink, we may execute another query inside the callback. If this sub-routine is called on the same socket, the result of the previous execution will be discarded. To avoid this, we perform the nested query on a separate socket. Fixes: 202102830663 ("tipc: use the libmnl functions in lib/mnl_utils.c") Signed-off-by: Hoang Le --- tipc/bearer.c | 50 +- tipc/link.c | 15 +-- tipc/socket.c | 17 +++-- 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 2afc48b9b108..968293bc9160 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -20,7 +20,9 @@ #include #include #include +#include +#include "mnl_utils.h" #include "utils.h" #include "cmdl.h" #include "msg.h" @@ -98,16 +100,28 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void *data) static int generate_multicast(short af, char *buf, int bufsize) { + struct mnlu_gen_socket bearer_nlg; struct nlmsghdr *nlh; int netid; + int err = 0; - nlh = msg_init(TIPC_NL_NET_GET); + err = mnlu_gen_socket_open(&bearer_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + + nlh = mnlu_gen_socket_cmd_prepare(&bearer_nlg, TIPC_NL_NET_GET, + NLM_F_REQUEST | NLM_F_DUMP); if (!nlh) { fprintf(stderr, "error, message initialization failed\n"); + mnlu_gen_socket_close(&bearer_nlg); return -1; } - if (msg_dumpit(nlh, get_netid_cb, &netid)) { + + err = mnlu_gen_socket_sndrcv(&bearer_nlg, nlh, get_netid_cb, &netid); + if (err) { fprintf(stderr, "error, failed to fetch TIPC network id from kernel\n"); + mnlu_gen_socket_close(&bearer_nlg); return -EINVAL; } if (af == AF_INET) @@ -115,6 +129,7 @@ static int generate_multicast(short af, char *buf, int bufsize) else snprintf(buf, bufsize, "ff02::%u", netid); + mnlu_gen_socket_close(&bearer_nlg); return 0; } @@ -794,10 +809,35 @@ static int bearer_get_udp_cb(const struct nlmsghdr *nlh, void *data) if ((cb_data->attr == TIPC_NLA_UDP_REMOTE) && (cb_data->prop == UDP_PROP_IP) && opts[TIPC_NLA_UDP_MULTI_REMOTEIP]) { - struct genlmsghdr *genl = 
mnl_nlmsg_get_payload(cb_data->nlh); + struct mnlu_gen_socket bearer_nlg; + struct nlattr *attr; + struct nlmsghdr *h; + const char *bname; + int err = 0; + + err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + + h = mnlu_gen_socket_cmd_prepare(_nlg, + TIPC_NL_UDP_GET_REMOTEIP, + NLM_F_REQUEST | NLM_F_DUMP); + if (!h) { + fprintf(stderr, "error, message initialization failed\n"); + mnlu_gen_socket_close(_nlg); + return -1; + } - genl->cmd = TIPC_NL_UDP_GET_REMOTEIP; - return msg_dumpit(cb_data->nlh, bearer_dump_udp_cb, NULL); + attr = mnl_attr_nest_start(h, TIPC_NLA_BEARER); + bname = mnl_attr_get_str(attrs[TIPC_NLA_BEARER_NAME]); + mnl_attr_put_strz(h, TIPC_NLA_BEARER_NAME, bname); + mnl_attr_nest_end(h, attr); + + err = mnlu_gen_socket_sndrcv(_nlg, h, +bearer_dump_udp_cb, NULL); + mnlu_gen_socket_close(_nlg); + return err; } addr = mnl_attr_get_payload(opts[cb_data->attr]); diff --git a/tipc/link.c b/tipc/link.c index 2123f109c694..9994ada2a367 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -17,7 +17,9 @@ #include #include #include +#include +#include "mnl_utils.h" #include "cmdl.h" #include "msg.h" #include "link.h" @@ -993,13 +995,20 @@ exit: static int link_mon_peer_list(uint32_t mon_ref) { + struct mnlu_gen_socket link_nlg; struct nlmsghdr *nlh; struct nlattr *nest; int err = 0; - nlh = msg_init(TIPC_NL_MON_PEER_GET); + err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_MON_PEER_GET, + NLM_F_REQUEST | NLM_F_DUMP); if (!nlh) { fprintf(stderr, "error, m
[tipc-discussion] [[re-send] net] tipc: make node link identity publish thread safe
The use of the node address and node link identity is not thread safe, meaning that two publications may be published with the same values; as a result, one of them will fail because it already exists in the name table. To avoid this we have to use the node address and node link identity values obtained under the node item's write lock protection. Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()") Signed-off-by: Hoang Le --- net/tipc/node.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 8217905348f4..81af92954c6c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, n->addr, n->capabilities); + tipc_publ_notify(net, publ_list, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, n->addr, n->capabilities); + tipc_named_node_up(net, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, n->addr, bearer_id); - tipc_nametbl_publish(net, &ua, &sk, n->link_id); + tipc_mon_peer_up(net, sk.node, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, sk.ref); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, n->addr, bearer_id); - tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); + tipc_mon_peer_down(net, sk.node, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, sk.ref); } } -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net v2] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"
This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046. The above fix is not correct and caused a memory leak. Acked-by: Tung Nguyen Signed-off-by: Hoang Le --- net/tipc/socket.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 58935cd0d068..53af72824c9c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); - __skb_dequeue(arrvq); + /* Decrease the skb's refcnt as increasing in the +* function tipc_skb_peek +*/ + kfree_skb(__skb_dequeue(arrvq)); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"
This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046. The above fix is not correct and caused a memory leak: the function tipc_skb_peek() increases the skb's refcnt, so we have to call kfree_skb() twice, once to decrease the skb's refcnt and once to free the skb. Signed-off-by: Hoang Le --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 58935cd0d068..f21162aa0cf7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1262,7 +1262,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); - __skb_dequeue(arrvq); + kfree_skb(__skb_dequeue(arrvq)); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next] tipc: call a sub-routine in separate socket
When receiving a result from the first query to netlink, we may execute another query inside the callback. If this sub-routine is called on the same socket, the result of the previous execution will be discarded. To avoid this, we perform the nested query on a separate socket. Fixes: 202102830663 ("tipc: use the libmnl functions in lib/mnl_utils.c") Signed-off-by: Hoang Le --- tipc/link.c | 15 +-- tipc/socket.c | 17 +++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tipc/link.c b/tipc/link.c index 2123f109c694..9994ada2a367 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -17,7 +17,9 @@ #include #include #include +#include +#include "mnl_utils.h" #include "cmdl.h" #include "msg.h" #include "link.h" @@ -993,13 +995,20 @@ exit: static int link_mon_peer_list(uint32_t mon_ref) { + struct mnlu_gen_socket link_nlg; struct nlmsghdr *nlh; struct nlattr *nest; int err = 0; - nlh = msg_init(TIPC_NL_MON_PEER_GET); + err = mnlu_gen_socket_open(&link_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + nlh = mnlu_gen_socket_cmd_prepare(&link_nlg, TIPC_NL_MON_PEER_GET, + NLM_F_REQUEST | NLM_F_DUMP); if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); + mnlu_gen_socket_close(&link_nlg); return -1; } @@ -1007,7 +1016,9 @@ static int link_mon_peer_list(uint32_t mon_ref) mnl_attr_put_u32(nlh, TIPC_NLA_MON_REF, mon_ref); mnl_attr_nest_end(nlh, nest); - err = msg_dumpit(nlh, link_mon_peer_list_cb, NULL); + err = mnlu_gen_socket_sndrcv(&link_nlg, nlh, link_mon_peer_list_cb, +NULL); + mnlu_gen_socket_close(&link_nlg); return err; } diff --git a/tipc/socket.c b/tipc/socket.c index deae12af4409..597ffd91af52 100644 --- a/tipc/socket.c +++ b/tipc/socket.c @@ -15,7 +15,9 @@ #include #include #include +#include +#include "mnl_utils.h" #include "cmdl.h" #include "msg.h" #include "socket.h" @@ -44,12 +46,21 @@ static int publ_list_cb(const struct nlmsghdr *nlh, void *data) static int publ_list(uint32_t sock) { + struct mnlu_gen_socket sock_nlg; struct nlmsghdr *nlh; struct 
nlattr *nest; + int err; - nlh = msg_init(TIPC_NL_PUBL_GET); + err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME, + TIPC_GENL_V2_VERSION); + if (err) + return -1; + + nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_PUBL_GET, + NLM_F_REQUEST | NLM_F_DUMP); if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); + mnlu_gen_socket_close(_nlg); return -1; } @@ -57,7 +68,9 @@ static int publ_list(uint32_t sock) mnl_attr_put_u32(nlh, TIPC_NLA_SOCK_REF, sock); mnl_attr_nest_end(nlh, nest); - return msg_dumpit(nlh, publ_list_cb, NULL); + err = mnlu_gen_socket_sndrcv(_nlg, nlh, publ_list_cb, NULL); + mnlu_gen_socket_close(_nlg); + return err; } static int sock_list_cb(const struct nlmsghdr *nlh, void *data) -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next] tipc: use the libmnl functions in lib/mnl_utils.c
To avoid code duplication, tipc should be converted to use the helper functions for working with libmnl in lib/mnl_utils.c Acked-by: Jon Maloy Signed-off-by: Hoang Le --- tipc/bearer.c| 38 ++ tipc/cmdl.c | 2 - tipc/link.c | 37 + tipc/media.c | 15 +++--- tipc/msg.c | 132 +++ tipc/msg.h | 2 +- tipc/nametable.c | 5 +- tipc/node.c | 33 +--- tipc/peer.c | 8 ++- tipc/socket.c| 10 ++-- tipc/tipc.c | 21 +++- 11 files changed, 83 insertions(+), 220 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 4470819e4a96..2afc48b9b108 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -21,9 +21,6 @@ #include #include -#include -#include - #include "utils.h" #include "cmdl.h" #include "msg.h" @@ -101,11 +98,11 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void *data) static int generate_multicast(short af, char *buf, int bufsize) { - int netid; - char mnl_msg[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *nlh; + int netid; - if (!(nlh = msg_init(mnl_msg, TIPC_NL_NET_GET))) { + nlh = msg_init(TIPC_NL_NET_GET); + if (!nlh) { fprintf(stderr, "error, message initialization failed\n"); return -1; } @@ -399,7 +396,6 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const struct cmd *cmd, { int err; char *media; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct opt *opt; struct nlattr *attrs; struct opt opts[] = { @@ -435,7 +431,8 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_ADD))) { + nlh = msg_init(TIPC_NL_BEARER_ADD); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -482,7 +479,6 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd, int err; struct opt *opt; struct nlattr *nest; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct opt opts[] = { { "device", OPT_KEYVAL, NULL }, { "domain", OPT_KEYVAL, NULL }, @@ -508,7 +504,8 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if 
(!(nlh = msg_init(buf, TIPC_NL_BEARER_ENABLE))) { + nlh = msg_init(TIPC_NL_BEARER_ENABLE); + if (!nlh) { fprintf(stderr, "error: message initialisation failed\n"); return -1; } @@ -563,7 +560,6 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { int err; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlattr *nest; struct opt opts[] = { { "device", OPT_KEYVAL, NULL }, @@ -584,7 +580,8 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_DISABLE))) { + nlh = msg_init(TIPC_NL_BEARER_DISABLE); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -628,7 +625,6 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, int err; int val; int prop; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlattr *props; struct nlattr *attrs; struct opt opts[] = { @@ -675,7 +671,8 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, } } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_SET))) { + nlh = msg_init(TIPC_NL_BEARER_SET); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -876,7 +873,6 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const struct cmd *cmd, { int err; char *media; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct opt *opt; struct cb_data cb_data = {0}; struct nlattr *attrs; @@ -918,7 +914,8 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_GET))) { + nlh = msg_init(TIPC_NL_BEARER_GET); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -956,7 +953,6 @@ static int cmd_bearer_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd, {
[tipc-discussion] [net] tipc: fix unique bearer names sanity check
When enabling a bearer by name, we don't sanity check its name against the higher slots in the bearer list. This may have the effect that the name of an already enabled bearer bypasses the check. To fix the above issue, we perform an extra check against all existing bearers. Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()") Cc: sta...@vger.kernel.org Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bearer.c | 46 +++--- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d47e0b940ac9..443f8e5b9477 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -256,6 +256,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, int bearer_id = 0; int res = -EINVAL; char *errstr = ""; + u32 i; if (!bearer_name_validate(name, &b_names)) { errstr = "illegal name"; @@ -280,31 +281,38 @@ static int tipc_enable_bearer(struct net *net, const char *name, prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ - while (bearer_id < MAX_BEARERS) { - b = rtnl_dereference(tn->bearer_list[bearer_id]); - if (!b) - break; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { + bearer_id = i; + continue; + } if (!strcmp(name, b->name)) { errstr = "already enabled"; NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } - bearer_id++; - if (b->priority != prio) - continue; - if (++with_this_prio <= 2) - continue; - pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", - name, prio); - if (prio == TIPC_MIN_LINK_PRI) { - errstr = "cannot adjust to lower"; - NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); - goto rejected; + + if (b->priority == prio && + (++with_this_prio > 2)) { + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto 
rejected; + } + + pr_warn("Bearer <%s>: trying with adjusted priority\n", + name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; } - pr_warn("Bearer <%s>: trying with adjusted priority\n", name); - prio--; - bearer_id = 0; - with_this_prio = 1; } if (bearer_id >= MAX_BEARERS) { -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net v2] tipc: fix unique bearer names sanity check
When enabling a bearer identified by name, we don't sanity check its name against the higher slots in the bearer list. This leads to duplicate bearer names bypassing the check. To fix the above issue, we perform an extra check against all existing bearers. Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()") Signed-off-by: Hoang Le --- net/tipc/bearer.c | 45 ++--- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d47e0b940ac9..94eddc67d52e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -256,6 +256,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, int bearer_id = 0; int res = -EINVAL; char *errstr = ""; + u32 i; if (!bearer_name_validate(name, &b_names)) { errstr = "illegal name"; @@ -280,31 +281,37 @@ static int tipc_enable_bearer(struct net *net, const char *name, prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ - while (bearer_id < MAX_BEARERS) { - b = rtnl_dereference(tn->bearer_list[bearer_id]); - if (!b) - break; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { + bearer_id = i; + continue; + } if (!strcmp(name, b->name)) { errstr = "already enabled"; NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } - bearer_id++; - if (b->priority != prio) - continue; - if (++with_this_prio <= 2) - continue; - pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", - name, prio); - if (prio == TIPC_MIN_LINK_PRI) { - errstr = "cannot adjust to lower"; - NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); - goto rejected; + + if (b->priority == prio && + (++with_this_prio > 2)) { + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto rejected; + } + + pr_warn("Bearer <%s>: trying with adjusted 
priority\n", name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; } - pr_warn("Bearer <%s>: trying with adjusted priority\n", name); - prio--; - bearer_id = 0; - with_this_prio = 1; } if (bearer_id >= MAX_BEARERS) { -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next] tipc: use the libmnl functions in lib/mnl_utils.c
To avoid duplication, tipc should be converted to use the helper functions for working with libmnl in lib/mnl_utils.c Signed-off-by: Hoang Le --- tipc/bearer.c| 38 ++ tipc/cmdl.c | 2 - tipc/link.c | 37 + tipc/media.c | 15 +++--- tipc/msg.c | 132 +++ tipc/msg.h | 2 +- tipc/nametable.c | 5 +- tipc/node.c | 33 +--- tipc/peer.c | 8 ++- tipc/socket.c| 10 ++-- tipc/tipc.c | 21 +++- 11 files changed, 83 insertions(+), 220 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 4470819e4a96..2afc48b9b108 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -21,9 +21,6 @@ #include #include -#include -#include - #include "utils.h" #include "cmdl.h" #include "msg.h" @@ -101,11 +98,11 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void *data) static int generate_multicast(short af, char *buf, int bufsize) { - int netid; - char mnl_msg[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *nlh; + int netid; - if (!(nlh = msg_init(mnl_msg, TIPC_NL_NET_GET))) { + nlh = msg_init(TIPC_NL_NET_GET); + if (!nlh) { fprintf(stderr, "error, message initialization failed\n"); return -1; } @@ -399,7 +396,6 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const struct cmd *cmd, { int err; char *media; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct opt *opt; struct nlattr *attrs; struct opt opts[] = { @@ -435,7 +431,8 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_ADD))) { + nlh = msg_init(TIPC_NL_BEARER_ADD); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -482,7 +479,6 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd, int err; struct opt *opt; struct nlattr *nest; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct opt opts[] = { { "device", OPT_KEYVAL, NULL }, { "domain", OPT_KEYVAL, NULL }, @@ -508,7 +504,8 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if (!(nlh = msg_init(buf, 
TIPC_NL_BEARER_ENABLE))) { + nlh = msg_init(TIPC_NL_BEARER_ENABLE); + if (!nlh) { fprintf(stderr, "error: message initialisation failed\n"); return -1; } @@ -563,7 +560,6 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { int err; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlattr *nest; struct opt opts[] = { { "device", OPT_KEYVAL, NULL }, @@ -584,7 +580,8 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_DISABLE))) { + nlh = msg_init(TIPC_NL_BEARER_DISABLE); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -628,7 +625,6 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, int err; int val; int prop; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlattr *props; struct nlattr *attrs; struct opt opts[] = { @@ -675,7 +671,8 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, } } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_SET))) { + nlh = msg_init(TIPC_NL_BEARER_SET); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -876,7 +873,6 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const struct cmd *cmd, { int err; char *media; - char buf[MNL_SOCKET_BUFFER_SIZE]; struct opt *opt; struct cb_data cb_data = {0}; struct nlattr *attrs; @@ -918,7 +914,8 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } - if (!(nlh = msg_init(buf, TIPC_NL_BEARER_GET))) { + nlh = msg_init(TIPC_NL_BEARER_GET); + if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } @@ -956,7 +953,6 @@ static int cmd_bearer_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd, { in
[tipc-discussion] [net] tipc: fix unique bearer names sanity check
When enabling a bearer identified by name, we don't sanity check its name against the bearers in higher slots of the bearer list. This lets duplicate bearer names bypass the check. To fix this, we perform an extra check against all existing bearers. Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()") Signed-off-by: Hoang Le --- net/tipc/bearer.c | 13 + 1 file changed, 13 insertions(+) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d47e0b940ac9..6fae68f0e654 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -256,6 +256,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, int bearer_id = 0; int res = -EINVAL; char *errstr = ""; + u32 i; if (!bearer_name_validate(name, _names)) { errstr = "illegal name"; @@ -313,6 +314,18 @@ static int tipc_enable_bearer(struct net *net, const char *name, goto rejected; } + /* Check new bearer from free slot up to MAX_BEARERS */ + for (i = bearer_id + 1; i <= MAX_BEARERS; ++i) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) + continue; + if (!strcmp(name, b->name)) { + errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); + goto rejected; + } + } + b = kzalloc(sizeof(*b), GFP_ATOMIC); if (!b) return -ENOMEM; -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: fix kernel-doc warnings
Fix kernel-doc warning introduced in commit b83e214b2e04 ("tipc: add extack messages for bearer/media failure"): net/tipc/bearer.c:248: warning: Function parameter or member 'extack' not described in 'tipc_enable_bearer' Fixes: b83e214b2e04 ("tipc: add extack messages for bearer/media failure") Signed-off-by: Hoang Le --- net/tipc/bearer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 1090f21fcfac..d47e0b940ac9 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -240,6 +240,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * @disc_domain: bearer domain * @prio: bearer priority * @attr: nlattr array + * @extack: netlink extended ack */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next] tipc: add support for the netlink extack
Add support extack in tipc to dump the netlink extack error messages (i.e -EINVAL) sent from kernel. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- tipc/msg.c | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tipc/msg.c b/tipc/msg.c index dc09d05048f3..f29b2f8d35ad 100644 --- a/tipc/msg.c +++ b/tipc/msg.c @@ -18,6 +18,7 @@ #include #include +#include "libnetlink.h" #include "msg.h" int parse_attrs(const struct nlattr *attr, void *data) @@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) { int ret; struct mnl_socket *nl; + int one = 1; nl = mnl_socket_open(NETLINK_GENERIC); if (nl == NULL) { @@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) return NULL; } + /* support to get extended ACK */ + mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, , sizeof(one)); ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID); if (ret < 0) { perror("mnl_socket_bind"); @@ -73,21 +77,32 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int seq) { - int ret; unsigned int portid; char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *h; + size_t num_bytes; + int is_err = 0; + int ret = 0; portid = mnl_socket_get_portid(nl); - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); - while (ret > 0) { - ret = mnl_cb_run(buf, ret, seq, portid, callback, data); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + while (num_bytes > 0) { + ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data); if (ret <= 0) break; - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + } + + if (ret == -1) { + if (num_bytes > 0) { + h = (struct nlmsghdr *)buf; + is_err = nl_dump_ext_ack(h, NULL); + } + + if (!is_err) + perror("error"); } - if (ret == -1) - perror("error"); mnl_socket_close(nl); -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net 
https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: add extack messages for bearer/media failure
Add extack error messages for -EINVAL errors when enabling bearer, getting/setting properties for a media/bearer Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bearer.c | 50 +-- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a4389ef08a98..1090f21fcfac 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -243,7 +243,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, - struct nlattr *attr[]) + struct nlattr *attr[], + struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; @@ -257,17 +258,20 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (!bearer_name_validate(name, _names)) { errstr = "illegal name"; + NL_SET_ERR_MSG(extack, "Illegal name"); goto rejected; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } @@ -281,6 +285,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, break; if (!strcmp(name, b->name)) { errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } bearer_id++; @@ -292,6 +297,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, name, prio); if (prio == TIPC_MIN_LINK_PRI) { errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); goto rejected; } pr_warn("Bearer <%s>: trying with adjusted priority\n", name); @@ -302,6 +308,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } @@ -315,6 
+322,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { kfree(b); errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } @@ -331,6 +339,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } @@ -909,6 +918,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } @@ -948,8 +958,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); - if (!bearer) + if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } bearer_disable(net, bearer); @@ -1007,7 +1019,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } - return tipc_enable_bearer(net, bearer, domain, prio, attrs); + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) @@ -1046,6 +1059,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL
[tipc-discussion] [iproute2-next v2] tipc: add support for the netlink extack
Add support extack in tipc to dump the netlink extack error messages (i.e -EINVAL) sent from kernel. Signed-off-by: Hoang Le --- tipc/msg.c | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tipc/msg.c b/tipc/msg.c index dc09d05048f3..f29b2f8d35ad 100644 --- a/tipc/msg.c +++ b/tipc/msg.c @@ -18,6 +18,7 @@ #include #include +#include "libnetlink.h" #include "msg.h" int parse_attrs(const struct nlattr *attr, void *data) @@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) { int ret; struct mnl_socket *nl; + int one = 1; nl = mnl_socket_open(NETLINK_GENERIC); if (nl == NULL) { @@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) return NULL; } + /* support to get extended ACK */ + mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, , sizeof(one)); ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID); if (ret < 0) { perror("mnl_socket_bind"); @@ -73,21 +77,32 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int seq) { - int ret; unsigned int portid; char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *h; + size_t num_bytes; + int is_err = 0; + int ret = 0; portid = mnl_socket_get_portid(nl); - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); - while (ret > 0) { - ret = mnl_cb_run(buf, ret, seq, portid, callback, data); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + while (num_bytes > 0) { + ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data); if (ret <= 0) break; - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + } + + if (ret == -1) { + if (num_bytes > 0) { + h = (struct nlmsghdr *)buf; + is_err = nl_dump_ext_ack(h, NULL); + } + + if (!is_err) + perror("error"); } - if (ret == -1) - perror("error"); mnl_socket_close(nl); -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net 
https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next v2] tipc: add support for the netlink extack
Add support extack in tipc to dump the netlink extack error messages (i.e -EINVAL) sent from kernel. Signed-off-by: Hoang Le --- tipc/msg.c | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tipc/msg.c b/tipc/msg.c index dc09d05048f3..f29b2f8d35ad 100644 --- a/tipc/msg.c +++ b/tipc/msg.c @@ -18,6 +18,7 @@ #include #include +#include "libnetlink.h" #include "msg.h" int parse_attrs(const struct nlattr *attr, void *data) @@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) { int ret; struct mnl_socket *nl; + int one = 1; nl = mnl_socket_open(NETLINK_GENERIC); if (nl == NULL) { @@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) return NULL; } + /* support to get extended ACK */ + mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, , sizeof(one)); ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID); if (ret < 0) { perror("mnl_socket_bind"); @@ -73,21 +77,32 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int seq) { - int ret; unsigned int portid; char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *h; + size_t num_bytes; + int is_err = 0; + int ret = 0; portid = mnl_socket_get_portid(nl); - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); - while (ret > 0) { - ret = mnl_cb_run(buf, ret, seq, portid, callback, data); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + while (num_bytes > 0) { + ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data); if (ret <= 0) break; - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + } + + if (ret == -1) { + if (num_bytes > 0) { + h = (struct nlmsghdr *)buf; + is_err = nl_dump_ext_ack(h, NULL); + } + + if (!is_err) + perror("error"); } - if (ret == -1) - perror("error"); mnl_socket_close(nl); -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net 
https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next] tipc: add support for the netlink extack
Add support extack in tipc to dump the netlink extack error messages (i.e -EINVAL) sent from kernel. Signed-off-by: Hoang Le --- tipc/msg.c | 23 +-- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tipc/msg.c b/tipc/msg.c index dc09d05048f3..57ece0d13194 100644 --- a/tipc/msg.c +++ b/tipc/msg.c @@ -18,6 +18,7 @@ #include #include +#include "libnetlink.h" #include "msg.h" int parse_attrs(const struct nlattr *attr, void *data) @@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) { int ret; struct mnl_socket *nl; + int one = 1; nl = mnl_socket_open(NETLINK_GENERIC); if (nl == NULL) { @@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) return NULL; } + /* support to get extended ACK */ + mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, , sizeof(one)); ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID); if (ret < 0) { perror("mnl_socket_bind"); @@ -73,21 +77,28 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh) static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int seq) { - int ret; unsigned int portid; char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *h; + size_t num_bytes; + int ret = 0; portid = mnl_socket_get_portid(nl); - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); - while (ret > 0) { - ret = mnl_cb_run(buf, ret, seq, portid, callback, data); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + while (num_bytes > 0) { + ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data); if (ret <= 0) break; - ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf)); } - if (ret == -1) + + if (num_bytes > 0 && ret <= 0) { + h = (struct nlmsghdr *)buf; + nl_dump_ext_ack(h, NULL); + } else { perror("error"); + } mnl_socket_close(nl); -- 2.25.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: add extack messages for bearer/media failure
Add extack error messages for -EINVAL errors when enabling bearer, getting/setting properties for a media/bearer Signed-off-by: Hoang Le --- net/tipc/bearer.c | 50 +-- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a4389ef08a98..1090f21fcfac 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -243,7 +243,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, - struct nlattr *attr[]) + struct nlattr *attr[], + struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; @@ -257,17 +258,20 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (!bearer_name_validate(name, _names)) { errstr = "illegal name"; + NL_SET_ERR_MSG(extack, "Illegal name"); goto rejected; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } @@ -281,6 +285,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, break; if (!strcmp(name, b->name)) { errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } bearer_id++; @@ -292,6 +297,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, name, prio); if (prio == TIPC_MIN_LINK_PRI) { errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); goto rejected; } pr_warn("Bearer <%s>: trying with adjusted priority\n", name); @@ -302,6 +308,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } @@ -315,6 +322,7 @@ static int 
tipc_enable_bearer(struct net *net, const char *name, if (res) { kfree(b); errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } @@ -331,6 +339,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } @@ -909,6 +918,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } @@ -948,8 +958,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); - if (!bearer) + if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } bearer_disable(net, bearer); @@ -1007,7 +1019,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } - return tipc_enable_bearer(net, bearer, domain, prio, attrs); + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) @@ -1046,6 +1059,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } @@ -1086,8 +1100,10 @@ in
[tipc-discussion] [net-next] tipc: Add a missing case of TIPC_DIRECT_MSG type
In commit f73b12812a3d ("tipc: improve throughput between nodes in netns"), we missed a check to handle the TIPC_DIRECT_MSG type, so the old sending mechanism is still used for this message type. As a result, the throughput improvement is not as significant as expected. Besides that, when sending a large message of that type, we also use the wrong receive queue: it should be enqueued in the socket receive queue rather than handled as a multicast message. Fix this by adding the missing case for TIPC_DIRECT_MSG. Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns") Reported-by: Tuong Lien Signed-off-by: Hoang Le --- net/tipc/msg.h| 5 + net/tipc/node.c | 3 ++- net/tipc/socket.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 6d466ebdb64f..871feadbbc19 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -394,6 +394,11 @@ static inline u32 msg_connected(struct tipc_msg *m) return msg_type(m) == TIPC_CONN_MSG; } +static inline u32 msg_direct(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_DIRECT_MSG; +} + static inline u32 msg_errcode(struct tipc_msg *m) { return msg_bits(m, 1, 25, 0xf); diff --git a/net/tipc/node.c b/net/tipc/node.c index 0c88778c88b5..10292c942384 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1586,7 +1586,8 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (msg_connected(hdr) || msg_named(hdr)) { + if (msg_connected(hdr) || msg_named(hdr) || + msg_direct(hdr)) { tipc_loopback_trace(peer_net, list); spin_lock_init(>lock); tipc_sk_rcv(peer_net, list); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 693e8902161e..87466607097f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1461,7 +1461,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) } __skb_queue_head_init(); - mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false); + mtu = 
tipc_node_get_mtu(net, dnode, tsk->portid, true); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, ); if (unlikely(rc != dlen)) return rc; -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: simplify trivial boolean return
Checking a condition only to return 'true' is useless, as the function returns 'true' at its end anyway. Signed-off-by: Hoang Le --- net/tipc/msg.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0d515d20b056..4d0e0bdd997b 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -736,9 +736,6 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg_set_destport(msg, dport); *err = TIPC_OK; - if (!skb_cloned(skb)) - return true; - return true; } -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: update replicast capability for broadcast send link
When a cluster is set up with a mix of nodes that do and do not support the replicast capability, this capability is disabled for the broadcast send link in order to be backwards compatible. However, when the non-supporting nodes leave and are removed from the cluster, we don't update this capability on the broadcast send link. As a result, some features that are based on this capability remain unexpectedly disabled. In this commit, we make sure the broadcast send link capabilities are re-calculated as soon as a node is removed from or rejoins a cluster. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bcast.c | 4 ++-- net/tipc/bcast.h | 2 +- net/tipc/link.c | 2 +- net/tipc/node.c | 8 +++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index f41096a759fa..55aeba681cf4 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -87,9 +87,9 @@ int tipc_bcast_get_mtu(struct net *net) return tipc_link_mss(tipc_bc_sndlink(net)); } -void tipc_bcast_disable_rcast(struct net *net) +void tipc_bcast_toggle_rcast(struct net *net, bool supp) { - tipc_bc_base(net)->rcast_support = false; + tipc_bc_base(net)->rcast_support = supp; } static void tipc_bcbase_calc_bc_threshold(struct net *net) diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index dadad953e2be..9e847d9617d3 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -85,7 +85,7 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); int tipc_bcast_get_mtu(struct net *net); -void tipc_bcast_disable_rcast(struct net *net); +void tipc_bcast_toggle_rcast(struct net *net, bool supp); int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_mc_method *method, struct tipc_nlist *dests, u16 *cong_link_cnt); diff --git a/net/tipc/link.c b/net/tipc/link.c index fb72031228c9..24d4d10756d3 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c 
@@ -550,7 +550,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, /* Disable replicast if even a single peer doesn't support it */ if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST)) - tipc_bcast_disable_rcast(net); + tipc_bcast_toggle_rcast(net, false); return true; } diff --git a/net/tipc/node.c b/net/tipc/node.c index aaf595613e6e..ab04e00cb95b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -496,6 +496,9 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, + (tn->capabilities & TIPC_BCAST_RCAST)); + goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -557,6 +560,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, list_for_each_entry_rcu(temp_node, >node_list, list) { tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(>node_list_lock); @@ -740,7 +744,8 @@ static bool tipc_node_cleanup(struct tipc_node *peer) list_for_each_entry_rcu(temp_node, >node_list, list) { tn->capabilities &= temp_node->capabilities; } - + tipc_bcast_toggle_rcast(peer->net, + (tn->capabilities & TIPC_BCAST_RCAST)); spin_unlock_bh(>node_list_lock); return deleted; } @@ -2198,6 +2203,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) list_for_each_entry_rcu(temp_node, >node_list, list) { tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); err = 0; err_out: tipc_node_put(peer); -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: update replicast capability for broadcast send link
When a cluster is set up with a mix of nodes that do and do not support the replicast capability, this capability is disabled for the broadcast send link in order to be backwards compatible. However, when the non-supporting nodes leave and are removed from the cluster, we don't update this capability on the broadcast send link. As a result, some features that are based on this capability remain unexpectedly disabled. In this commit, we make sure the broadcast send link capabilities are re-calculated as soon as a node is removed from or rejoins a cluster. Signed-off-by: Hoang Le --- net/tipc/bcast.c | 4 ++-- net/tipc/bcast.h | 2 +- net/tipc/link.c | 2 +- net/tipc/node.c | 8 +++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 44ed481fec47..3d14e60ef642 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -87,9 +87,9 @@ int tipc_bcast_get_mtu(struct net *net) return tipc_link_mss(tipc_bc_sndlink(net)); } -void tipc_bcast_disable_rcast(struct net *net) +void tipc_bcast_toggle_rcast(struct net *net, bool supp) { - tipc_bc_base(net)->rcast_support = false; + tipc_bc_base(net)->rcast_support = supp; } static void tipc_bcbase_calc_bc_threshold(struct net *net) diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index dadad953e2be..9e847d9617d3 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -85,7 +85,7 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); int tipc_bcast_get_mtu(struct net *net); -void tipc_bcast_disable_rcast(struct net *net); +void tipc_bcast_toggle_rcast(struct net *net, bool supp); int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_mc_method *method, struct tipc_nlist *dests, u16 *cong_link_cnt); diff --git a/net/tipc/link.c b/net/tipc/link.c index a2e9a64d5a0f..5153b9bb7b3f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -550,7 +550,7 @@ 
bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, /* Disable replicast if even a single peer doesn't support it */ if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST)) - tipc_bcast_disable_rcast(net); + tipc_bcast_toggle_rcast(net, false); return true; } diff --git a/net/tipc/node.c b/net/tipc/node.c index b058647fa78b..b9f6b5dfdb5b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -496,6 +496,9 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, + (tn->capabilities & TIPC_BCAST_RCAST)); + goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -557,6 +560,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, list_for_each_entry_rcu(temp_node, >node_list, list) { tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(>node_list_lock); @@ -740,7 +744,8 @@ static bool tipc_node_cleanup(struct tipc_node *peer) list_for_each_entry_rcu(temp_node, >node_list, list) { tn->capabilities &= temp_node->capabilities; } - + tipc_bcast_toggle_rcast(peer->net, + (tn->capabilities & TIPC_BCAST_RCAST)); spin_unlock_bh(>node_list_lock); return deleted; } @@ -2198,6 +2203,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) list_for_each_entry_rcu(temp_node, >node_list, list) { tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); err = 0; err_out: tipc_node_put(peer); -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [net-next] tipc: update a binding service via broadcast
Hi Jon, Please take a look at v2. The mechanism looks the same as I did before in commit: c55c8edafa91 ("tipc: smooth change between replicast and broadcast") However, in this case we handle only one direction: replicast -> broadcast. Then, it is still backward compatible. [...] >From ae2ee6a7064de3ec1dc2c7df2db241d22b0d129f Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Wed, 13 Nov 2019 14:01:03 +0700 Subject: [PATCH] tipc: update a binding service via broadcast Currently, binding table updates (adding a service binding to the name table or withdrawing a service binding) are sent over replicast. However, when scaling up clusters to > 100 nodes/containers, this method is less efficient because it loops through the nodes in a cluster one by one. It is worthwhile to use broadcast to update a binding service, so that the binding table is updated on all nodes in one shot. The mechanism is backward compatible because only the sending side changes. v2: resolve synchronization problem when switching from unicast to broadcast Signed-off-by: Hoang Le --- net/tipc/bcast.c | 13 + net/tipc/bcast.h | 2 ++ net/tipc/link.c | 16 net/tipc/name_distr.c | 8 net/tipc/name_table.c | 9 ++--- 5 files changed, 45 insertions(+), 3 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index f41096a759fa..18431fa897ab 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, __skb_queue_tail(inputq, _skb); } } + +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb) +{ + struct sk_buff_head xmitq; + u16 cong_link_cnt; + int rc = 0; + + __skb_queue_head_init(); + __skb_queue_tail(, skb); + rc = tipc_bcast_xmit(net, , _link_cnt); + __skb_queue_purge(); + return rc; +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index dadad953e2be..a100da3800fc 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net); u32 tipc_bcast_get_broadcast_mode(struct net 
*net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb); + void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq); diff --git a/net/tipc/link.c b/net/tipc/link.c index fb72031228c9..22f1854435df 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -187,6 +187,9 @@ struct tipc_link { struct tipc_link *bc_sndlink; u8 nack_state; bool bc_peer_is_up; + bool named_sync; + struct sk_buff_head defer_namedq; + /* Statistics */ struct tipc_stats stats; @@ -363,6 +366,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; + rcv_l->named_sync = false; if (!snd_l->ackers) { trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!"); tipc_link_reset(snd_l); @@ -508,6 +512,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, __skb_queue_head_init(>failover_deferdq); skb_queue_head_init(>wakeupq); skb_queue_head_init(l->inputq); + __skb_queue_head_init(>defer_namedq); return true; } @@ -932,6 +937,8 @@ void tipc_link_reset(struct tipc_link *l) l->silent_intv_cnt = 0; l->rst_cnt = 0; l->bc_peer_is_up = false; + l->named_sync = false; + __skb_queue_purge(>defer_namedq); memset(>mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); } @@ -1210,6 +1217,15 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; + if (msg_is_syn(hdr)) { + l->bc_rcvlink->named_sync = true; + skb_queue_splice_tail_init(>defer_namedq, l->namedq); + return true; + } + if (msg_is_rcast(hdr) && !l->bc_rcvlink->named_sync) { + skb_queue_tail(>defer_namedq, skb); + return true; + } skb_queue_tail(l->namedq, skb); return true; case MSG_BUNDLER: diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 5feaf3b67380..419b3f0f102d 100644 --- 
a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -180,6 +180,14 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); __
[tipc-discussion] [net-next] tipc: update a binding service via broadcast
Currently, binding table updates (adding a service binding to the name table or withdrawing a service binding) are sent over replicast. However, when scaling up clusters to > 100 nodes/containers, this method is less efficient because it loops through the nodes in a cluster one by one. It is worthwhile to use broadcast to update a binding service, so that the binding table is updated on all nodes in one shot. The mechanism is backward compatible because only the sending side changes. Signed-off-by: Hoang Le --- net/tipc/bcast.c | 13 + net/tipc/bcast.h | 2 ++ net/tipc/name_table.c | 4 ++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index f41096a759fa..18431fa897ab 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, __skb_queue_tail(inputq, _skb); } } + +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb) +{ + struct sk_buff_head xmitq; + u16 cong_link_cnt; + int rc = 0; + + __skb_queue_head_init(); + __skb_queue_tail(, skb); + rc = tipc_bcast_xmit(net, , _link_cnt); + __skb_queue_purge(); + return rc; +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index dadad953e2be..a100da3800fc 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net); u32 tipc_bcast_get_broadcast_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb); + void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 66a65c2cdb23..9e9c61f7c999 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -633,7 +633,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, spin_unlock_bh(>nametbl_lock); if (skb) - tipc_node_broadcast(net, skb); + tipc_bcast_named_publish(net, skb); 
return p; } @@ -664,7 +664,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, spin_unlock_bh(>nametbl_lock); if (skb) { - tipc_node_broadcast(net, skb); + tipc_bcast_named_publish(net, skb); return 1; } return 0; -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [net-next] tipc: update mon's self addr when node addr generated
Yeah, but I think we will have the same result with both of them. Because link monitoring is still 'idle' during this period time. Regards, Hoang -Original Message- From: Jon Maloy Sent: Wednesday, November 13, 2019 8:35 AM To: Hoang Huu Le ; ma...@donjonn.com; tipc-discussion@lists.sourceforge.net Subject: RE: [net-next] tipc: update mon's self addr when node addr generated Thinking about it, wouldn't it be better to add the node to the monitor at the moment it really has an address, and not earlier? To add it to the monitor with address 0 is pretty meaningless. ///jon > -Original Message- > From: Jon Maloy > Sent: 11-Nov-19 09:10 > To: Hoang Le ; ma...@donjonn.com; > tipc-discussion@lists.sourceforge.net > Subject: RE: [net-next] tipc: update mon's self addr when node addr generated > > Acked. > > ///jon > > > -----Original Message- > > From: Hoang Le > > Sent: 11-Nov-19 04:24 > > To: Jon Maloy ; ma...@donjonn.com; > > tipc-discussion@lists.sourceforge.net > > Subject: [net-next] tipc: update mon's self addr when node addr generated > > > > In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address > > hash values"), the 32-bit node address only generated after one second > > trial period expired. However the self's addr in struct tipc_monitor do > > not update according to node address generated. This lead to it is > > always zero as initial value. As result, sorting algorithm using this > > value does not work as expected, neither neighbor monitoring framework. > > > > In this commit, we add a fix to update self's addr when 32-bit node > > address generated. 
> > > > Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash > > values") > > Signed-off-by: Hoang Le > > --- > > net/tipc/monitor.c | 15 +++ > > net/tipc/monitor.h | 1 + > > net/tipc/net.c | 2 ++ > > 3 files changed, 18 insertions(+) > > > > diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c > > index 6a6eae88442f..58708b4c7719 100644 > > --- a/net/tipc/monitor.c > > +++ b/net/tipc/monitor.c > > @@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id) > > kfree(mon); > > } > > > > +void tipc_mon_reinit_self(struct net *net) > > +{ > > + struct tipc_monitor *mon; > > + int bearer_id; > > + > > + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { > > + mon = tipc_monitor(net, bearer_id); > > + if (!mon) > > + continue; > > + write_lock_bh(>lock); > > + mon->self->addr = tipc_own_addr(net); > > + write_unlock_bh(>lock); > > + } > > +} > > + > > int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) > > { > > struct tipc_net *tn = tipc_net(net); > > diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h > > index 2a21b93e0d04..ed63d2e650b0 100644 > > --- a/net/tipc/monitor.h > > +++ b/net/tipc/monitor.h > > @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct > > tipc_nl_msg *msg, > > u32 bearer_id); > > int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, > > u32 bearer_id, u32 *prev_node); > > +void tipc_mon_reinit_self(struct net *net); > > > > extern const int tipc_max_domain_size; > > #endif > > diff --git a/net/tipc/net.c b/net/tipc/net.c > > index 85707c185360..2de3cec9929d 100644 > > --- a/net/tipc/net.c > > +++ b/net/tipc/net.c > > @@ -42,6 +42,7 @@ > > #include "node.h" > > #include "bcast.h" > > #include "netlink.h" > > +#include "monitor.h" > > > > /* > > * The TIPC locking policy is designed to ensure a very fine locking > > @@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr) > > tipc_set_node_addr(net, addr); > > tipc_named_reinit(net); 
> > tipc_sk_reinit(net); > > + tipc_mon_reinit_self(net); > > tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, > > TIPC_CLUSTER_SCOPE, 0, addr); > > } > > -- > > 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: update mon's self addr when node addr generated
In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values"), the 32-bit node address is only generated after the one-second trial period has expired. However, the self addr in struct tipc_monitor is not updated when the node address is generated, so it always keeps its initial value of zero. As a result, the sorting algorithm using this value does not work as expected, and neither does the neighbor monitoring framework. In this commit, we add a fix to update the self addr when the 32-bit node address is generated. Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/monitor.c | 15 +++ net/tipc/monitor.h | 1 + net/tipc/net.c | 2 ++ 3 files changed, 18 insertions(+) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 6a6eae88442f..58708b4c7719 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id) kfree(mon); } +void tipc_mon_reinit_self(struct net *net) +{ + struct tipc_monitor *mon; + int bearer_id; + + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + mon = tipc_monitor(net, bearer_id); + if (!mon) + continue; + write_lock_bh(>lock); + mon->self->addr = tipc_own_addr(net); + write_unlock_bh(>lock); + } +} + int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) { struct tipc_net *tn = tipc_net(net); diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h index 2a21b93e0d04..ed63d2e650b0 100644 --- a/net/tipc/monitor.h +++ b/net/tipc/monitor.h @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id); int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node); +void tipc_mon_reinit_self(struct net *net); extern const int tipc_max_domain_size; #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index 85707c185360..2de3cec9929d 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -42,6 +42,7 @@ #include 
"node.h" #include "bcast.h" #include "netlink.h" +#include "monitor.h" /* * The TIPC locking policy is designed to ensure a very fine locking @@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr) tipc_set_node_addr(net, addr); tipc_named_reinit(net); tipc_sk_reinit(net); + tipc_mon_reinit_self(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, TIPC_CLUSTER_SCOPE, 0, addr); } -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: update mon's self addr when node addr generated
In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values"), the 32-bit node address is only generated after the one-second trial period has expired. However, the self addr in struct tipc_monitor is not updated when the node address is generated, so it always keeps its initial value of zero. As a result, the sorting algorithm using this value does not work as expected, and neither does the neighbor monitoring framework. In this commit, we add a fix to update the self addr when the 32-bit node address is generated. Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values") Signed-off-by: Hoang Le --- net/tipc/monitor.c | 15 +++ net/tipc/monitor.h | 1 + net/tipc/net.c | 2 ++ 3 files changed, 18 insertions(+) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 6a6eae88442f..58708b4c7719 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id) kfree(mon); } +void tipc_mon_reinit_self(struct net *net) +{ + struct tipc_monitor *mon; + int bearer_id; + + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + mon = tipc_monitor(net, bearer_id); + if (!mon) + continue; + write_lock_bh(>lock); + mon->self->addr = tipc_own_addr(net); + write_unlock_bh(>lock); + } +} + int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) { struct tipc_net *tn = tipc_net(net); diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h index 2a21b93e0d04..ed63d2e650b0 100644 --- a/net/tipc/monitor.h +++ b/net/tipc/monitor.h @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id); int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node); +void tipc_mon_reinit_self(struct net *net); extern const int tipc_max_domain_size; #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index 85707c185360..2de3cec9929d 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -42,6 +42,7 @@ #include "node.h" #include 
"bcast.h" #include "netlink.h" +#include "monitor.h" /* * The TIPC locking policy is designed to ensure a very fine locking @@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr) tipc_set_node_addr(net, addr); tipc_named_reinit(net); tipc_sk_reinit(net); + tipc_mon_reinit_self(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, TIPC_CLUSTER_SCOPE, 0, addr); } -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: eliminate checking netns if node established
Currently, we scan over all network namespaces at each received discovery message in order to check if the sending peer might be present in a host-local namespace. This is unnecessary since we can assume that a peer will not change its location during an established session. We now improve the condition for this testing so that we don't perform any redundant scans. Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/node.c | 14 +- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 1f1584518221..b66d2f67b1dd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -472,10 +472,6 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_bc_sndlink(net), >bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - if (n->peer_net) { - n->peer_net = NULL; - n->peer_hash_mix = 0; - } kfree(n); n = NULL; goto exit; @@ -1073,6 +1069,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, if (sign_match && addr_match && link_up) { /* All is fine. Do nothing. */ reset = false; + /* Peer node is not a container/local namespace */ + if (!n->peer_hash_mix) + n->peer_hash_mix = hash_mixes; } else if (sign_match && addr_match && !link_up) { /* Respond. The link will come up in due time */ *respond = true; @@ -1398,11 +1397,8 @@ static void node_lost_contact(struct tipc_node *n, /* Notify publications from this node */ n->action_flags |= TIPC_NOTIFY_NODE_DOWN; - - if (n->peer_net) { - n->peer_net = NULL; - n->peer_hash_mix = 0; - } + n->peer_net = NULL; + n->peer_hash_mix = 0; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next 2/2] tipc: reduce sensitive to retransmit failures
With a huge cluster (e.g. >200 nodes), the flow gap -> retransmit packet -> acked can take a long time when STATE_MSG messages are dropped or delayed because of heavy traffic. As a result, the 1.5 sec tolerance criterion makes the link fail too easily, around the 2nd or 3rd failed retransmission attempt. Instead of re-introducing the criterion of 99 failed retransmissions to fix the issue, we increase the failure detection timer to ten times the tolerance value. Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 038861bad72b..2aed7a958a8c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1087,7 +1087,7 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, return false; if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + - msecs_to_jiffies(r->tolerance))) + msecs_to_jiffies(r->tolerance * 10))) return false; hdr = buf_msg(skb); -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next 1/2] tipc: update cluster capabilities if node deleted
There are two improvements when re-calculating cluster capabilities: - When deleting a specific down node, we need to re-calculate. - In tipc_node_cleanup(), we do not need to re-calculate if the node still exists in the cluster. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/node.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 742c04756d72..a20fabd09e7e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -665,6 +665,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer) } tipc_node_write_unlock(peer); + if (!deleted) { + spin_unlock_bh(>node_list_lock); + return deleted; + } + /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, >node_list, list) { @@ -2041,7 +2046,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; - struct tipc_node *peer; + struct tipc_node *peer, *temp_node; u32 addr; int err; @@ -2082,6 +2087,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) tipc_node_write_unlock(peer); tipc_node_delete(peer); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + tn->capabilities &= temp_node->capabilities; + } err = 0; err_out: tipc_node_put(peer); -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: eliminate checking netns if node acknowledge
Currently, we check for a local netns on every neighbor discovery message that is sent from a node in an external netns. This is unnecessary once the node has been acknowledged. We now restrict this check to the cases where a peer node comes back or a discovery message is sent from an unacknowledged node. Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns") Signed-off-by: Hoang Le --- net/tipc/node.c | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 4b60928049ea..742c04756d72 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -472,10 +472,8 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_bc_sndlink(net), >bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - if (n->peer_net) { - n->peer_net = NULL; - n->peer_hash_mix = 0; - } + n->peer_net = NULL; + n->peer_hash_mix = 0; kfree(n); n = NULL; goto exit; @@ -1068,6 +1066,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, if (sign_match && addr_match && link_up) { /* All is fine. Do nothing. */ reset = false; + /* Peer node is not a container/netns local */ + if (!n->peer_hash_mix) + n->peer_hash_mix = hash_mixes; } else if (sign_match && addr_match && !link_up) { /* Respond. The link will come up in due time */ *respond = true; @@ -1393,11 +1394,8 @@ static void node_lost_contact(struct tipc_node *n, /* Notify publications from this node */ n->action_flags |= TIPC_NOTIFY_NODE_DOWN; - - if (n->peer_net) { - n->peer_net = NULL; - n->peer_hash_mix = 0; - } + n->peer_net = NULL; + n->peer_hash_mix = 0; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next 2/2] tipc: add column to indicate netns-local
Example: Node IdentityHash Is container? State 1001002 01001002 noup 1001010 31000101 noup 1001011 31010101 noup 1001012 31020101 noup 1001003 31030001 yes up 1001013 31030101 noup 1001004 31040001 yes up 1001014 31040101 noup 1001015 31050101 noup 1001006 31060001 yes up 1001016 31060101 noup 1001007 31070001 yes up 1001008 31080001 yes up 1001009 31090001 yes up 100100a 31510001 yes up 100100b 31520001 yes up 100100c 31530001 yes up 100100d 31540001 noup 100100e 31550001 noup 100100f 31560001 noup Signed-off-by: Hoang Le --- include/uapi/linux/tipc_netlink.h | 1 + tipc/node.c | 7 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index efb958fd167d..1a071268bf5d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -160,6 +160,7 @@ enum { TIPC_NLA_NODE_UNSPEC, TIPC_NLA_NODE_ADDR, /* u32 */ TIPC_NLA_NODE_UP, /* flag */ + TIPC_NLA_NODE_LOCAL,/* flag */ __TIPC_NLA_NODE_MAX, TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 diff --git a/tipc/node.c b/tipc/node.c index 2fec6753c974..b4203af014d3 100644 --- a/tipc/node.c +++ b/tipc/node.c @@ -42,6 +42,11 @@ static int node_list_cb(const struct nlmsghdr *nlh, void *data) addr = mnl_attr_get_u32(attrs[TIPC_NLA_NODE_ADDR]); hash2nodestr(addr, str); printf("%-32s %08x ", str, addr); + if (attrs[TIPC_NLA_NODE_LOCAL]) + printf("%-12s ", "yes"); + else + printf("%-12s ", "no"); + if (attrs[TIPC_NLA_NODE_UP]) printf("up\n"); else @@ -63,7 +68,7 @@ static int cmd_node_list(struct nlmsghdr *nlh, const struct cmd *cmd, fprintf(stderr, "error, message initialisation failed\n"); return -1; } - printf("Node IdentityHash State\n"); + printf("Node IdentityHash Is container? State\n"); return msg_dumpit(nlh, node_list_cb, NULL); } -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next 1/2] tipc: support 128bit node identity for peer removing
We add the support to remove a specific node down with 128bit node identifier, as an alternative to legacy 32-bit node address. v2: improve usage for 'tipc peer remove' command Signed-off-by: Hoang Le --- tipc/peer.c | 53 - 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/tipc/peer.c b/tipc/peer.c index f6380777033d..f14ec35e6f71 100644 --- a/tipc/peer.c +++ b/tipc/peer.c @@ -59,17 +59,68 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, NULL, NULL); } +static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + __u8 id[16] = {0,}; + __u64 *w0 = (__u64 *)[0]; + __u64 *w1 = (__u64 *)[8]; + struct nlattr *nest; + char *str; + + if (cmdl->argc != cmdl->optind + 1) { + fprintf(stderr, "Usage: %s peer remove identity NODEID\n", + cmdl->argv[0]); + return -EINVAL; + } + + str = shift_cmdl(cmdl); + if (str2nodeid(str, id)) { + fprintf(stderr, "Invalid node identity\n"); + return -EINVAL; + } + + nlh = msg_init(buf, TIPC_NL_PEER_REMOVE); + if (!nlh) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1); + mnl_attr_nest_end(nlh, nest); + + return msg_doit(nlh, NULL, NULL); +} + static void cmd_peer_rm_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove PROPERTY\n\n" + "PROPERTIES\n" + " identity NODEID - Remove peer node identity\n", + cmdl->argv[0]); +} + +static void cmd_peer_rm_addr_help(struct cmdl *cmdl) { fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", cmdl->argv[0]); } +static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove identity NODEID\n", + cmdl->argv[0]); +} + static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { const 
struct cmd cmds[] = { - { "address",cmd_peer_rm_addr, cmd_peer_rm_help }, + { "address", cmd_peer_rm_addr, cmd_peer_rm_addr_help }, + { "identity", cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help }, { NULL } }; -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: update cluster capabilities if node deleted
There are two improvements to how cluster capabilities are re-calculated: - When deleting a specific down node, we need to re-calculate. - In tipc_node_cleanup(), we do not need to re-calculate if the node still exists in the cluster. Signed-off-by: Hoang Le --- net/tipc/node.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 4b60928049ea..1f1584518221 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -667,6 +667,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer) } tipc_node_write_unlock(peer); + if (!deleted) { + spin_unlock_bh(&tn->node_list_lock); + return deleted; + } + /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, &tn->node_list, list) { @@ -2043,7 +2048,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; - struct tipc_node *peer; + struct tipc_node *peer, *temp_node; u32 addr; int err; @@ -2084,6 +2089,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) tipc_node_write_unlock(peer); tipc_node_delete(peer); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } err = 0; err_out: tipc_node_put(peer); -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: reduce sensitive to retransmit failures
With a huge cluster (e.g. >200 nodes), the flow gap -> retransmit packet -> acked takes a long time when a STATE_MSG is dropped or delayed because of heavy traffic. As a result, the 1.5 sec tolerance criterion makes the link fail too easily, around the 2nd or 3rd failed retransmission attempt. Instead of re-introducing the criterion of 99 failed retransmissions to fix the issue, we increase the failure detection timer to ten times the tolerance value. Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria") Signed-off-by: Hoang Le --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 7d7a66178607..9f524c325c0d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1084,7 +1084,7 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, return false; if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + - msecs_to_jiffies(r->tolerance))) + msecs_to_jiffies(r->tolerance * 10))) return false; hdr = buf_msg(skb); -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next v2] tipc: improve throughput between nodes in netns
Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network namespaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a namespace on the same host, we just replace the namespace pointer with the receiving node/namespace's ditto, and follow the regular socket receive path through the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go through the full network stacks. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been certified and authenticated by mechanisms outside TIPC. This is no different for nodes/namespaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and has to obey the policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local namespace does it choose to let user data messages, and only those, take the crossover path to the receiving node/namespace. - If the receiving node/namespace is removed, its namespace pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. 
- To ensure the "100% certainty" criterion, and prevent any possible spoofing, received discovery messages must contain a proof that the sender knows a common secret. We use the hash mix of the sending node/namespace for this purpose, since it can be accessed directly by all other namespaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local namespaces' hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, is deemed sufficient proof that the peer node in question is in a local namespace, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence we think it can be justified to allow it to shortcut the lower protocol layers. Regarding traceability, we should notice that since commit 6c9081a3915d ("tipc: add loopback device tracking") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. This will be true even for this mechanism; by activating tcpdump on the involved nodes' loopback interfaces their inter-name space messaging can easily be tracked. 
v2: - update 'net' pointer when node left/rejoined v3: - grab read/write lock when using node ref obj v4: - clone traffics between netns to loopback Suggested-by: Jon Maloy Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/core.c | 16 + net/tipc/core.h | 6 ++ net/tipc/discover.c | 4 +- net/tipc/msg.h| 14 net/tipc/name_distr.c | 2 +- net/tipc/node.c | 155 -- net/tipc/node.h | 5 +- net/tipc/socket.c | 6 +- 8 files changed, 197 insertions(+), 11 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 23cb379a93d6..ab648dd150ee 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_sk_rht_destroy(net); } +static void __net_exit tipc_pernet_pre_exit(struct net *net) +{ + tipc_node_pre_cleanup_net(net); +} + +static struct pernet_operations tipc_pernet_pre_exit_ops = { + .pre_exit = tipc_pernet_pre_exit, +}; + static struct pernet_operations tipc_net_ops = { .init = tipc_init_net, .exit = tipc_exit_net, @@ -151,6 +160,10 @@ static int __init tipc_init(void) if (err) goto out_pernet_topsrv; + err = register_pernet_subsys(_pernet_pre_exit_ops); + if (err) + goto out_register_pernet_subsys; + err = tipc_bearer_setup(); if (err) goto out_bearer; @@ -158,6 +171,8 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: + unregister_pernet_subsys(_pernet_pre_exit_ops); +out_register_pernet_subsys: unregister_pernet_device(_topsr
[tipc-discussion] [net-next v4] tipc: improve throughput between nodes in netns
Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network namespaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a namespace on the same host, we just replace the namespace pointer with the receiving node/namespace's ditto, and follow the regular socket receive path through the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go through the full network stacks. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been certified and authenticated by mechanisms outside TIPC. This is no different for nodes/namespaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and has to obey the policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local namespace does it choose to let user data messages, and only those, take the crossover path to the receiving node/namespace. - If the receiving node/namespace is removed, its namespace pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. 
- To ensure the "100% certainty" criterion, and prevent any possible spoofing, received discovery messages must contain a proof that the sender knows a common secret. We use the hash mix of the sending node/namespace for this purpose, since it can be accessed directly by all other namespaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local namespaces' hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, is deemed sufficient proof that the peer node in question is in a local namespace, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence we think it can be justified to allow it to shortcut the lower protocol layers. Regarding traceability, we should notice that since commit 6c9081a3915d ("tipc: add loopback device tracking") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. This will be true even for this mechanism; by activating tcpdump on the involved nodes' loopback interfaces their inter-name space messaging can easily be tracked. 
v2: - update 'net' pointer when node left/rejoined v3: - grab read/write lock when using node ref obj v4: - clone traffics between netns to loopback Suggested-by: Jon Maloy Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/core.c | 16 + net/tipc/core.h | 6 ++ net/tipc/discover.c | 4 +- net/tipc/msg.h| 14 net/tipc/name_distr.c | 2 +- net/tipc/node.c | 155 -- net/tipc/node.h | 5 +- net/tipc/socket.c | 6 +- 8 files changed, 197 insertions(+), 11 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 23cb379a93d6..ab648dd150ee 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_sk_rht_destroy(net); } +static void __net_exit tipc_pernet_pre_exit(struct net *net) +{ + tipc_node_pre_cleanup_net(net); +} + +static struct pernet_operations tipc_pernet_pre_exit_ops = { + .pre_exit = tipc_pernet_pre_exit, +}; + static struct pernet_operations tipc_net_ops = { .init = tipc_init_net, .exit = tipc_exit_net, @@ -151,6 +160,10 @@ static int __init tipc_init(void) if (err) goto out_pernet_topsrv; + err = register_pernet_subsys(_pernet_pre_exit_ops); + if (err) + goto out_register_pernet_subsys; + err = tipc_bearer_setup(); if (err) goto out_bearer; @@ -158,6 +171,8 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: + unregister_pernet_subsys(_pernet_pre_exit_ops); +out_register_pernet_subsys: unregister_pernet_device(_topsr
[tipc-discussion] [net-next v3] tipc: improve throughput between nodes in netns
Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network namespaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a namespace on the same host, we just replace the namespace pointer with the receiving node/namespace's ditto, and follow the regular socket receive path through the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go through the full network stacks. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been certified and authenticated by mechanisms outside TIPC. This is no different for nodes/namespaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and has to obey the policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local namespace does it choose to let user data messages, and only those, take the crossover path to the receiving node/namespace. - If the receiving node/namespace is removed, its namespace pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. 
- To ensure the "100% certainty" criterion, and prevent any possible spoofing, received discovery messages must contain a proof that the sender knows a common secret. We use the hash mix of the sending node/namespace for this purpose, since it can be accessed directly by all other namespaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local namespaces' hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, is deemed sufficient proof that the peer node in question is in a local namespace, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence we think it can be justified to allow it to shortcut the lower protocol layers. Regarding traceability, we should notice that since commit 6c9081a3915d ("tipc: add loopback device tracking") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. This will be true even for this mechanism; by activating tcpdump on the involved nodes' loopback interfaces their inter-name space messaging can easily be tracked. 
v2: - update 'net' pointer when node left/rejoined v3: - grab read/write lock when using node ref obj Suggested-by: Jon Maloy Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/core.c | 16 + net/tipc/core.h | 6 ++ net/tipc/discover.c | 4 +- net/tipc/msg.h| 14 net/tipc/name_distr.c | 2 +- net/tipc/node.c | 151 -- net/tipc/node.h | 5 +- net/tipc/socket.c | 6 +- 8 files changed, 193 insertions(+), 11 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 23cb379a93d6..ab648dd150ee 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_sk_rht_destroy(net); } +static void __net_exit tipc_pernet_pre_exit(struct net *net) +{ + tipc_node_pre_cleanup_net(net); +} + +static struct pernet_operations tipc_pernet_pre_exit_ops = { + .pre_exit = tipc_pernet_pre_exit, +}; + static struct pernet_operations tipc_net_ops = { .init = tipc_init_net, .exit = tipc_exit_net, @@ -151,6 +160,10 @@ static int __init tipc_init(void) if (err) goto out_pernet_topsrv; + err = register_pernet_subsys(_pernet_pre_exit_ops); + if (err) + goto out_register_pernet_subsys; + err = tipc_bearer_setup(); if (err) goto out_bearer; @@ -158,6 +171,8 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: + unregister_pernet_subsys(_pernet_pre_exit_ops); +out_register_pernet_subsys: unregister_pernet_device(_topsrv_net_ops); out_pernet_topsrv: tipc_socket_stop();
[tipc-discussion] [net-next v2] tipc: improve throughput between nodes in netns
Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network namespaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a namespace on the same host, we just replace the namespace pointer with the receiving node/namespace's ditto, and follow the regular socket receive path through the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go through the full network stacks. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been certified and authenticated by mechanisms outside TIPC. This is no different for nodes/namespaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and has to obey the policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local namespace does it choose to let user data messages, and only those, take the crossover path to the receiving node/namespace. - If the receiving node/namespace is removed, its namespace pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. 
- To ensure the "100% certainty" criterion, and prevent any possible spoofing, received discovery messages must contain a proof that the sender knows a common secret. We use the hash mix of the sending node/namespace for this purpose, since it can be accessed directly by all other namespaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local namespaces' hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, is deemed sufficient proof that the peer node in question is in a local namespace, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence we think it can be justified to allow it to shortcut the lower protocol layers. Regarding traceability, we should notice that since commit 6c9081a3915d ("tipc: add loopback device tracking") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. This will be true even for this mechanism; by activating tcpdump on the involved nodes' loopback interfaces their inter-name space messaging can easily be tracked. 
v2: - update 'net' pointer when node left/rejoined Suggested-by: Jon Maloy Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/core.c | 16 + net/tipc/core.h | 6 ++ net/tipc/discover.c | 4 +- net/tipc/msg.h| 14 net/tipc/name_distr.c | 2 +- net/tipc/node.c | 148 -- net/tipc/node.h | 5 +- net/tipc/socket.c | 6 +- 8 files changed, 190 insertions(+), 11 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 23cb379a93d6..ab648dd150ee 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_sk_rht_destroy(net); } +static void __net_exit tipc_pernet_pre_exit(struct net *net) +{ + tipc_node_pre_cleanup_net(net); +} + +static struct pernet_operations tipc_pernet_pre_exit_ops = { + .pre_exit = tipc_pernet_pre_exit, +}; + static struct pernet_operations tipc_net_ops = { .init = tipc_init_net, .exit = tipc_exit_net, @@ -151,6 +160,10 @@ static int __init tipc_init(void) if (err) goto out_pernet_topsrv; + err = register_pernet_subsys(_pernet_pre_exit_ops); + if (err) + goto out_register_pernet_subsys; + err = tipc_bearer_setup(); if (err) goto out_bearer; @@ -158,6 +171,8 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: + unregister_pernet_subsys(_pernet_pre_exit_ops); +out_register_pernet_subsys: unregister_pernet_device(_topsrv_net_ops); out_pernet_topsrv: tipc_socket_stop(); @@ -177,6 +192,7 @@ static int __init tipc_init(void)
Re: [tipc-discussion] [net-next] tipc: improve throughput between nodes in netns
Hi Eric, Thanks for quick feedback. See my inline answer. Regards, Hoang -Original Message- From: Eric Dumazet Sent: Tuesday, October 22, 2019 9:41 AM To: Hoang Le ; jon.ma...@ericsson.com; ma...@donjonn.com; tipc-discussion@lists.sourceforge.net; net...@vger.kernel.org Subject: Re: [net-next] tipc: improve throughput between nodes in netns On 10/21/19 7:20 PM, Hoang Le wrote: > n->net = net; > n->capabilities = capabilities; > + n->pnet = NULL; > + for_each_net_rcu(tmp) { This does not scale well, if say you have a thousand netns ? [Hoang] This check execs only once at setup step. So we get no problem with huge namespaces. > + tn_peer = net_generic(tmp, tipc_net_id); > + if (!tn_peer) > + continue; > + /* Integrity checking whether node exists in namespace or not */ > + if (tn_peer->net_id != tn->net_id) > + continue; > + if (memcmp(peer_id, tn_peer->node_id, NODE_ID_LEN)) > + continue; > + > + hash_chk = tn_peer->random; > + hash_chk ^= net_hash_mix(_net); Why the xor with net_hash_mix(_net) is needed ? [Hoang] We're trying to eliminate a sniff at injectable discovery message. Building hash-mixes as much as possible is to prevent fake discovery messages. > + hash_chk ^= net_hash_mix(tmp); > + if (hash_chk ^ hash_mixes) > + continue; > + n->pnet = tmp; > + break; > + } How can we set n->pnet without increasing netns ->count ? Using check_net() later might trigger an use-after-free. [Hoang] In this case, peer node is down. I assume the tipc xmit function already bypassed these lines. ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: improve throughput between nodes in netns
Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network namespaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a namespace on the same host, we just replace the namespace pointer with the receiving node/namespace's ditto, and follow the regular socket receive path through the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go through the full network stacks. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been certified and authenticated by mechanisms outside TIPC. This is no different for nodes/namespaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and has to obey the policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local namespace does it choose to let user data messages, and only those, take the crossover path to the receiving node/namespace. - If the receiving node/namespace is removed, its namespace pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. 
- To ensure the "100% certainty" criterion, and prevent any possible spoofing, received discovery messages must contain a proof that the sender knows a common secret. We use the hash mix of the sending node/namespace for this purpose, since it can be accessed directly by all other namespaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local namespaces' hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, is deemed sufficient proof that the peer node in question is in a local namespace, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence we think it can be justified to allow it to shortcut the lower protocol layers. Regarding traceability, we should notice that since commit 6c9081a3915d ("tipc: add loopback device tracking") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. This will be true even for this mechanism; by activating tcpdump on the involved nodes' loopback interfaces their inter-name space messaging can easily be tracked. 
Suggested-by: Jon Maloy Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/discover.c | 10 - net/tipc/msg.h| 10 + net/tipc/name_distr.c | 2 +- net/tipc/node.c | 100 -- net/tipc/node.h | 4 +- net/tipc/socket.c | 6 +-- 6 files changed, 121 insertions(+), 11 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index c138d68e8a69..338d402fcf39 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -38,6 +38,8 @@ #include "node.h" #include "discover.h" +#include + /* min delay during bearer start up */ #define TIPC_DISC_INIT msecs_to_jiffies(125) /* max delay if bearer has no links */ @@ -83,6 +85,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, struct tipc_net *tn = tipc_net(net); u32 dest_domain = b->domain; struct tipc_msg *hdr; + u32 hash; hdr = buf_msg(skb); tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp, @@ -94,6 +97,10 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, msg_set_dest_domain(hdr, dest_domain); msg_set_bc_netid(hdr, tn->net_id); b->media->addr2msg(msg_media_addr(hdr), >addr); + hash = tn->random; + hash ^= net_hash_mix(_net); + hash ^= net_hash_mix(net); + msg_set_peer_net_hash(hdr, hash); msg_set_node_id(hdr, tipc_own_id(net)); } @@ -242,7 +249,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, if (!tipc_in_scope(legacy, b->domain, src)) return; tipc_node_check_dest(net, src, peer_id, b, caps, signature, -
Re: [tipc-discussion] [net-next] tipc: improve throughput between nodes in netns
Hi Jon, I have submitted the new code change in separate email. Please help to review again. Thanks, Hoang -Original Message- From: Jon Maloy Sent: Friday, October 18, 2019 9:21 PM To: Hoang Huu Le ; ma...@donjonn.com; tipc-...@dektech.com.au; tipc-discussion@lists.sourceforge.net Subject: RE: [net-next] tipc: improve throughput between nodes in netns Hi Hoang, Our task is to establish that the message really came from the same node we have found in a local name space. Imagine somebody is sniffing on a network, and finds there is a remote peer with proof(hash_mix)= M node id X and cluster id Y. He then creates an illegitimate local name space with the proof(hash_mix)= N , node id X, but cluster id Z, so that all its discovery messages are dropped by the receiver. He may then create fake discovery messages with proof(hash_mix)= N, node id X and cluster id Y, which will be accepted by the receiver and compared to the fake node's data. Alas, they all match, and he has succeeded in hijacking traffic to the remote node, and this may happen even if the traffic was meant to be encrypted. Admittedly there are some weaknesses in this scenario, e.g., he cannot do this if unless the remote node is temporarily down (maybe he can kill it with a fake RESET message?), and there are other reasons why this might be very hard to do. But, better safe than sorry, if we can avoid this with just a simple extra test that costs nothing. Regards ///jon > -Original Message- > From: Hoang Le > Sent: 18-Oct-19 04:24 > To: Jon Maloy ; ma...@donjonn.com; tipc- > d...@dektech.com.au; tipc-discussion@lists.sourceforge.net > Subject: RE: [net-next] tipc: improve throughput between nodes in netns > > Hi Jon, > > Thanks for good description. > However, w.r.t your comment "We even need to verify cluster ids.", I'm still > unclear why we need to isolate cluster ids here. > I guess the node had been accepted already when bypassed at function > tipc_disc_rcv. 
Then, we just check to apply new mechanism for kernel local > namespaces. > > Regars, > Hoang > -Original Message- > From: Jon Maloy > Sent: Friday, October 18, 2019 2:20 AM > To: Hoang Huu Le ; ma...@donjonn.com; > tipc-...@dektech.com.au; tipc-discussion@lists.sourceforge.net > Subject: RE: [net-next] tipc: improve throughput between nodes in netns > > Hi Hoang, > We need a very good log text to justify this. > > My proposal: > > "Currently, TIPC transports intra-node user data messages directly socket to > socket, hence shortcutting all the lower layers of the communication stack. > This gives TIPC very good intra node performance, both regarding throughput > and latency. > > We now introduce a similar mechanism for TIPC data traffic across network > name spaces located in the same kernel. On the send path, the call chain is as > always accompanied by the sending node's network name space pointer. > However, once we have reliably established that the receiving node is > represented by a name space on the same host, we just replace the name > space pointer with the receiving node/name space's ditto, and follow the > regular socket receive patch though the receiving node. This technique gives > us a throughput similar to the node internal throughput, several times larger > than if we let the traffic go though the full network stack. As a comparison, > max throughput for 64k messages is four times larger than TCP throughput for > the same type of traffic. > > To meet any security concerns, the following should be noted. > > - All nodes joining a cluster are supposed to have been be certified and > authenticated by mechanisms outside TIPC. This is no different for > nodes/name spaces on the same host; they have to auto discover each other > using the attached interfaces, and establish links which are supervised via > the > regular link monitoring mechanism. 
Hence, a kernel local node has no other > way to join a cluster than any other node, and have to obey to policies set in > the IP or device layers of the stack. > > - Only when a sender has established with 100% certainty that the peer node > is located in a kernel local name space does it choose to let user data > messages, > and only those, take the crossover path to the receiving node/name space. > > - If the receiving node/name space is removed, its name space pointer is > invalidated at all peer nodes, and their neighbor link monitoring will > eventually > note that this node is gone. > > - To ensure the "100% certainty" criteria, and prevent any possible spoofing, > received discovery messages must contain a proof that they know a common > secret. We use the hash_mix of the sending node/name space for this > purpose, since it can be a
[tipc-discussion] [net-next v2] tipc: improve throughput between nodes in netns
Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network name spaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a name space on the same host, we just replace the name space pointer with the receiving node/name space's ditto, and follow the regular socket receive path through the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go through the full network stack. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been certified and authenticated by mechanisms outside TIPC. This is no different for nodes/name spaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and has to obey the policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local name space does it choose to let user data messages, and only those, take the crossover path to the receiving node/name space. - If the receiving node/name space is removed, its name space pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. 
- To ensure the "100% certainty" criterion, and prevent any possible spoofing, received discovery messages must contain a proof that they know a common secret. We use the hash_mix of the sending node/name space for this purpose, since it can be accessed directly by all other name spaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local name spaces' hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, is deemed sufficient proof that the peer node in question is in a local name space, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence should be given more freedom to shortcut the lower protocol than other protocols. Regarding traceability, we should notice that since commit 6c9081a3915d ("tipc: add loopback device tracking") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. This will be true even for this mechanism; by activating tcpdump on the involved nodes' loopback interfaces their inter-name space messaging can easily be tracked. 
Suggested-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/discover.c | 10 - net/tipc/msg.h| 10 + net/tipc/name_distr.c | 2 +- net/tipc/node.c | 100 -- net/tipc/node.h | 4 +- net/tipc/socket.c | 6 +-- 6 files changed, 121 insertions(+), 11 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index c138d68e8a69..338d402fcf39 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -38,6 +38,8 @@ #include "node.h" #include "discover.h" +#include + /* min delay during bearer start up */ #define TIPC_DISC_INIT msecs_to_jiffies(125) /* max delay if bearer has no links */ @@ -83,6 +85,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, struct tipc_net *tn = tipc_net(net); u32 dest_domain = b->domain; struct tipc_msg *hdr; + u32 hash; hdr = buf_msg(skb); tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp, @@ -94,6 +97,10 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, msg_set_dest_domain(hdr, dest_domain); msg_set_bc_netid(hdr, tn->net_id); b->media->addr2msg(msg_media_addr(hdr), >addr); + hash = tn->random; + hash ^= net_hash_mix(_net); + hash ^= net_hash_mix(net); + msg_set_peer_net_hash(hdr, hash); msg_set_node_id(hdr, tipc_own_id(net)); } @@ -242,7 +249,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, if (!tipc_in_scope(legacy, b->domain, src)) return; tipc_node_check_dest(net, src, peer_id, b, caps, signature, -
Re: [tipc-discussion] [net-next] tipc: improve throughput between nodes in netns
Hi Jon, Thanks for good description. However, w.r.t your comment "We even need to verify cluster ids.", I'm still unclear why we need to isolate cluster ids here. I guess the node had been accepted already when bypassed at function tipc_disc_rcv. Then, we just check to apply new mechanism for kernel local namespaces. Regars, Hoang -Original Message- From: Jon Maloy Sent: Friday, October 18, 2019 2:20 AM To: Hoang Huu Le ; ma...@donjonn.com; tipc-...@dektech.com.au; tipc-discussion@lists.sourceforge.net Subject: RE: [net-next] tipc: improve throughput between nodes in netns Hi Hoang, We need a very good log text to justify this. My proposal: "Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network name spaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a name space on the same host, we just replace the name space pointer with the receiving node/name space's ditto, and follow the regular socket receive patch though the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go though the full network stack. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been be certified and authenticated by mechanisms outside TIPC. 
This is no different for nodes/name spaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and have to obey to policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local name space does it choose to let user data messages, and only those, take the crossover path to the receiving node/name space. - If the receiving node/name space is removed, its name space pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. - To ensure the "100% certainty" criteria, and prevent any possible spoofing, received discovery messages must contain a proof that they know a common secret. We use the hash_mix of the sending node/name space for this purpose, since it can be accessed directly by all other name spaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local name spaces' hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, this is deemed sufficient proof that the peer node in question is in a local name space, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence should be given more freedom to shortcut the lower protocol than other protocols. Regarding traceability, we should notice that since commit 6c9081a3915d ("add loopback device tracing") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. 
This will be true even for this mechanism; by activating tcpdump on the invloved nodes' loopback interfaces their inter-name space messaging can easily be tracked." I also think there should be a "Suggested-by: Jon Maloy " at the bottom of the patch. See more comments below. > -Original Message- > From: Hoang Le > Sent: 17-Oct-19 06:10 > To: Jon Maloy ; ma...@donjonn.com; tipc- > d...@dektech.com.au > Subject: [net-next] tipc: improve throughput between nodes in netns > > Introduce traffic cross namespaces transmission as intranode. > By this way, throughput between nodes in namespace as fast as local. > Looks though the architectural view of TIPC, the new TIPC mechanism for > containers will not introduce any security or breaking the current policies at > all: > > 1/ Extranode: > > Node A Node B > +-+ +-+ > | TIPC | | TIPC | > | Application | | Application | > |-|
[tipc-discussion] [iproute2-next v2] tipc: support 128bit node identity for peer removing
We add the support to remove a specific node down with 128bit node identifier, as an alternative to legacy 32-bit node address. v2: improve usage for 'tipc peer remove' command Signed-off-by: Hoang Le --- tipc/peer.c | 53 - 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/tipc/peer.c b/tipc/peer.c index f6380777033d..f14ec35e6f71 100644 --- a/tipc/peer.c +++ b/tipc/peer.c @@ -59,17 +59,68 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, NULL, NULL); } +static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + __u8 id[16] = {0,}; + __u64 *w0 = (__u64 *)[0]; + __u64 *w1 = (__u64 *)[8]; + struct nlattr *nest; + char *str; + + if (cmdl->argc != cmdl->optind + 1) { + fprintf(stderr, "Usage: %s peer remove identity NODEID\n", + cmdl->argv[0]); + return -EINVAL; + } + + str = shift_cmdl(cmdl); + if (str2nodeid(str, id)) { + fprintf(stderr, "Invalid node identity\n"); + return -EINVAL; + } + + nlh = msg_init(buf, TIPC_NL_PEER_REMOVE); + if (!nlh) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1); + mnl_attr_nest_end(nlh, nest); + + return msg_doit(nlh, NULL, NULL); +} + static void cmd_peer_rm_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove PROPERTY\n\n" + "PROPERTIES\n" + " identity NODEID - Remove peer node identity\n", + cmdl->argv[0]); +} + +static void cmd_peer_rm_addr_help(struct cmdl *cmdl) { fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", cmdl->argv[0]); } +static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove identity NODEID\n", + cmdl->argv[0]); +} + static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { const 
struct cmd cmds[] = { - { "address",cmd_peer_rm_addr, cmd_peer_rm_help }, + { "address", cmd_peer_rm_addr, cmd_peer_rm_addr_help }, + { "identity", cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help }, { NULL } }; -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next v2] tipc: support 128bit node identity for peer removing
We add the support to remove a specific node down with 128bit node identifier, as an alternative to legacy 32-bit node address. v2: improve usage for 'tipc peer remove' command Signed-off-by: Hoang Le --- tipc/peer.c | 55 - 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/tipc/peer.c b/tipc/peer.c index f6380777033d..e1517743f80f 100644 --- a/tipc/peer.c +++ b/tipc/peer.c @@ -59,17 +59,70 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, NULL, NULL); } +static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + __u8 id[16] = {0,}; + __u64 *w0 = (__u64 *)[0]; + __u64 *w1 = (__u64 *)[8]; + struct nlattr *nest; + char *str; + + if (cmdl->argc != cmdl->optind + 1) { + fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n", + cmdl->argv[0]); + return -EINVAL; + } + + str = shift_cmdl(cmdl); + if (str2nodeid(str, id)) { + fprintf(stderr, "Invalid node identity\n"); + return -EINVAL; + } + + nlh = msg_init(buf, TIPC_NL_PEER_REMOVE); + if (!nlh) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1); + mnl_attr_nest_end(nlh, nest); + + return msg_doit(nlh, NULL, NULL); +} + static void cmd_peer_rm_help(struct cmdl *cmdl) +{ + fprintf(stderr, + "Usage: %s peer remove PROPERTY\n\n" + "PROPERTIES\n" + " address - Remove peer node address\n" + " nodeid - Remove peer node identity\n", + cmdl->argv[0]); +} + +static void cmd_peer_rm_addr_help(struct cmdl *cmdl) { fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", cmdl->argv[0]); } +static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n", + cmdl->argv[0]); +} + static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl 
*cmdl, void *data) { const struct cmd cmds[] = { - { "address",cmd_peer_rm_addr, cmd_peer_rm_help }, + { "address", cmd_peer_rm_addr, cmd_peer_rm_addr_help }, + { "nodeid", cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help }, { NULL } }; -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: support 128bit node identity for peer removing
We add the support to remove a specific node down with 128bit node identifier, as an alternative to legacy 32-bit node address. Signed-off-by: Hoang Le --- net/tipc/node.c | 29 + 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index c8f6177dd5a2..152b98b2e8f5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1926,8 +1926,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + u8 node_id[NODE_ID_LEN]; + u64 *w0 = (u64 *)_id[0]; + u64 *w1 = (u64 *)_id[8]; struct tipc_node *peer; - u32 addr; + u32 addr = 0; int err; /* We identify the peer by its net */ @@ -1940,16 +1943,26 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (!attrs[TIPC_NLA_NET_ADDR]) - return -EINVAL; - - addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (attrs[TIPC_NLA_NET_ADDR]) { + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!addr) + return -EINVAL; + if (in_own_node(net, addr)) + return -ENOTSUPP; + } - if (in_own_node(net, addr)) - return -ENOTSUPP; + if (attrs[TIPC_NLA_NET_NODEID]) { + if (!attrs[TIPC_NLA_NET_NODEID_W1]) + return -EINVAL; + *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); + *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); + } spin_lock_bh(>node_list_lock); - peer = tipc_node_find(net, addr); + if (!addr) + peer = tipc_node_find_by_id(net, node_id); + else + peer = tipc_node_find(net, addr); if (!peer) { spin_unlock_bh(>node_list_lock); return -ENXIO; -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next] tipc: support 128bit node identity for peer removing
We add the support to remove a specific node down with 128bit node identifier, as an alternative to legacy 32-bit node address. Signed-off-by: Hoang Le --- tipc/peer.c | 54 - 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/tipc/peer.c b/tipc/peer.c index f6380777033d..9f116b257fda 100644 --- a/tipc/peer.c +++ b/tipc/peer.c @@ -59,17 +59,69 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, NULL, NULL); } +static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + __u8 id[16] = {0,}; + __u64 *w0 = (__u64 *)[0]; + __u64 *w1 = (__u64 *)[8]; + struct nlattr *nest; + char *str; + + if (cmdl->argc != cmdl->optind + 1) { + fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n", + cmdl->argv[0]); + return -EINVAL; + } + + str = shift_cmdl(cmdl); + if (str2nodeid(str, id)) { + fprintf(stderr, "Invalid node identity\n"); + return -EINVAL; + } + + nlh = msg_init(buf, TIPC_NL_PEER_REMOVE); + if (!nlh) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1); + mnl_attr_nest_end(nlh, nest); + + return msg_doit(nlh, NULL, NULL); +} + static void cmd_peer_rm_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove PROPERTY\n\n", + "PROPERTIES\n" + " address - Remove peer node address\n" + " nodeid - Remove peer node identity\n", + cmdl->argv[0]); +} + +static void cmd_peer_rm_addr_help(struct cmdl *cmdl) { fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", cmdl->argv[0]); } +static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n", + cmdl->argv[0]); +} + static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { const struct cmd cmds[] = { 
- { "address",cmd_peer_rm_addr, cmd_peer_rm_help }, + { "address", cmd_peer_rm_addr, cmd_peer_rm_addr_help }, + { "nodeid", cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help }, { NULL } }; -- 2.20.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: improve throughput between nodes in netns
Introduce traffic cross namespaces transmission as local node. By this way, throughput between nodes in namespace as fast as local. Testcase: $ip netns exec 1 benchmark_client -c 100 $ip netns exec 2 benchmark_server Before: +-+ | Msg Size | # | # Msgs/ | Elapsed |Throughput | | [octets] | Conns |Conn | [ms] ++ || | | | Total [Msg/s] | Total [Mb/s] | Per Conn [Mb/s] | +-+ |64 | 100 |64000 |13005 | 492103 | 251 | 2 | +-+ | 256 | 100 |32000 | 4964 | 644627 |1320 | 13 | +-+ | 1024 | 100 |16000 | 4524 | 353612 |2896 | 28 | +-+ | 4096 | 100 | 8000 | 3675 | 217644 |7131 | 71 | +-+ | 16384 | 100 | 4000 | 7914 |50540 |6624 | 66 | +-+ | 65536 | 100 | 2000 |13000 |15384 |8065 | 80 | +-+ After: +-+ | Msg Size | # | # Msgs/ | Elapsed |Throughput | | [octets] | Conns |Conn | [ms] ++ || | | | Total [Msg/s] | Total [Mb/s] | Per Conn [Mb/s] | +-+ |64 | 100 |64000 | 7842 | 816090 | 417 | 4 | +-+ | 256 | 100 |32000 | 3593 | 890469 |1823 | 18 | +-+ | 1024 | 100 |16000 | 1835 | 871828 |7142 | 71 | +-+ | 4096 | 100 | 8000 | 1134 | 704904 | 23098 | 230 | +-+ | 16384 | 100 | 4000 | 878 | 455295 | 59676 | 596 | +-+ | 65536 | 100 | 2000 | 1007 | 198487 | 104064 | 1040 | +-+ Signed-off-by: Hoang Le --- net/tipc/discover.c | 6 ++- net/tipc/msg.h| 10 + net/tipc/name_distr.c | 2 +- net/tipc/node.c | 94 +-- net/tipc/node.h | 4 +- net/tipc/socket.c | 6 +-- 6 files changed, 111 insertions(+), 11 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index c138d68e8a69..98d4eea97eb7 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -38,6 +38,8 @@ #include "node.h" #include "discover.h" +#include + /* min delay during bearer start up */ #define TIPC_DISC_INIT msecs_to_jiffies(125) /* max delay if bearer has no links */ @@ -94,6 +96,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, msg_set_dest_domain(hdr, dest_domain); msg_set_bc_netid(hdr, tn->net_id); b->media->addr2msg(msg_media_addr(hdr), >addr); + msg_set_peer_net_hash(hdr, net_hash_mix(net)); 
msg_set_node_id(hdr, tipc_own_id(net)); } @@ -200,6 +203,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u8 peer_id[NODE_ID_LEN] = {0,}; u32 dst = msg_dest_domain(hdr); u32 net_id = msg_bc_netid(hdr); + u32 pnet_hash = msg_peer_net_hash(hdr); struct tipc_media_addr maddr; u32 src = msg_prevnode(hdr); u32 mtyp = msg_type(hdr); @@ -242,7 +246,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, if (
Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation
Hi Jon, Please ignore previous results because I'm wrong on testing ICMP starved by TIPC. Regards, Hoang -Original Message- From: Hoang Le Sent: Tuesday, July 30, 2019 11:24 AM To: 'tung quang nguyen' ; tipc-discussion@lists.sourceforge.net; 'Jon Maloy' ; 'Jon Maloy' ; ying@windriver.com Subject: Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation Hi Jon, I combine benchmark test with 50 connections and ping cmd from two nodes. You can compare results from original code, your fix and Tung's fix as following: Original code: node1 ~ # ping -s 1400 10.0.0.2 -c 300 PING 10.0.0.2 (10.0.0.2): 1400 data bytes 1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.337 ms 1408 bytes from 10.0.0.2: seq=24 ttl=64 time=1.208 ms 1408 bytes from 10.0.0.2: seq=25 ttl=64 time=1.145 ms 1408 bytes from 10.0.0.2: seq=76 ttl=64 time=1.145 ms 1408 bytes from 10.0.0.2: seq=78 ttl=64 time=1.449 ms 1408 bytes from 10.0.0.2: seq=130 ttl=64 time=1.230 ms 1408 bytes from 10.0.0.2: seq=134 ttl=64 time=1.020 ms 1408 bytes from 10.0.0.2: seq=185 ttl=64 time=1.743 ms 1408 bytes from 10.0.0.2: seq=186 ttl=64 time=1.502 ms 1408 bytes from 10.0.0.2: seq=187 ttl=64 time=1.289 ms 1408 bytes from 10.0.0.2: seq=189 ttl=64 time=1.306 ms 1408 bytes from 10.0.0.2: seq=239 ttl=64 time=1.254 ms 1408 bytes from 10.0.0.2: seq=241 ttl=64 time=1.114 ms 1408 bytes from 10.0.0.2: seq=242 ttl=64 time=1.058 ms --- 10.0.0.2 ping statistics --- 301 packets transmitted, 301 packets received, 0% packet loss round-trip min/avg/max = 0.077/0.361/1.743 ms - JON's fix node1 ~ # ping -s 1400 10.0.0.2 -c 300 1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.013 ms 1408 bytes from 10.0.0.2: seq=87 ttl=64 time=2.468 ms --- 10.0.0.2 ping statistics --- 300 packets transmitted, 300 packets received, 0% packet loss round-trip min/avg/max = 0.119/0.323/2.468 ms node1 ~ # - Tung's fix node1 ~ # ping -s 1400 10.0.0.2 -c 300 --- 10.0.0.2 ping statistics --- 300 packets transmitted, 300 packets received, 0% 
packet loss round-trip min/avg/max = 0.101/0.303/0.864 ms >From ping statistics, I could see your solution starved twice and maximum time >is 2.468 ms. Then, we're not completely solve the issue yet. But test results from Tung's fix, I don't see a starvation happen. So, I think we can go ahead with Tung's code fixed. Please give me your idea. Regards, Hoang -Original Message- From: tung quang nguyen Sent: Thursday, July 25, 2019 5:50 PM To: 'Jon Maloy' ; 'Jon Maloy' ; tipc-discussion@lists.sourceforge.net; ying@windriver.com Subject: Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation Hi Jon, Let's go for this way for now. Thanks. Best regards, Tung Nguyen -Original Message- From: Jon Maloy Sent: Friday, July 19, 2019 10:06 AM To: Jon Maloy ; Jon Maloy Cc: mohan.krishna.ghanta.krishnamur...@ericsson.com; parthasarathy.bhuvara...@gmail.com; tung.q.ngu...@dektech.com.au; hoang.h...@dektech.com.au; canh.d@dektech.com.au; tuong.t.l...@dektech.com.au; gordan.mihalje...@dektech.com.au; ying@windriver.com; tipc-discussion@lists.sourceforge.net Subject: [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation In commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") we allowed senders to add exactly one list of extra buffers to the link backlog queues during link congestion (aka "oversubscription"). However, the criteria for when to stop adding wakeup messages to the input queue when the overload abates is inaccurate, and may cause starvation problems during very high load. Currently, we stop adding wakeup messages after 10 total failed attempts where we find that there is no space left in the backlog queue for a certain importance level. The counter for this is accumulated across all levels, which may lead the algorithm to leave the loop prematurely, although there may still be plenty of space available at some levels. 
The result is sometimes that messages near the wakeup queue tail are not added to the input queue as they should be. We now introduce a more exact algorithm, where we keep adding wakeup messages to a level as long as the backlog queue has free slots for the corresponding level, and stop at the moment there are no more such slots or when there are no more wakeup messages to dequeue. Fixes: 365ad35 ("tipc: reduce risk of user starvation during link congestion") Reported-by: Tung Nguyen Signed-off-by: Jon Maloy --- net/tipc/link.c | 29 + 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 66d3a07..f1d2732 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -853,18 +853,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) */ static void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = >wakeupq; + struct sk_buff_h
Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation
Hi Jon, I combine benchmark test with 50 connections and ping cmd from two nodes. You can compare results from original code, your fix and Tung's fix as following: Original code: node1 ~ # ping -s 1400 10.0.0.2 -c 300 PING 10.0.0.2 (10.0.0.2): 1400 data bytes 1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.337 ms 1408 bytes from 10.0.0.2: seq=24 ttl=64 time=1.208 ms 1408 bytes from 10.0.0.2: seq=25 ttl=64 time=1.145 ms 1408 bytes from 10.0.0.2: seq=76 ttl=64 time=1.145 ms 1408 bytes from 10.0.0.2: seq=78 ttl=64 time=1.449 ms 1408 bytes from 10.0.0.2: seq=130 ttl=64 time=1.230 ms 1408 bytes from 10.0.0.2: seq=134 ttl=64 time=1.020 ms 1408 bytes from 10.0.0.2: seq=185 ttl=64 time=1.743 ms 1408 bytes from 10.0.0.2: seq=186 ttl=64 time=1.502 ms 1408 bytes from 10.0.0.2: seq=187 ttl=64 time=1.289 ms 1408 bytes from 10.0.0.2: seq=189 ttl=64 time=1.306 ms 1408 bytes from 10.0.0.2: seq=239 ttl=64 time=1.254 ms 1408 bytes from 10.0.0.2: seq=241 ttl=64 time=1.114 ms 1408 bytes from 10.0.0.2: seq=242 ttl=64 time=1.058 ms --- 10.0.0.2 ping statistics --- 301 packets transmitted, 301 packets received, 0% packet loss round-trip min/avg/max = 0.077/0.361/1.743 ms - JON's fix node1 ~ # ping -s 1400 10.0.0.2 -c 300 1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.013 ms 1408 bytes from 10.0.0.2: seq=87 ttl=64 time=2.468 ms --- 10.0.0.2 ping statistics --- 300 packets transmitted, 300 packets received, 0% packet loss round-trip min/avg/max = 0.119/0.323/2.468 ms node1 ~ # - Tung's fix node1 ~ # ping -s 1400 10.0.0.2 -c 300 --- 10.0.0.2 ping statistics --- 300 packets transmitted, 300 packets received, 0% packet loss round-trip min/avg/max = 0.101/0.303/0.864 ms >From ping statistics, I could see your solution starved twice and maximum time >is 2.468 ms. Then, we're not completely solve the issue yet. But test results from Tung's fix, I don't see a starvation happen. So, I think we can go ahead with Tung's code fixed. Please give me your idea. 
Regards, Hoang -Original Message- From: tung quang nguyen Sent: Thursday, July 25, 2019 5:50 PM To: 'Jon Maloy' ; 'Jon Maloy' ; tipc-discussion@lists.sourceforge.net; ying@windriver.com Subject: Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation Hi Jon, Let's go for this way for now. Thanks. Best regards, Tung Nguyen -Original Message- From: Jon Maloy Sent: Friday, July 19, 2019 10:06 AM To: Jon Maloy ; Jon Maloy Cc: mohan.krishna.ghanta.krishnamur...@ericsson.com; parthasarathy.bhuvara...@gmail.com; tung.q.ngu...@dektech.com.au; hoang.h...@dektech.com.au; canh.d@dektech.com.au; tuong.t.l...@dektech.com.au; gordan.mihalje...@dektech.com.au; ying@windriver.com; tipc-discussion@lists.sourceforge.net Subject: [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation In commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") we allowed senders to add exactly one list of extra buffers to the link backlog queues during link congestion (aka "oversubscription"). However, the criteria for when to stop adding wakeup messages to the input queue when the overload abates is inaccurate, and may cause starvation problems during very high load. Currently, we stop adding wakeup messages after 10 total failed attempts where we find that there is no space left in the backlog queue for a certain importance level. The counter for this is accumulated across all levels, which may lead the algorithm to leave the loop prematurely, although there may still be plenty of space available at some levels. The result is sometimes that messages near the wakeup queue tail are not added to the input queue as they should be. We now introduce a more exact algorithm, where we keep adding wakeup messages to a level as long as the backlog queue has free slots for the corresponding level, and stop at the moment there are no more such slots or when there are no more wakeup messages to dequeue. 
Fixes: 365ad35 ("tipc: reduce risk of user starvation during link congestion") Reported-by: Tung Nguyen Signed-off-by: Jon Maloy --- net/tipc/link.c | 29 + 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 66d3a07..f1d2732 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -853,18 +853,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) */ static void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = >wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; + + __skb_queue_head_init(); - skb_queue_walk_safe(>wakeupq, skb, tmp) { + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; + + skb_queue_walk_safe(wakeupq, skb, tmp) {
[tipc-discussion] [net-next] tipc: fix retransmission failure when link re-established
Currently, a link is declared stale and reset if the stale limit time is longer than the link tolerance time. However, this stale limit is not initialized correctly when the link is reset. This leads to the link being declared failed, because the reset criterion is always met even though no packet has been retransmitted while the link is re-establishing. To fix this, we set the stale limit time far into the future in two places: when a link is reset and when packets are acked by the peer. Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria") Signed-off-by: Hoang Le --- net/tipc/link.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 66d3a07bc571..2ba79d451f08 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -909,6 +909,7 @@ void tipc_link_reset(struct tipc_link *l) l->silent_intv_cnt = 0; l->rst_cnt = 0; l->bc_peer_is_up = false; + l->stale_limit = msecs_to_jiffies(~0); memset(>mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); } @@ -1510,6 +1511,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Forward queues and wake up waiting users */ if (likely(tipc_link_release_pkts(l, msg_ack(hdr { + l->stale_limit = msecs_to_jiffies(~0); tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(>wakeupq))) link_prepare_wakeup(l); -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next v6] tipc: support interface name when activating UDP bearer
Support for indicating interface name has an ip address in parallel with specifying ip address when activating UDP bearer. This liberates the user from keeping track of the current ip address for each device. Old command syntax: $tipc bearer enable media udp name NAME localip IP New command syntax: $tipc bearer enable media udp name NAME [localip IP|dev DEVICE] v2: - Removed initial value for fd - Fixed the returning value for cmd_bearer_validate_and_get_addr to make its consistent with using: zero or non-zero v3: - Switch to use helper 'get_ifname' to retrieve interface name v4: - Replace legacy SIOCGIFADDR by netlink v5: - Fix leaky rtnl_handle Acked-by: Ying Xue Signed-off-by: Hoang Le --- tipc/bearer.c | 94 --- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 1f3a4d1e..4470819e4a96 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -19,10 +19,12 @@ #include #include #include +#include #include #include +#include "utils.h" #include "cmdl.h" #include "msg.h" #include "bearer.h" @@ -68,7 +70,7 @@ static void cmd_bearer_enable_l2_help(struct cmdl *cmdl, char *media) static void cmd_bearer_enable_udp_help(struct cmdl *cmdl, char *media) { fprintf(stderr, - "Usage: %s bearer enable [OPTIONS] media %s name NAME localip IP [UDP OPTIONS]\n\n" + "Usage: %s bearer enable [OPTIONS] media %s name NAME [localip IP|device DEVICE] [UDP OPTIONS]\n\n" "OPTIONS\n" " domain DOMAIN - Discovery domain\n" " priority PRIORITY - Bearer priority\n\n" @@ -119,6 +121,76 @@ static int generate_multicast(short af, char *buf, int bufsize) return 0; } +static struct ifreq ifr; +static int nl_dump_req_filter(struct nlmsghdr *nlh, int reqlen) +{ + struct ifaddrmsg *ifa = NLMSG_DATA(nlh); + + ifa->ifa_index = ifr.ifr_ifindex; + + return 0; +} + +static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg) +{ + struct ifaddrmsg *ifa = NLMSG_DATA(nlh); + char *r_addr = (char *)arg; + int len = nlh->nlmsg_len; + struct rtattr *addr_attr; + + if 
(ifr.ifr_ifindex != ifa->ifa_index) + return 0; + + if (strlen(r_addr) > 0) + return 0; + + addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa), +len - NLMSG_LENGTH(sizeof(*ifa))); + if (!addr_attr) + return 0; + + if (ifa->ifa_family == AF_INET) { + struct sockaddr_in ip4addr; + memcpy(_addr, RTA_DATA(addr_attr), + sizeof(struct in_addr)); + inet_ntop(AF_INET, _addr, r_addr, + INET_ADDRSTRLEN); + } else if (ifa->ifa_family == AF_INET6) { + struct sockaddr_in6 ip6addr; + memcpy(_addr, RTA_DATA(addr_attr), + sizeof(struct in6_addr)); + inet_ntop(AF_INET6, _addr, r_addr, + INET6_ADDRSTRLEN); + } + return 0; +} + +static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr) +{ + struct rtnl_handle rth = { .fd = -1 }; + int err = -1; + + memset(, 0, sizeof(ifr)); + if (!name || !r_addr || get_ifname(ifr.ifr_name, name)) + return err; + + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name); + if (!ifr.ifr_ifindex) + return err; + + /* remove from cache */ + ll_drop_by_index(ifr.ifr_ifindex); + + if ((err = rtnl_open(, 0)) < 0) + return err; + + if ((err = rtnl_addrdump_req(, AF_UNSPEC, nl_dump_req_filter)) > 0) + err = rtnl_dump_filter(, nl_dump_addr_filter, r_addr); + + rtnl_close(); + return err; +} + static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, struct cmdl *cmdl) { @@ -136,13 +208,25 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, .ai_family = AF_UNSPEC, .ai_socktype = SOCK_DGRAM }; + char addr[INET6_ADDRSTRLEN] = {0}; - if (!(opt = get_opt(opts, "localip"))) { - fprintf(stderr, "error, udp bearer localip missing\n"); - cmd_bearer_enable_udp_help(cmdl, "udp"); + opt = get_opt(opts, "device"); + if (opt && cmd_bearer_validate_and_get_addr(opt->val, addr) < 0) { + fprintf(stderr, "error, no device name available\n"); return -EINVAL; } - locip = opt->val; + + if (strlen(addr) > 0) { + locip = addr; + } else { + opt = get_opt(
Re: [tipc-discussion] [iproute2-next v5] tipc: support interface name when activating UDP bearer
Thanks David. I will update the code according to your comments. For the item: > + /* remove from cache */ > + ll_drop_by_index(ifr.ifr_ifindex); why the call to ll_drop_by_index? doing so means that ifindex is looked up again. [Hoang] > + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name); This function stores an ll_cache entry in the hash map table. We have to call ll_drop_by_index() to prevent a memory leak. Regards, Hoang -Original Message- From: David Ahern Sent: Saturday, June 22, 2019 5:50 AM To: Hoang Le ; dsah...@gmail.com; jon.ma...@ericsson.com; ma...@donjonn.com; ying@windriver.com; net...@vger.kernel.org; tipc-discussion@lists.sourceforge.net Subject: Re: [iproute2-next v5] tipc: support interface name when activating UDP bearer On 6/13/19 2:07 AM, Hoang Le wrote: > @@ -119,6 +121,74 @@ static int generate_multicast(short af, char *buf, int > bufsize) > return 0; > } > > +static struct ifreq ifr = {}; you don't need to initialize globals, but you could pass a struct as the arg to the filter here which is both the addr buffer and the ifindex of interest. 
> +static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg) > +{ > + struct ifaddrmsg *ifa = NLMSG_DATA(nlh); > + char *r_addr = (char *)arg; > + int len = nlh->nlmsg_len; > + struct rtattr *addr_attr; > + > + if (ifr.ifr_ifindex != ifa->ifa_index) > + return 0; > + > + if (strlen(r_addr) > 0) > + return 1; > + > + addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa), > + len - NLMSG_LENGTH(sizeof(*ifa))); > + if (!addr_attr) > + return 0; > + > + if (ifa->ifa_family == AF_INET) { > + struct sockaddr_in ip4addr; > + memcpy(_addr, RTA_DATA(addr_attr), > +sizeof(struct in_addr)); > + if (inet_ntop(AF_INET, _addr, r_addr, > + INET_ADDRSTRLEN) == NULL) > + return 0; > + } else if (ifa->ifa_family == AF_INET6) { > + struct sockaddr_in6 ip6addr; > + memcpy(_addr, RTA_DATA(addr_attr), > +sizeof(struct in6_addr)); > + if (inet_ntop(AF_INET6, _addr, r_addr, > + INET6_ADDRSTRLEN) == NULL) > + return 0; > + } > + return 1; > +} > + > +static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr) > +{ > + struct rtnl_handle rth ={ .fd = -1 }; space between '={' > + > + memset(, 0, sizeof(ifr)); > + if (!name || !r_addr || get_ifname(ifr.ifr_name, name)) > + return 0; > + > + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name); > + if (!ifr.ifr_ifindex) > + return 0; > + > + /* remove from cache */ > + ll_drop_by_index(ifr.ifr_ifindex); why the call to ll_drop_by_index? doing so means that ifindex is looked up again. > + > + if (rtnl_open(, 0) < 0) > + return 0; > + > + if (rtnl_addrdump_req(, AF_UNSPEC, 0) < 0) { If you pass a filter here to set ifa_index, this command on newer kernels will be much more efficient. See ipaddr_dump_filter. > + rtnl_close(); > + return 0; > + } > + > + if (rtnl_dump_filter(, nl_dump_addr_filter, r_addr) < 0) { > + rtnl_close(); > + return 0; > + } > + rtnl_close(); > + return 1; > +} it would better to have 1 exit with the rtnl_close and return rc based on above. 
___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next v5] tipc: support interface name when activating UDP bearer
Support for indicating interface name has an ip address in parallel with specifying ip address when activating UDP bearer. This liberates the user from keeping track of the current ip address for each device. Old command syntax: $tipc bearer enable media udp name NAME localip IP New command syntax: $tipc bearer enable media udp name NAME [localip IP|dev DEVICE] v2: - Removed initial value for fd - Fixed the returning value for cmd_bearer_validate_and_get_addr to make its consistent with using: zero or non-zero v3: - Switch to use helper 'get_ifname' to retrieve interface name v4: - Replace legacy SIOCGIFADDR by netlink v5: - Fix leaky rtnl_handle Acked-by: Ying Xue Signed-off-by: Hoang Le --- tipc/bearer.c | 92 --- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 1f3a4d1e..e17e2477c1ad 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -19,10 +19,12 @@ #include #include #include +#include #include #include +#include "utils.h" #include "cmdl.h" #include "msg.h" #include "bearer.h" @@ -68,7 +70,7 @@ static void cmd_bearer_enable_l2_help(struct cmdl *cmdl, char *media) static void cmd_bearer_enable_udp_help(struct cmdl *cmdl, char *media) { fprintf(stderr, - "Usage: %s bearer enable [OPTIONS] media %s name NAME localip IP [UDP OPTIONS]\n\n" + "Usage: %s bearer enable [OPTIONS] media %s name NAME [localip IP|device DEVICE] [UDP OPTIONS]\n\n" "OPTIONS\n" " domain DOMAIN - Discovery domain\n" " priority PRIORITY - Bearer priority\n\n" @@ -119,6 +121,74 @@ static int generate_multicast(short af, char *buf, int bufsize) return 0; } +static struct ifreq ifr = {}; +static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg) +{ + struct ifaddrmsg *ifa = NLMSG_DATA(nlh); + char *r_addr = (char *)arg; + int len = nlh->nlmsg_len; + struct rtattr *addr_attr; + + if (ifr.ifr_ifindex != ifa->ifa_index) + return 0; + + if (strlen(r_addr) > 0) + return 1; + + addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa), +len - 
NLMSG_LENGTH(sizeof(*ifa))); + if (!addr_attr) + return 0; + + if (ifa->ifa_family == AF_INET) { + struct sockaddr_in ip4addr; + memcpy(_addr, RTA_DATA(addr_attr), + sizeof(struct in_addr)); + if (inet_ntop(AF_INET, _addr, r_addr, + INET_ADDRSTRLEN) == NULL) + return 0; + } else if (ifa->ifa_family == AF_INET6) { + struct sockaddr_in6 ip6addr; + memcpy(_addr, RTA_DATA(addr_attr), + sizeof(struct in6_addr)); + if (inet_ntop(AF_INET6, _addr, r_addr, + INET6_ADDRSTRLEN) == NULL) + return 0; + } + return 1; +} + +static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr) +{ + struct rtnl_handle rth ={ .fd = -1 }; + + memset(, 0, sizeof(ifr)); + if (!name || !r_addr || get_ifname(ifr.ifr_name, name)) + return 0; + + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name); + if (!ifr.ifr_ifindex) + return 0; + + /* remove from cache */ + ll_drop_by_index(ifr.ifr_ifindex); + + if (rtnl_open(, 0) < 0) + return 0; + + if (rtnl_addrdump_req(, AF_UNSPEC, 0) < 0) { + rtnl_close(); + return 0; + } + + if (rtnl_dump_filter(, nl_dump_addr_filter, r_addr) < 0) { + rtnl_close(); + return 0; + } + rtnl_close(); + return 1; +} + static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, struct cmdl *cmdl) { @@ -136,13 +206,25 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, .ai_family = AF_UNSPEC, .ai_socktype = SOCK_DGRAM }; + char addr[INET6_ADDRSTRLEN] = {0}; - if (!(opt = get_opt(opts, "localip"))) { - fprintf(stderr, "error, udp bearer localip missing\n"); - cmd_bearer_enable_udp_help(cmdl, "udp"); + opt = get_opt(opts, "device"); + if (opt && !cmd_bearer_validate_and_get_addr(opt->val, addr)) { + fprintf(stderr, "error, no device name available\n"); return -EINVAL; } - locip = opt->val; + + if (strlen(addr) > 0) { + locip = addr; + } else { + opt = get_opt(opts, "lo
[tipc-discussion] [iproute2-next v4] tipc: support interface name when activating UDP bearer
Support for indicating interface name has an ip address in parallel with specifying ip address when activating UDP bearer. This liberates the user from keeping track of the current ip address for each device. Old command syntax: $tipc bearer enable media udp name NAME localip IP New command syntax: $tipc bearer enable media udp name NAME [localip IP|dev DEVICE] v2: - Removed initial value for fd - Fixed the returning value for cmd_bearer_validate_and_get_addr to make its consistent with using: zero or non-zero v3: - Switch to use helper 'get_ifname' to retrieve interface name v4: - Replace legacy SIOCGIFADDR using by netlink Acked-by: Ying Xue Signed-off-by: Hoang Le --- tipc/bearer.c | 89 --- 1 file changed, 84 insertions(+), 5 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 1f3a4d1e..367ec8a2630f 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -19,10 +19,12 @@ #include #include #include +#include #include #include +#include "utils.h" #include "cmdl.h" #include "msg.h" #include "bearer.h" @@ -68,7 +70,7 @@ static void cmd_bearer_enable_l2_help(struct cmdl *cmdl, char *media) static void cmd_bearer_enable_udp_help(struct cmdl *cmdl, char *media) { fprintf(stderr, - "Usage: %s bearer enable [OPTIONS] media %s name NAME localip IP [UDP OPTIONS]\n\n" + "Usage: %s bearer enable [OPTIONS] media %s name NAME [localip IP|device DEVICE] [UDP OPTIONS]\n\n" "OPTIONS\n" " domain DOMAIN - Discovery domain\n" " priority PRIORITY - Bearer priority\n\n" @@ -119,6 +121,71 @@ static int generate_multicast(short af, char *buf, int bufsize) return 0; } +static struct ifreq ifr; +static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg) +{ + struct ifaddrmsg *ifa = NLMSG_DATA(nlh); + char *r_addr = (char *)arg; + int len = nlh->nlmsg_len; + struct rtattr *addr_attr; + + if (ifr.ifr_ifindex != ifa->ifa_index) + return 0; + + if (strlen(r_addr) > 0) + return 1; + + addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa), +len - NLMSG_LENGTH(sizeof(*ifa))); + if 
(!addr_attr) + return 0; + + if (ifa->ifa_family == AF_INET) { + struct sockaddr_in ip4addr; + memcpy(_addr, RTA_DATA(addr_attr), + sizeof(struct in_addr)); + if (inet_ntop(AF_INET, _addr, r_addr, + INET_ADDRSTRLEN) == NULL) + return 0; + } else if (ifa->ifa_family == AF_INET6) { + struct sockaddr_in6 ip6addr; + memcpy(_addr, RTA_DATA(addr_attr), + sizeof(struct in6_addr)); + if (inet_ntop(AF_INET6, _addr, r_addr, + INET6_ADDRSTRLEN) == NULL) + return 0; + } + return 1; +} + +static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr) +{ + struct rtnl_handle rth = { .fd = -1 }; + + memset(, 0, sizeof(ifr)); + if (!name || !r_addr || get_ifname(ifr.ifr_name, name)) + return 0; + + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name); + if (!ifr.ifr_ifindex) + return 0; + + /* remove from cache */ + ll_drop_by_index(ifr.ifr_ifindex); + + if (rtnl_open(, 0) < 0) + return 0; + + if (rtnl_addrdump_req(, AF_UNSPEC, 0) < 0) + return 0; + + if (rtnl_dump_filter(, nl_dump_addr_filter, r_addr) < 0) + return 0; + + rtnl_close(); + return 1; +} + static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, struct cmdl *cmdl) { @@ -136,13 +203,25 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, .ai_family = AF_UNSPEC, .ai_socktype = SOCK_DGRAM }; + char addr[INET6_ADDRSTRLEN] = {0}; - if (!(opt = get_opt(opts, "localip"))) { - fprintf(stderr, "error, udp bearer localip missing\n"); - cmd_bearer_enable_udp_help(cmdl, "udp"); + opt = get_opt(opts, "device"); + if (opt && !cmd_bearer_validate_and_get_addr(opt->val, addr)) { + fprintf(stderr, "error, no device name available\n"); return -EINVAL; } - locip = opt->val; + + if (strlen(addr) > 0) { + locip = addr; + } else { + opt = get_opt(opts, "localip"); + if (!opt) { + fprintf(stderr, "error,
[tipc-discussion] [net] tipc: missing entries in name table of publications
When binding multiple services with specific types 1Ki, 2Ki, ..., some entries in the name table of publications are missing when listed via 'tipc name show'. The problem lies in how a zero last_type value provided via netlink is interpreted, since it can mean two things. The first is the initial 'type' when starting a name table dump. The second is the continuation of a dump at service type zero (the node state service type). In the latter case, the lookup function fails to find the node state service type in the next iteration. To solve this, add a further condition that marks the type as dirty, so that the correct service type is looked up for the next iteration instead of selecting the first service as is done for the initial 'type' zero. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/name_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bff241f03525..89993afe0fbd 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, for (; i < TIPC_NAMETBL_SIZE; i++) { head = >nametbl->services[i]; - if (*last_type) { + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { service = tipc_service_find(net, *last_type); if (!service) return -EPIPE; -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: add NULL pointer check
If the skb has somehow been dequeued from inputq before processing, skb_peek() returns NULL, which leads to a NULL pointer dereference and a kernel crash. Add a check that the skb is valid before using it. Fixes: c55c8edafa9 ("tipc: smooth change between replicast and broadcast") Reported-by: Tuong Lien Tong Acked-by: Ying Xue Signed-off-by: Hoang Le --- net/tipc/bcast.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 76e14dc08bb9..6c997d4a6218 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -769,6 +769,9 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, u32 node, port; skb = skb_peek(inputq); + if (!skb) + return; + hdr = buf_msg(skb); if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next] tipc: add NULL pointer check
If the skb has somehow been dequeued from inputq before processing, skb_peek() returns NULL, which leads to a NULL pointer dereference and a kernel crash. Add a check that the skb is valid before using it. Fixes: c55c8edafa9 ("tipc: smooth change between replicast and broadcast") Reported-by: Tuong Lien Tong Signed-off-by: Hoang Le --- net/tipc/bcast.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 76e14dc08bb9..6c997d4a6218 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -769,6 +769,9 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, u32 node, port; skb = skb_peek(inputq); + if (!skb) + return; + hdr = buf_msg(skb); if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next v2 1/3] tipc: add link broadcast set method and ratio
The command added here makes it possible to forcibly configure the broadcast link to use either broadcast or replicast, in addition to the already existing auto selection algorithm. A sample usage is shown below: $tipc link set broadcast BROADCAST $tipc link set broadcast AUTOSELECT ratio 25 $tipc link set broadcast -h Usage: tipc link set broadcast PROPERTY PROPERTIES BROADCAST - Forces all multicast traffic to be transmitted via broadcast only, irrespective of cluster size and number of destinations REPLICAST - Forces all multicast traffic to be transmitted via replicast only, irrespective of cluster size and number of destinations AUTOSELECT- Auto switching to broadcast or replicast depending on cluster size and destination node number ratio SIZE- Set the AUTOSELECT criteria, percentage of destination nodes vs cluster size Acked-by: Jon Maloy Signed-off-by: Hoang Le --- include/uapi/linux/tipc_netlink.h | 2 + tipc/link.c | 96 ++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST,/* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/tipc/link.c b/tipc/link.c index 43e26da3fa6b..e3b10bb7b3d4 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -28,6 +28,9 @@ #define PRIORITY_STR "priority" #define TOLERANCE_STR "tolerance" #define WINDOW_STR "window" +#define BROADCAST_STR "broadcast" + +static const char tipc_bclink_name[] = "broadcast-link"; static int link_list_cb(const struct nlmsghdr *nlh, void *data) { @@ -521,7 +524,8 @@ static void cmd_link_set_help(struct cmdl *cmdl) "PROPERTIES\n" " tolerance TOLERANCE - Set link tolerance\n" " priority PRIORITY - 
Set link priority\n" - " window WINDOW - Set link window\n", + " window WINDOW - Set link window\n" + " broadcast BROADCAST - Set link broadcast\n", cmdl->argv[0]); } @@ -585,6 +589,95 @@ static int cmd_link_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, link_get_cb, ); } +static void cmd_link_set_bcast_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s link set broadcast PROPERTY\n\n" + "PROPERTIES\n" + " BROADCAST - Forces all multicast traffic to be\n" + " transmitted via broadcast only,\n" + " irrespective of cluster size and number\n" + " of destinations\n\n" + " REPLICAST - Forces all multicast traffic to be\n" + " transmitted via replicast only,\n" + " irrespective of cluster size and number\n" + " of destinations\n\n" + " AUTOSELECT- Auto switching to broadcast or replicast\n" + " depending on cluster size and destination\n" + " node number\n\n" + " ratio SIZE- Set the AUTOSELECT criteria, percentage of\n" + " destination nodes vs cluster size\n\n", + cmdl->argv[0]); +} + +static int cmd_link_set_bcast(struct nlmsghdr *nlh, const struct cmd *cmd, +struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlattr *props; + struct nlattr *attrs; + struct opt *opt; + struct opt opts[] = { + { "BROADCAST", OPT_KEY, NULL }, + { "REPLICAST", OPT_KEY, NULL }, + { "AUTOSELECT", OPT_KEY, NULL }, + { "ratio", OPT_KEYVAL, NULL }, + { NULL } + }; + int method = 0; + + if (help_flag) { + (cmd->help)(cmdl); + return -EINVAL; + } + +
[tipc-discussion] [iproute2-next v2 2/3] tipc: add link broadcast get
The command prints the actually method that multicast is running in the system. Also 'ratio' value for AUTOSELECT method. A sample usage is shown below: $tipc link get broadcast BROADCAST $tipc link get broadcast AUTOSELECT ratio:30% $tipc link get broadcast -j -p [ { "method": "AUTOSELECT" },{ "ratio": 30 } ] Acked-by: Jon Maloy Signed-off-by: Hoang Le --- tipc/link.c | 85 - 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/tipc/link.c b/tipc/link.c index e3b10bb7b3d4..e123c1863575 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -175,10 +175,92 @@ static void cmd_link_get_help(struct cmdl *cmdl) "PROPERTIES\n" " tolerance - Get link tolerance\n" " priority - Get link priority\n" - " window- Get link window\n", + " window- Get link window\n" + " broadcast - Get link broadcast\n", cmdl->argv[0]); } +static int cmd_link_get_bcast_cb(const struct nlmsghdr *nlh, void *data) +{ + int *prop = data; + int prop_ratio = TIPC_NLA_PROP_BROADCAST_RATIO; + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *info[TIPC_NLA_MAX + 1] = {}; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1] = {}; + struct nlattr *props[TIPC_NLA_PROP_MAX + 1] = {}; + int bc_mode; + + mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info); + if (!info[TIPC_NLA_LINK]) + return MNL_CB_ERROR; + + mnl_attr_parse_nested(info[TIPC_NLA_LINK], parse_attrs, attrs); + if (!attrs[TIPC_NLA_LINK_PROP]) + return MNL_CB_ERROR; + + mnl_attr_parse_nested(attrs[TIPC_NLA_LINK_PROP], parse_attrs, props); + if (!props[*prop]) + return MNL_CB_ERROR; + + bc_mode = mnl_attr_get_u32(props[*prop]); + + new_json_obj(json); + open_json_object(NULL); + switch (bc_mode) { + case 0x1: + print_string(PRINT_ANY, "method", "%s\n", "BROADCAST"); + break; + case 0x2: + print_string(PRINT_ANY, "method", "%s\n", "REPLICAST"); + break; + case 0x4: + print_string(PRINT_ANY, "method", "%s", "AUTOSELECT"); + close_json_object(); + open_json_object(NULL); + print_uint(PRINT_ANY, "ratio", " ratio:%u%\n", + 
mnl_attr_get_u32(props[prop_ratio])); + break; + default: + print_string(PRINT_ANY, NULL, "UNKNOWN\n", NULL); + break; + } + close_json_object(); + delete_json_obj(); + return MNL_CB_OK; +} + +static void cmd_link_get_bcast_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s link get PPROPERTY\n\n" + "PROPERTIES\n" + " broadcast - Get link broadcast\n", + cmdl->argv[0]); +} + +static int cmd_link_get_bcast(struct nlmsghdr *nlh, const struct cmd *cmd, +struct cmdl *cmdl, void *data) +{ + int prop = TIPC_NLA_PROP_BROADCAST; + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlattr *attrs; + + if (help_flag) { + (cmd->help)(cmdl); + return -EINVAL; + } + + nlh = msg_init(buf, TIPC_NL_LINK_GET); + if (!nlh) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + attrs = mnl_attr_nest_start(nlh, TIPC_NLA_LINK); + /* Direct to broadcast-link setting */ + mnl_attr_put_strz(nlh, TIPC_NLA_LINK_NAME, tipc_bclink_name); + mnl_attr_nest_end(nlh, attrs); + return msg_doit(nlh, cmd_link_get_bcast_cb, ); +} + static int cmd_link_get(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { @@ -186,6 +268,7 @@ static int cmd_link_get(struct nlmsghdr *nlh, const struct cmd *cmd, { PRIORITY_STR, cmd_link_get_prop, cmd_link_get_help }, { TOLERANCE_STR,cmd_link_get_prop, cmd_link_get_help }, { WINDOW_STR, cmd_link_get_prop, cmd_link_get_help }, + { BROADCAST_STR, cmd_link_get_bcast, cmd_link_get_bcast_help }, { NULL } }; -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next v2 3/3] tipc: add link broadcast man page
Add a man page describing tipc link broadcast command get and set Signed-off-by: Hoang Le --- man/man8/tipc-link.8 | 53 +++- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/man/man8/tipc-link.8 b/man/man8/tipc-link.8 index 01afa1c3ad9f..47dae25d3626 100644 --- a/man/man8/tipc-link.8 +++ b/man/man8/tipc-link.8 @@ -1,4 +1,4 @@ -.TH TIPC-LINK 8 "02 Jun 2015" "iproute2" "Linux" +.TH TIPC-LINK 8 "22 Mar 2019" "iproute2" "Linux" .\" For consistency, please keep padding right aligned. .\" For example '.B "foo " bar' and not '.B foo " bar"' @@ -14,18 +14,36 @@ tipc-link \- show links or modify link properties .ti -8 .B tipc link set -.RB "{ " "priority " +.br +.RB "[ " "{ " "priority " .IR PRIORITY .RB "| " tolerance .IR TOLERANCE .RB "| " window .IR "WINDOW " } -.BI "link " LINK +.BI "link " LINK " ]" +.RB "|" +.br +.RB "[ " +.RB "{ " broadcast " [ " +.IR BROADCAST +.RB " | " +.IR REPLICAST +.RB " | " +.IR AUTOSELECT +.RB "[ " ratio +.IR SIZE +.RB "] " ] " } " "]" .ti -8 .B tipc link get -.RB "{ " "priority" " | " tolerance " | " window " } " link -.I LINK +.br +.RB "[ " "{ " "priority" " | " tolerance " | " window " } " link +.IR LINK " ] " +.RB "|" +.br +.RB "[ " { " broadcast " } " ]" +.br .ti -8 .B tipc link statistics @@ -306,6 +324,31 @@ They are usually transient and occur during the cluster startup phase or network reconfiguration. Possible status are: U or D. The status U implies up and D down. +.SS Broadcast properties +.TP +.B BROADCAST +.br +Forces all multicast traffic to be transmitted via broadcast only, +irrespective of cluster size and number of destinations. + +.TP +.B REPLICAST +.br +Forces all multicast traffic to be transmitted via replicast only, +irrespective of cluster size and number of destinations. + +.TP +.B AUTOSELECT +.br +Auto switching to broadcast or replicast depending on cluster size and +destination node number. + +.TP +.B ratio SIZE +.br +Set the AUTOSELECT criteria, percentage of destination nodes vs cluster +size. 
+ .SH EXAMPLES .PP tipc link monitor list -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next v1 2/2] tipc: fix a null pointer deref
In commit c55c8edafa91 ("tipc: smooth change between replicast and broadcast") we introduced new method to eliminate the risk of message reordering that happen in between different nodes. Unfortunately, we forgot checking at receiving side to ignore intra node. We fix this by checking and returning if arrived message from intra node. syzbot report: == kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: [#1] PREEMPT SMP KASAN CPU: 0 PID: 7820 Comm: syz-executor418 Not tainted 5.0.0+ #61 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tipc_mcast_filter_msg+0x21b/0x13d0 net/tipc/bcast.c:782 Code: 45 c0 0f 84 39 06 00 00 48 89 5d 98 e8 ce ab a5 fa 49 8d bc 24 c8 00 00 00 48 b9 00 00 00 00 00 fc ff df 48 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 9a 0e 00 00 49 8b 9c 24 c8 00 00 00 48 be 00 00 RSP: 0018:8880959defc8 EFLAGS: 00010202 RAX: 0019 RBX: 888081258a48 RCX: dc00 RDX: RSI: 86cab862 RDI: 00c8 RBP: 8880959df030 R08: 8880813d0200 R09: ed1015d05bc8 R10: ed1015d05bc7 R11: 8880ae82de3b R12: R13: 002c R14: R15: 888081258a48 FS: 0106a880() GS:8880ae80() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 20001cc0 CR3: 94a2 CR4: 001406f0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: tipc_sk_filter_rcv+0x182d/0x34f0 net/tipc/socket.c:2168 tipc_sk_enqueue net/tipc/socket.c:2254 [inline] tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305 tipc_sk_mcast_rcv+0x724/0x1020 net/tipc/socket.c:1209 tipc_mcast_xmit+0x7fe/0x1200 net/tipc/bcast.c:410 tipc_sendmcast+0xb36/0xfc0 net/tipc/socket.c:820 __tipc_sendmsg+0x10df/0x18d0 net/tipc/socket.c:1358 tipc_sendmsg+0x53/0x80 net/tipc/socket.c:1291 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:661 ___sys_sendmsg+0x806/0x930 net/socket.c:2260 __sys_sendmsg+0x105/0x1d0 net/socket.c:2298 __do_sys_sendmsg net/socket.c:2307 [inline] __se_sys_sendmsg net/socket.c:2305 [inline] 
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2305 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4401c9 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:7ffd887fa9d8 EFLAGS: 0246 ORIG_RAX: 002e RAX: ffda RBX: 004002c8 RCX: 004401c9 RDX: RSI: 20002140 RDI: 0003 RBP: 006ca018 R08: R09: 004002c8 R10: R11: 0246 R12: 00401a50 R13: 00401ae0 R14: R15: Modules linked in: ---[ end trace ba79875754e1708f ]--- Reported-by: syzbot+be4bdf2cc3e85e952...@syzkaller.appspotmail.com Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bcast.c | 5 - net/tipc/bcast.h | 2 +- net/tipc/socket.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 5264a8ff6e01..88edfb358ae7 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -760,7 +760,7 @@ u32 tipc_bcast_get_broadcast_ratio(struct net *net) return bb->rc_ratio; } -void tipc_mcast_filter_msg(struct sk_buff_head *defq, +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq) { struct sk_buff *skb, *_skb, *tmp; @@ -775,6 +775,9 @@ void tipc_mcast_filter_msg(struct sk_buff_head *defq, return; node = msg_orignode(hdr); + if (node == tipc_own_addr(net)) + return; + port = msg_origport(hdr); /* Has the twin SYN message already arrived ? 
*/ diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 484bde289d3a..dadad953e2be 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -101,7 +101,7 @@ int tipc_bclink_reset_stats(struct net *net); u32 tipc_bcast_get_broadcast_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); -void tipc_mcast_filter_msg(struct sk_buff_head *defq, +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq); static inline void tipc_bcast_lock(struct net *net) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a7b3e1a070e4..8ac8ddf1e324 100644 --- a/net/tipc/socket.c
[tipc-discussion] [net-next v1 1/2] tipc: fix use-after-free in tipc_sk_filter_rcv
The skb is freed in: 1/ condition 1: tipc_sk_filter_rcv -> tipc_sk_proto_rcv 2/ condition 2: tipc_sk_filter_rcv -> tipc_group_filter_msg This leads to a "use-after-free" access in the next condition. We fix this by initializing the variable at declaration, so that it is safe to check this variable to continue processing if the condition matches. syzbot report: == BUG: KASAN: use-after-free in tipc_sk_filter_rcv+0x2166/0x34f0 net/tipc/socket.c:2167 Read of size 4 at addr 88808ea58534 by task kworker/u4:0/7 CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.0.0+ #61 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: tipc_send tipc_conn_send_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131 tipc_sk_filter_rcv+0x2166/0x34f0 net/tipc/socket.c:2167 tipc_sk_enqueue net/tipc/socket.c:2254 [inline] tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305 tipc_topsrv_kern_evt+0x3b7/0x580 net/tipc/topsrv.c:610 tipc_conn_send_to_sock+0x43e/0x5f0 net/tipc/topsrv.c:283 tipc_conn_send_work+0x65/0x80 net/tipc/topsrv.c:303 process_one_work+0x98e/0x1790 kernel/workqueue.c:2269 worker_thread+0x98/0xe40 kernel/workqueue.c:2415 kthread+0x357/0x430 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+e863893591cc7a622...@syzkaller.appspotmail.com Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast") Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 922b75ff56d3..a7b3e1a070e4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2151,6 +2151,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct tipc_msg *hdr = 
buf_msg(skb); struct net *net = sock_net(sk); struct sk_buff_head inputq; + int mtyp = msg_type(hdr); int limit, err = TIPC_OK; trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); @@ -2164,7 +2165,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(grp)) tipc_group_filter_msg(grp, , xmitq); - if (msg_type(hdr) == TIPC_MCAST_MSG) + if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) tipc_mcast_filter_msg(>mc_method.deferredq, ); /* Validate and add to receive buffer if there is space */ -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH 2/2] tipc: fix a null pointer deref
In commit c55c8edafa91 ("tipc: smooth change between replicast and broadcast") we introduced new method to eliminate the risk of message reordering that happen in between different nodes. Unfortunately, we forgot checking at receiving side to ignore intra node. We fix this by checking and returning if arrived message from intra node. syzbot report: == kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: [#1] PREEMPT SMP KASAN CPU: 0 PID: 7820 Comm: syz-executor418 Not tainted 5.0.0+ #61 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tipc_mcast_filter_msg+0x21b/0x13d0 net/tipc/bcast.c:782 Code: 45 c0 0f 84 39 06 00 00 48 89 5d 98 e8 ce ab a5 fa 49 8d bc 24 c8 00 00 00 48 b9 00 00 00 00 00 fc ff df 48 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 9a 0e 00 00 49 8b 9c 24 c8 00 00 00 48 be 00 00 RSP: 0018:8880959defc8 EFLAGS: 00010202 RAX: 0019 RBX: 888081258a48 RCX: dc00 RDX: RSI: 86cab862 RDI: 00c8 RBP: 8880959df030 R08: 8880813d0200 R09: ed1015d05bc8 R10: ed1015d05bc7 R11: 8880ae82de3b R12: R13: 002c R14: R15: 888081258a48 FS: 0106a880() GS:8880ae80() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 20001cc0 CR3: 94a2 CR4: 001406f0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: tipc_sk_filter_rcv+0x182d/0x34f0 net/tipc/socket.c:2168 tipc_sk_enqueue net/tipc/socket.c:2254 [inline] tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305 tipc_sk_mcast_rcv+0x724/0x1020 net/tipc/socket.c:1209 tipc_mcast_xmit+0x7fe/0x1200 net/tipc/bcast.c:410 tipc_sendmcast+0xb36/0xfc0 net/tipc/socket.c:820 __tipc_sendmsg+0x10df/0x18d0 net/tipc/socket.c:1358 tipc_sendmsg+0x53/0x80 net/tipc/socket.c:1291 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:661 ___sys_sendmsg+0x806/0x930 net/socket.c:2260 __sys_sendmsg+0x105/0x1d0 net/socket.c:2298 __do_sys_sendmsg net/socket.c:2307 [inline] __se_sys_sendmsg net/socket.c:2305 [inline] 
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2305 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4401c9 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:7ffd887fa9d8 EFLAGS: 0246 ORIG_RAX: 002e RAX: ffda RBX: 004002c8 RCX: 004401c9 RDX: RSI: 20002140 RDI: 0003 RBP: 006ca018 R08: R09: 004002c8 R10: R11: 0246 R12: 00401a50 R13: 00401ae0 R14: R15: Modules linked in: ---[ end trace ba79875754e1708f ]--- Reported-by: syzbot+be4bdf2cc3e85e952...@syzkaller.appspotmail.com Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast") Signed-off-by: Hoang Le --- net/tipc/bcast.c | 6 +- net/tipc/bcast.h | 2 +- net/tipc/socket.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 5264a8ff6e01..b3e6b4892425 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -760,10 +760,11 @@ u32 tipc_bcast_get_broadcast_ratio(struct net *net) return bb->rc_ratio; } -void tipc_mcast_filter_msg(struct sk_buff_head *defq, +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq) { struct sk_buff *skb, *_skb, *tmp; + u32 self = tipc_own_addr(net); struct tipc_msg *hdr, *_hdr; bool match = false; u32 node, port; @@ -775,6 +776,9 @@ void tipc_mcast_filter_msg(struct sk_buff_head *defq, return; node = msg_orignode(hdr); + if (node == self) + return; + port = msg_origport(hdr); /* Has the twin SYN message already arrived ? 
*/ diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 484bde289d3a..dadad953e2be 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -101,7 +101,7 @@ int tipc_bclink_reset_stats(struct net *net); u32 tipc_bcast_get_broadcast_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); -void tipc_mcast_filter_msg(struct sk_buff_head *defq, +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq); static inline void tipc_bcast_lock(struct net *net) diff --git a/net/tipc
[tipc-discussion] [PATCH 1/2] tipc: fix use-after-free tipc_sk_filter_rcv
The skb is freed in: 1/ condition 1: tipc_sk_filter_rcv -> tipc_sk_proto_rcv 2/ condition 2: tipc_sk_filter_rcv -> tipc_group_filter_msg This leads to a "use-after-free" access in the next condition. We fix this by initializing the variable at declaration, so that it is safe to check this variable to continue processing if the condition matches. syzbot report: == BUG: KASAN: use-after-free in tipc_sk_filter_rcv+0x2166/0x34f0 net/tipc/socket.c:2167 Read of size 4 at addr 88808ea58534 by task kworker/u4:0/7 CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.0.0+ #61 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: tipc_send tipc_conn_send_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131 tipc_sk_filter_rcv+0x2166/0x34f0 net/tipc/socket.c:2167 tipc_sk_enqueue net/tipc/socket.c:2254 [inline] tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305 tipc_topsrv_kern_evt+0x3b7/0x580 net/tipc/topsrv.c:610 tipc_conn_send_to_sock+0x43e/0x5f0 net/tipc/topsrv.c:283 tipc_conn_send_work+0x65/0x80 net/tipc/topsrv.c:303 process_one_work+0x98e/0x1790 kernel/workqueue.c:2269 worker_thread+0x98/0xe40 kernel/workqueue.c:2415 kthread+0x357/0x430 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+e863893591cc7a622...@syzkaller.appspotmail.com Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast") Signed-off-by: Hoang Le --- net/tipc/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 922b75ff56d3..a7b3e1a070e4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2151,6 +2151,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct net 
*net = sock_net(sk); struct sk_buff_head inputq; + int mtyp = msg_type(hdr); int limit, err = TIPC_OK; trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); @@ -2164,7 +2165,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(grp)) tipc_group_filter_msg(grp, , xmitq); - if (msg_type(hdr) == TIPC_MCAST_MSG) + if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) tipc_mcast_filter_msg(>mc_method.deferredq, ); /* Validate and add to receive buffer if there is space */ -- 2.1.4 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next v3 1/3] tipc: support broadcast/replicast configurable for bc-link
Currently, a multicast stream uses either broadcast or replicast as transmission method, based on the ratio between the number of actual destination nodes and the cluster size. However, when an L2 interface (e.g., VXLAN) provides pseudo broadcast support, this becomes very inefficient, as it blindly replicates multicast packets to all cluster/subnet nodes, irrespective of whether they host actual target sockets or not. The TIPC multicast algorithm is able to distinguish real destination nodes from other nodes, and hence provides a smarter and more efficient method for transferring multicast messages than pseudo broadcast can do. Because of this, we now make it possible for users to force the broadcast link to permanently switch to using replicast, irrespective of which capabilities the bearer provides, or pretends to provide. Conversely, we also make it possible to force the broadcast link to always use true broadcast. While maybe less useful in deployed systems, this may at least be useful for testing the broadcast algorithm in small clusters. We retain the current AUTOSELECT ability, i.e., to let the broadcast link automatically select which algorithm to use, and to switch back and forth between broadcast and replicast as the ratio between destination node number and cluster size changes. This remains the default method. Furthermore, we make it possible to configure the threshold ratio for such switches. The default ratio is now set to 10%, down from 25% in the earlier implementation. 
Acked-by: Jon Maloy Signed-off-by: Hoang Le --- include/uapi/linux/tipc_netlink.h | 2 + net/tipc/bcast.c | 104 -- net/tipc/bcast.h | 7 ++ net/tipc/link.c | 8 +++ net/tipc/netlink.c| 4 +- 5 files changed, 120 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST,/* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d8026543bf4c..12b59268bdd6 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link"; * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ @@ -64,7 +66,9 @@ struct tipc_bc_base { int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; + bool force_bcast; bool rcast_support; + bool force_rcast; int rc_ratio; int bc_threshold; }; @@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) return 0; } +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + 
bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; + + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); +
[tipc-discussion] [net-next v3 3/3] tipc: smooth change between replicast and broadcast
Currently, a multicast stream may start out using replicast, because there are few destinations, and then it should ideally switch to L2/broadcast IGMP/multicast when the number of destinations grows beyond a certain limit. The opposite should happen when the number decreases below the limit. To eliminate the risk of message reordering caused by method change, a sending socket must stick to a previously selected method until it enters an idle period of 5 seconds. That is, there must be a 5-second pause in the traffic from the sender socket. If the sender never makes such a pause, the method will never change, and transmission may become very inefficient as the cluster grows. With this commit, we allow such a switch between replicast and broadcast without any need for a traffic pause. The solution is to send a dummy message with only the header, also with the SYN bit set, via broadcast or replicast. The data message also has the SYN bit set and is sent via the other method (the inverse of the one used for the dummy). Then, at the receiving side, any messages that follow the first SYN-bit message (data or dummy message) are held in the deferred queue until the other message of the pair (dummy or data message) has arrived on the other link. v2: reverse christmas tree declaration Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bcast.c | 165 +- net/tipc/bcast.h | 5 ++ net/tipc/msg.h| 10 +++ net/tipc/socket.c | 5 ++ 4 files changed, 184 insertions(+), 1 deletion(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 12b59268bdd6..5264a8ff6e01 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -220,9 +220,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, } /* Can current method be changed ? 
*/ method->expires = jiffies + TIPC_METHOD_EXPIRE; - if (method->mandatory || time_before(jiffies, exp)) + if (method->mandatory) return; + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && + time_before(jiffies, exp)) + return; + + /* Configuration as force 'broadcast' method */ + if (bb->force_bcast) { + method->rcast = false; + return; + } + /* Configuration as force 'replicast' method */ + if (bb->force_rcast) { + method->rcast = true; + return; + } + /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } @@ -285,6 +300,63 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, return 0; } +/* tipc_mcast_send_sync - deliver a dummy message with SYN bit + * @net: the applicable net namespace + * @skb: socket buffer to copy + * @method: send method to be used + * @dests: destination nodes for message. + * @cong_link_cnt: returns number of encountered congested destination links + * Returns 0 if success, otherwise errno + */ +static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, + struct tipc_mc_method *method, + struct tipc_nlist *dests, + u16 *cong_link_cnt) +{ + struct tipc_msg *hdr, *_hdr; + struct sk_buff_head tmpq; + struct sk_buff *_skb; + + /* Is a cluster supporting with new capabilities ? 
*/ + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) + return 0; + + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + if (msg_type(hdr) != TIPC_MCAST_MSG) + return 0; + + /* Allocate dummy message */ + _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* Preparing for 'synching' header */ + msg_set_syn(hdr, 1); + + /* Copy skb's header into a dummy header */ + skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); + skb_orphan(_skb); + + /* Reverse method for dummy message */ + _hdr = buf_msg(_skb); + msg_set_size(_hdr, MCAST_H_SIZE); + msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + + skb_queue_head_init(); + __skb_queue_tail(, _skb); + if (method->rcast) + tipc_bcast_xmit(net, , cong_link_cnt); + else + tipc_rcast_xmit(net, , dests, cong_link_cnt); + + /* This queue should normally be empty by now */ + __skb_queue_purge(); + + return 0; +} + /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace @@ -300,6 +372,9 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; + b
[tipc-discussion] [net-next v3 2/3] tipc: introduce new capability flag for cluster
As a preparation for introducing a smooth switching between replicast and broadcast method for multicast message, We have to introduce a new capability flag TIPC_MCAST_RBCTL to handle this new feature. During a cluster upgrade a node can come back with this new capabilities which also must be reflected in the cluster capabilities field. The new feature is only applicable if all node in the cluster supports this new capability. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/core.c | 2 ++ net/tipc/core.h | 3 +++ net/tipc/node.c | 18 ++ net/tipc/node.h | 6 -- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 5b38f5164281..27cccd101ef6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -43,6 +43,7 @@ #include "net.h" #include "socket.h" #include "bcast.h" +#include "node.h" #include @@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; diff --git a/net/tipc/core.h b/net/tipc/core.h index 8020a6c360ff..7a68e1b6a066 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -122,6 +122,9 @@ struct tipc_net { /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; + + /* Cluster capabilities */ + u16 capabilities; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2dc4919ab23c..2717893e9dbe 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -383,6 +383,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_link_update_caps(l, capabilities); } write_unlock_bh(>lock); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + 
tn->capabilities &= temp_node->capabilities; + } goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -433,6 +438,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, break; } list_add_tail_rcu(>list, _node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + tn->capabilities &= temp_node->capabilities; + } trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(>node_list_lock); @@ -589,6 +599,7 @@ static void tipc_node_clear_links(struct tipc_node *node) */ static bool tipc_node_cleanup(struct tipc_node *peer) { + struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; @@ -604,6 +615,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer) deleted = true; } tipc_node_write_unlock(peer); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + spin_unlock_bh(>node_list_lock); return deleted; } diff --git a/net/tipc/node.h b/net/tipc/node.h index 4f59a30e989a..2404225c5d58 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,7 +51,8 @@ enum { TIPC_BLOCK_FLOWCTL= (1 << 3), TIPC_BCAST_RCAST = (1 << 4), TIPC_NODE_ID128 = (1 << 5), - TIPC_LINK_PROTO_SEQNO = (1 << 6) + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -60,7 +61,8 @@ enum { TIPC_BCAST_RCAST | \ TIPC_BLOCK_FLOWCTL | \ TIPC_NODE_ID128| \ - TIPC_LINK_PROTO_SEQNO) + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next 2/2] tipc: add link broadcast get
The command prints the actually method that multicast is running in the system. Also 'ratio' value for AUTOSELECT method. A sample usage is shown below: $tipc link get broadcast BROADCAST $tipc link get broadcast AUTOSELECT ratio:30% $tipc link get broadcast -j -p [ { "method": "AUTOSELECT" },{ "ratio": 30 } ] Acked-by: Jon Maloy Signed-off-by: Hoang Le --- tipc/link.c | 85 - 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/tipc/link.c b/tipc/link.c index e3b10bb7b3d4..e123c1863575 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -175,10 +175,92 @@ static void cmd_link_get_help(struct cmdl *cmdl) "PROPERTIES\n" " tolerance - Get link tolerance\n" " priority - Get link priority\n" - " window- Get link window\n", + " window- Get link window\n" + " broadcast - Get link broadcast\n", cmdl->argv[0]); } +static int cmd_link_get_bcast_cb(const struct nlmsghdr *nlh, void *data) +{ + int *prop = data; + int prop_ratio = TIPC_NLA_PROP_BROADCAST_RATIO; + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *info[TIPC_NLA_MAX + 1] = {}; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1] = {}; + struct nlattr *props[TIPC_NLA_PROP_MAX + 1] = {}; + int bc_mode; + + mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info); + if (!info[TIPC_NLA_LINK]) + return MNL_CB_ERROR; + + mnl_attr_parse_nested(info[TIPC_NLA_LINK], parse_attrs, attrs); + if (!attrs[TIPC_NLA_LINK_PROP]) + return MNL_CB_ERROR; + + mnl_attr_parse_nested(attrs[TIPC_NLA_LINK_PROP], parse_attrs, props); + if (!props[*prop]) + return MNL_CB_ERROR; + + bc_mode = mnl_attr_get_u32(props[*prop]); + + new_json_obj(json); + open_json_object(NULL); + switch (bc_mode) { + case 0x1: + print_string(PRINT_ANY, "method", "%s\n", "BROADCAST"); + break; + case 0x2: + print_string(PRINT_ANY, "method", "%s\n", "REPLICAST"); + break; + case 0x4: + print_string(PRINT_ANY, "method", "%s", "AUTOSELECT"); + close_json_object(); + open_json_object(NULL); + print_uint(PRINT_ANY, "ratio", " ratio:%u%\n", + 
mnl_attr_get_u32(props[prop_ratio])); + break; + default: + print_string(PRINT_ANY, NULL, "UNKNOWN\n", NULL); + break; + } + close_json_object(); + delete_json_obj(); + return MNL_CB_OK; +} + +static void cmd_link_get_bcast_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s link get PPROPERTY\n\n" + "PROPERTIES\n" + " broadcast - Get link broadcast\n", + cmdl->argv[0]); +} + +static int cmd_link_get_bcast(struct nlmsghdr *nlh, const struct cmd *cmd, +struct cmdl *cmdl, void *data) +{ + int prop = TIPC_NLA_PROP_BROADCAST; + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlattr *attrs; + + if (help_flag) { + (cmd->help)(cmdl); + return -EINVAL; + } + + nlh = msg_init(buf, TIPC_NL_LINK_GET); + if (!nlh) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + attrs = mnl_attr_nest_start(nlh, TIPC_NLA_LINK); + /* Direct to broadcast-link setting */ + mnl_attr_put_strz(nlh, TIPC_NLA_LINK_NAME, tipc_bclink_name); + mnl_attr_nest_end(nlh, attrs); + return msg_doit(nlh, cmd_link_get_bcast_cb, ); +} + static int cmd_link_get(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { @@ -186,6 +268,7 @@ static int cmd_link_get(struct nlmsghdr *nlh, const struct cmd *cmd, { PRIORITY_STR, cmd_link_get_prop, cmd_link_get_help }, { TOLERANCE_STR,cmd_link_get_prop, cmd_link_get_help }, { WINDOW_STR, cmd_link_get_prop, cmd_link_get_help }, + { BROADCAST_STR, cmd_link_get_bcast, cmd_link_get_bcast_help }, { NULL } }; -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [iproute2-next 1/2] tipc: add link broadcast set method and ratio
The command added here makes it possible to forcibly configure the broadcast link to use either broadcast or replicast, in addition to the already existing auto selection algorithm. A sample usage is shown below: $tipc link set broadcast BROADCAST $tipc link set broadcast AUTOSELECT ratio 25 $tipc link set broadcast -h Usage: tipc link set broadcast PROPERTY PROPERTIES BROADCAST - Forces all multicast traffic to be transmitted via broadcast only, irrespective of cluster size and number of destinations REPLICAST - Forces all multicast traffic to be transmitted via replicast only, irrespective of cluster size and number of destinations AUTOSELECT- Auto switching to broadcast or replicast depending on cluster size and destination node number ratio SIZE- Set the AUTOSELECT criteria, percentage of destination nodes vs cluster size Acked-by: Jon Maloy Signed-off-by: Hoang Le --- include/uapi/linux/tipc_netlink.h | 2 + tipc/link.c | 96 ++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST,/* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/tipc/link.c b/tipc/link.c index 43e26da3fa6b..e3b10bb7b3d4 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -28,6 +28,9 @@ #define PRIORITY_STR "priority" #define TOLERANCE_STR "tolerance" #define WINDOW_STR "window" +#define BROADCAST_STR "broadcast" + +static const char tipc_bclink_name[] = "broadcast-link"; static int link_list_cb(const struct nlmsghdr *nlh, void *data) { @@ -521,7 +524,8 @@ static void cmd_link_set_help(struct cmdl *cmdl) "PROPERTIES\n" " tolerance TOLERANCE - Set link tolerance\n" " priority PRIORITY - 
Set link priority\n" - " window WINDOW - Set link window\n", + " window WINDOW - Set link window\n" + " broadcast BROADCAST - Set link broadcast\n", cmdl->argv[0]); } @@ -585,6 +589,95 @@ static int cmd_link_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, link_get_cb, ); } +static void cmd_link_set_bcast_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s link set broadcast PROPERTY\n\n" + "PROPERTIES\n" + " BROADCAST - Forces all multicast traffic to be\n" + " transmitted via broadcast only,\n" + " irrespective of cluster size and number\n" + " of destinations\n\n" + " REPLICAST - Forces all multicast traffic to be\n" + " transmitted via replicast only,\n" + " irrespective of cluster size and number\n" + " of destinations\n\n" + " AUTOSELECT- Auto switching to broadcast or replicast\n" + " depending on cluster size and destination\n" + " node number\n\n" + " ratio SIZE- Set the AUTOSELECT criteria, percentage of\n" + " destination nodes vs cluster size\n\n", + cmdl->argv[0]); +} + +static int cmd_link_set_bcast(struct nlmsghdr *nlh, const struct cmd *cmd, +struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlattr *props; + struct nlattr *attrs; + struct opt *opt; + struct opt opts[] = { + { "BROADCAST", OPT_KEY, NULL }, + { "REPLICAST", OPT_KEY, NULL }, + { "AUTOSELECT", OPT_KEY, NULL }, + { "ratio", OPT_KEYVAL, NULL }, + { NULL } + }; + int method = 0; + + if (help_flag) { + (cmd->help)(cmdl); + return -EINVAL; + } + +
[tipc-discussion] [net-next v3 1/3] tipc: support broadcast/replicast configurable for bc-link
Currently, a multicast stream uses either broadcast or replicast as transmission method, based on the ratio between the number of actual destination nodes and the cluster size. However, when an L2 interface (e.g., VXLAN) provides pseudo broadcast support, this becomes very inefficient, as it blindly replicates multicast packets to all cluster/subnet nodes, irrespective of whether they host actual target sockets or not. The TIPC multicast algorithm is able to distinguish real destination nodes from other nodes, and hence provides a smarter and more efficient method for transferring multicast messages than pseudo broadcast can do. Because of this, we now make it possible for users to force the broadcast link to permanently switch to using replicast, irrespective of which capabilities the bearer provides, or pretends to provide. Conversely, we also make it possible to force the broadcast link to always use true broadcast. While maybe less useful in deployed systems, this may at least be useful for testing the broadcast algorithm in small clusters. We retain the current AUTOSELECT ability, i.e., to let the broadcast link automatically select which algorithm to use, and to switch back and forth between broadcast and replicast as the ratio between destination node number and cluster size changes. This remains the default method. Furthermore, we make it possible to configure the threshold ratio for such switches. The default ratio is now set to 10%, down from 25% in the earlier implementation. 
Acked-by: Jon Maloy Signed-off-by: Hoang Le --- include/uapi/linux/tipc_netlink.h | 2 + net/tipc/bcast.c | 104 -- net/tipc/bcast.h | 7 ++ net/tipc/link.c | 8 +++ net/tipc/netlink.c| 4 +- 5 files changed, 120 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST,/* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d8026543bf4c..12b59268bdd6 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link"; * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ @@ -64,7 +66,9 @@ struct tipc_bc_base { int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; + bool force_bcast; bool rcast_support; + bool force_rcast; int rc_ratio; int bc_threshold; }; @@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) return 0; } +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + 
bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; + + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); +
[tipc-discussion] [net-next v3 3/3] tipc: smooth change between replicast and broadcast
Currently, a multicast stream may start out using replicast, because there are few destinations, and then it should ideally switch to L2/broadcast IGMP/multicast when the number of destinations grows beyond a certain limit. The opposite should happen when the number decreases below the limit. To eliminate the risk of message reordering caused by method change, a sending socket must stick to a previously selected method until it enters an idle period of 5 seconds. This means there must be a 5-second pause in the traffic from the sender socket. If the sender never makes such a pause, the method will never change, and transmission may become very inefficient as the cluster grows. With this commit, we allow such a switch between replicast and broadcast without any need for a traffic pause. The solution is to send a dummy message with only the header, also with the SYN bit set, via broadcast or replicast. For the data message, the SYN bit is also set, and it is sent via replicast or broadcast (the inverse of the method used for the dummy). Then, at the receiving side, any messages that follow the first SYN bit message (data or dummy message) will be held in the deferred queue until the other message of the pair (dummy or data message) has arrived on the other link. v2: reverse christmas tree declaration Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/bcast.c | 165 +- net/tipc/bcast.h | 5 ++ net/tipc/msg.h| 10 +++ net/tipc/socket.c | 5 ++ 4 files changed, 184 insertions(+), 1 deletion(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 12b59268bdd6..5264a8ff6e01 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -220,9 +220,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, } /* Can current method be changed ? 
*/ method->expires = jiffies + TIPC_METHOD_EXPIRE; - if (method->mandatory || time_before(jiffies, exp)) + if (method->mandatory) return; + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && + time_before(jiffies, exp)) + return; + + /* Configuration as force 'broadcast' method */ + if (bb->force_bcast) { + method->rcast = false; + return; + } + /* Configuration as force 'replicast' method */ + if (bb->force_rcast) { + method->rcast = true; + return; + } + /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } @@ -285,6 +300,63 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, return 0; } +/* tipc_mcast_send_sync - deliver a dummy message with SYN bit + * @net: the applicable net namespace + * @skb: socket buffer to copy + * @method: send method to be used + * @dests: destination nodes for message. + * @cong_link_cnt: returns number of encountered congested destination links + * Returns 0 if success, otherwise errno + */ +static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, + struct tipc_mc_method *method, + struct tipc_nlist *dests, + u16 *cong_link_cnt) +{ + struct tipc_msg *hdr, *_hdr; + struct sk_buff_head tmpq; + struct sk_buff *_skb; + + /* Is a cluster supporting with new capabilities ? 
*/ + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) + return 0; + + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + if (msg_type(hdr) != TIPC_MCAST_MSG) + return 0; + + /* Allocate dummy message */ + _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* Preparing for 'synching' header */ + msg_set_syn(hdr, 1); + + /* Copy skb's header into a dummy header */ + skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); + skb_orphan(_skb); + + /* Reverse method for dummy message */ + _hdr = buf_msg(_skb); + msg_set_size(_hdr, MCAST_H_SIZE); + msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + + skb_queue_head_init(); + __skb_queue_tail(, _skb); + if (method->rcast) + tipc_bcast_xmit(net, , cong_link_cnt); + else + tipc_rcast_xmit(net, , dests, cong_link_cnt); + + /* This queue should normally be empty by now */ + __skb_queue_purge(); + + return 0; +} + /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace @@ -300,6 +372,9 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; + b
[tipc-discussion] [net-next v3 0/3] smooth change between replicast and broadcast
v3: update commit message Hoang Le (3): tipc: support broadcast/replicast configurable for bc-link tipc: introduce new capability flag for cluster tipc: smooth change between replicast and broadcast include/uapi/linux/tipc_netlink.h | 2 + net/tipc/bcast.c | 269 +- net/tipc/bcast.h | 12 ++ net/tipc/core.c | 2 + net/tipc/core.h | 3 + net/tipc/link.c | 8 + net/tipc/msg.h| 10 ++ net/tipc/netlink.c| 4 +- net/tipc/node.c | 18 ++ net/tipc/node.h | 6 +- net/tipc/socket.c | 5 + 11 files changed, 331 insertions(+), 8 deletions(-) -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next v3 2/3] tipc: introduce new capability flag for cluster
As a preparation for introducing smooth switching between the replicast and broadcast methods for multicast messages, we have to introduce a new capability flag TIPC_MCAST_RBCTL to handle this new feature. During a cluster upgrade a node can come back with this new capability, which must also be reflected in the cluster capabilities field. The new feature is only applicable if all nodes in the cluster support this new capability. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/core.c | 2 ++ net/tipc/core.h | 3 +++ net/tipc/node.c | 18 ++ net/tipc/node.h | 6 -- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 5b38f5164281..27cccd101ef6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -43,6 +43,7 @@ #include "net.h" #include "socket.h" #include "bcast.h" +#include "node.h" #include @@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; diff --git a/net/tipc/core.h b/net/tipc/core.h index 8020a6c360ff..7a68e1b6a066 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -122,6 +122,9 @@ struct tipc_net { /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; + + /* Cluster capabilities */ + u16 capabilities; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2dc4919ab23c..2717893e9dbe 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -383,6 +383,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_link_update_caps(l, capabilities); } write_unlock_bh(>lock); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + 
tn->capabilities &= temp_node->capabilities; + } goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -433,6 +438,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, break; } list_add_tail_rcu(>list, _node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + tn->capabilities &= temp_node->capabilities; + } trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(>node_list_lock); @@ -589,6 +599,7 @@ static void tipc_node_clear_links(struct tipc_node *node) */ static bool tipc_node_cleanup(struct tipc_node *peer) { + struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; @@ -604,6 +615,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer) deleted = true; } tipc_node_write_unlock(peer); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + spin_unlock_bh(>node_list_lock); return deleted; } diff --git a/net/tipc/node.h b/net/tipc/node.h index 4f59a30e989a..2404225c5d58 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,7 +51,8 @@ enum { TIPC_BLOCK_FLOWCTL= (1 << 3), TIPC_BCAST_RCAST = (1 << 4), TIPC_NODE_ID128 = (1 << 5), - TIPC_LINK_PROTO_SEQNO = (1 << 6) + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -60,7 +61,8 @@ enum { TIPC_BCAST_RCAST | \ TIPC_BLOCK_FLOWCTL | \ TIPC_NODE_ID128| \ - TIPC_LINK_PROTO_SEQNO) + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next v2 2/3] tipc: introduce new capability flag for cluster
As a preparation for introducing smooth switching between the replicast and broadcast methods for multicast messages, we have to introduce a new capability flag TIPC_MCAST_RBCTL to handle this new feature, for compatibility reasons. During a cluster upgrade a node can come back with this new capability, which must also be reflected in the cluster capabilities field; the new feature is only applicable if the cluster supports this new capability. Acked-by: Jon Maloy Signed-off-by: Hoang Le --- net/tipc/core.c | 2 ++ net/tipc/core.h | 3 +++ net/tipc/node.c | 18 ++ net/tipc/node.h | 6 -- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 5b38f5164281..27cccd101ef6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -43,6 +43,7 @@ #include "net.h" #include "socket.h" #include "bcast.h" +#include "node.h" #include @@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; diff --git a/net/tipc/core.h b/net/tipc/core.h index 8020a6c360ff..7a68e1b6a066 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -122,6 +122,9 @@ struct tipc_net { /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; + + /* Cluster capabilities */ + u16 capabilities; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2dc4919ab23c..2717893e9dbe 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -383,6 +383,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_link_update_caps(l, capabilities); } write_unlock_bh(>lock); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { 
+ tn->capabilities &= temp_node->capabilities; + } goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -433,6 +438,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, break; } list_add_tail_rcu(>list, _node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + tn->capabilities &= temp_node->capabilities; + } trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(>node_list_lock); @@ -589,6 +599,7 @@ static void tipc_node_clear_links(struct tipc_node *node) */ static bool tipc_node_cleanup(struct tipc_node *peer) { + struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; @@ -604,6 +615,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer) deleted = true; } tipc_node_write_unlock(peer); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, >node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + spin_unlock_bh(>node_list_lock); return deleted; } diff --git a/net/tipc/node.h b/net/tipc/node.h index 4f59a30e989a..2404225c5d58 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,7 +51,8 @@ enum { TIPC_BLOCK_FLOWCTL= (1 << 3), TIPC_BCAST_RCAST = (1 << 4), TIPC_NODE_ID128 = (1 << 5), - TIPC_LINK_PROTO_SEQNO = (1 << 6) + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -60,7 +61,8 @@ enum { TIPC_BCAST_RCAST | \ TIPC_BLOCK_FLOWCTL | \ TIPC_NODE_ID128| \ - TIPC_LINK_PROTO_SEQNO) + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); -- 2.17.1 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [net-next v2 1/3] tipc: support broadcast/replicast configurable for bc-link
Currently, a multicast stream uses either broadcast or replicast as transmission method, based on the ratio between the number of actual destination nodes and the cluster size. However, when an L2 interface (e.g., VXLAN) provides pseudo broadcast support, this becomes very inefficient, as it blindly replicates multicast packets to all cluster/subnet nodes, irrespective of whether they host actual target sockets or not. The TIPC multicast algorithm is able to distinguish real destination nodes from other nodes, and hence provides a smarter and more efficient method for transferring multicast messages than pseudo broadcast can do. Because of this, we now make it possible for users to force the broadcast link to permanently switch to using replicast, irrespective of which capabilities the bearer provides, or pretends to provide. Conversely, we also make it possible to force the broadcast link to always use true broadcast. While maybe less useful in deployed systems, this may at least be useful for testing the broadcast algorithm in small clusters. We retain the current AUTOSELECT ability, i.e., to let the broadcast link automatically select which algorithm to use, and to switch back and forth between broadcast and replicast as the ratio between destination node number and cluster size changes. This remains the default method. Furthermore, we make it possible to configure the threshold ratio for such switches. The default ratio is now set to 10%, down from 25% in the earlier implementation. 
Acked-by: Jon Maloy Signed-off-by: Hoang Le --- include/uapi/linux/tipc_netlink.h | 2 + net/tipc/bcast.c | 104 -- net/tipc/bcast.h | 7 ++ net/tipc/link.c | 8 +++ net/tipc/netlink.c| 4 +- 5 files changed, 120 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST,/* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d8026543bf4c..12b59268bdd6 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link"; * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ @@ -64,7 +66,9 @@ struct tipc_bc_base { int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; + bool force_bcast; bool rcast_support; + bool force_rcast; int rc_ratio; int bc_threshold; }; @@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) return 0; } +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + 
bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; + + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); +