[tipc-discussion] [PATCH] tipc: fix uninit-value in tipc_nl_node_reset_link_stats

2022-07-01 Thread Hoang Le
syzbot found the following issue on:
==
BUG: KMSAN: uninit-value in strlen lib/string.c:495 [inline]
BUG: KMSAN: uninit-value in strstr+0xb4/0x2e0 lib/string.c:840
 strlen lib/string.c:495 [inline]
 strstr+0xb4/0x2e0 lib/string.c:840
 tipc_nl_node_reset_link_stats+0x41e/0xba0 net/tipc/node.c:2582
 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline]
 genl_family_rcv_msg net/netlink/genetlink.c:775 [inline]
 genl_rcv_msg+0x103f/0x1260 net/netlink/genetlink.c:792
 netlink_rcv_skb+0x3a5/0x6c0 net/netlink/af_netlink.c:2501
 genl_rcv+0x3c/0x50 net/netlink/genetlink.c:803
 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
 netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345
 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921
 sock_sendmsg_nosec net/socket.c:714 [inline]
 sock_sendmsg net/socket.c:734 [inline]
 sys_sendmsg+0xabc/0xe90 net/socket.c:2492
 ___sys_sendmsg+0x2a5/0x350 net/socket.c:2546
 __sys_sendmsg net/socket.c:2575 [inline]
 __do_sys_sendmsg net/socket.c:2584 [inline]
 __se_sys_sendmsg net/socket.c:2582 [inline]
 __x64_sys_sendmsg+0x367/0x540 net/socket.c:2582
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x46/0xb0
==

This happens because the link name string is not validated before it is
passed to strstr() and strlen().

Reported-by: syzbot+a73d24a22be5f...@syzkaller.appspotmail.com
Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index b48d97cbbe29..23419a599471 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2561,6 +2561,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, 
struct genl_info *info)
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = tipc_net(net);
struct tipc_link_entry *le;
+   int len;
 
if (!info->attrs[TIPC_NLA_LINK])
return -EINVAL;
@@ -2574,7 +2575,14 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, 
struct genl_info *info)
if (!attrs[TIPC_NLA_LINK_NAME])
return -EINVAL;
 
+   len = nla_len(attrs[TIPC_NLA_LINK_NAME]);
+   if (len <= 0)
+   return -EINVAL;
+
link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
+   len = min_t(int, len, TIPC_MAX_LINK_NAME);
+   if (!memchr(link_name, '\0', len))
+   return -EINVAL;
 
err = -EINVAL;
if (!strcmp(link_name, tipc_bclink_name)) {
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: cleanup unused function

2022-06-16 Thread Hoang Le
tipc_dest_list_len() is not being called anywhere. Clean it up.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/name_table.c | 11 ---
 net/tipc/name_table.h |  1 -
 2 files changed, 12 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 1d8ba233d047..d1180370fdf4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l)
kfree(dst);
}
 }
-
-int tipc_dest_list_len(struct list_head *l)
-{
-   struct tipc_dest *dst;
-   int i = 0;
-
-   list_for_each_entry(dst, l, list) {
-   i++;
-   }
-   return i;
-}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 259f95e3d99c..3bcd9ef8cee3 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 
port);
 bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
 bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
 void tipc_dest_list_purge(struct list_head *l);
-int tipc_dest_list_len(struct list_head *l);
 
 #endif
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: fix use-after-free Read in tipc_named_reinit

2022-06-16 Thread Hoang Le
syzbot found the following issue on:
==
BUG: KASAN: use-after-free in tipc_named_reinit+0x94f/0x9b0
net/tipc/name_distr.c:413
Read of size 8 at addr 88805299a000 by task kworker/1:9/23764

CPU: 1 PID: 23764 Comm: kworker/1:9 Not tainted
5.18.0-rc4-syzkaller-00878-g17d49e6e8012 #0
Hardware name: Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
Workqueue: events tipc_net_finalize_work
Call Trace:
 
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 print_address_description.constprop.0.cold+0xeb/0x495
mm/kasan/report.c:313
 print_report mm/kasan/report.c:429 [inline]
 kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
 tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413
 tipc_net_finalize+0x234/0x3d0 net/tipc/net.c:138
 process_one_work+0x996/0x1610 kernel/workqueue.c:2289
 worker_thread+0x665/0x1080 kernel/workqueue.c:2436
 kthread+0x2e9/0x3a0 kernel/kthread.c:376
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298
 
[...]
==

In commit
d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work"),
cancel_work_sync() only makes sure that a tipc_net_finalize_work() work
item already executing/pending on any CPU has completed before the tipc
namespace is destroyed through tipc_exit_net(). It does not guarantee
that this work item is the last one queued, so the destroyed instance
may still be accessed by a work item that is enqueued later.

To fix this completely, we re-order the call to cancel_work_sync() so
that it runs after tipc_net_stop(). This makes sure the last queued
tipc_net_finalize_work() has completed when cancel_work_sync() returns.

Reported-by: syzbot+47af19f3307fc9c5c...@syzkaller.appspotmail.com
Fixes: d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work")
Acked-by: Jon Maloy 
Signed-off-by: Ying Xue 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3f4542e0f065..434e70eabe08 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net)
struct tipc_net *tn = tipc_net(net);
 
tipc_detach_loopback(net);
+   tipc_net_stop(net);
/* Make sure the tipc_net_finalize_work() finished */
cancel_work_sync(&tn->work);
-   tipc_net_stop(net);
-
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: cleanup unused function

2022-06-16 Thread Hoang Le
tipc_dest_list_len() is not being called anywhere. Clean it up.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/name_table.c | 11 ---
 net/tipc/name_table.h |  1 -
 2 files changed, 12 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 1d8ba233d047..d1180370fdf4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l)
kfree(dst);
}
 }
-
-int tipc_dest_list_len(struct list_head *l)
-{
-   struct tipc_dest *dst;
-   int i = 0;
-
-   list_for_each_entry(dst, l, list) {
-   i++;
-   }
-   return i;
-}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 259f95e3d99c..3bcd9ef8cee3 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 
port);
 bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
 bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
 void tipc_dest_list_purge(struct list_head *l);
-int tipc_dest_list_len(struct list_head *l);
 
 #endif
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH] tipc: cleanup unused function

2022-06-16 Thread Hoang Le
tipc_dest_list_len() is not being called anywhere. Clean it up.

Signed-off-by: Hoang Le 
---
 net/tipc/name_table.c | 11 ---
 net/tipc/name_table.h |  1 -
 2 files changed, 12 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 1d8ba233d047..d1180370fdf4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l)
kfree(dst);
}
 }
-
-int tipc_dest_list_len(struct list_head *l)
-{
-   struct tipc_dest *dst;
-   int i = 0;
-
-   list_for_each_entry(dst, l, list) {
-   i++;
-   }
-   return i;
-}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 259f95e3d99c..3bcd9ef8cee3 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 
port);
 bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
 bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
 void tipc_dest_list_purge(struct list_head *l);
-int tipc_dest_list_len(struct list_head *l);
 
 #endif
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH] tipc: fix use-after-free Read in tipc_named_reinit

2022-06-07 Thread Hoang Le
syzbot found the following issue on:
==
BUG: KASAN: use-after-free in tipc_named_reinit+0x94f/0x9b0
net/tipc/name_distr.c:413
Read of size 8 at addr 88805299a000 by task kworker/1:9/23764

CPU: 1 PID: 23764 Comm: kworker/1:9 Not tainted
5.18.0-rc4-syzkaller-00878-g17d49e6e8012 #0
Hardware name: Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
Workqueue: events tipc_net_finalize_work
Call Trace:
 
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 print_address_description.constprop.0.cold+0xeb/0x495
mm/kasan/report.c:313
 print_report mm/kasan/report.c:429 [inline]
 kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
 tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413
 tipc_net_finalize+0x234/0x3d0 net/tipc/net.c:138
 process_one_work+0x996/0x1610 kernel/workqueue.c:2289
 worker_thread+0x665/0x1080 kernel/workqueue.c:2436
 kthread+0x2e9/0x3a0 kernel/kthread.c:376
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298
 
[...]
==

In commit
d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work"),
cancel_work_sync() only makes sure that a tipc_net_finalize_work() work
item already executing/pending on any CPU has completed before the tipc
namespace is destroyed through tipc_exit_net(). It does not guarantee
that this work item is the last one queued, so the destroyed instance
may still be accessed by a work item that is enqueued later.

To fix this completely, we re-order the call to cancel_work_sync() so
that it runs after tipc_net_stop(). This makes sure the last queued
tipc_net_finalize_work() has completed when cancel_work_sync() returns.

Reported-by: syzbot+47af19f3307fc9c5c...@syzkaller.appspotmail.com
Fixes: d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work")
Signed-off-by: Ying Xue 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3f4542e0f065..434e70eabe08 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net)
struct tipc_net *tn = tipc_net(net);
 
tipc_detach_loopback(net);
+   tipc_net_stop(net);
/* Make sure the tipc_net_finalize_work() finished */
cancel_work_sync(&tn->work);
-   tipc_net_stop(net);
-
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net v3] tipc: check attribute length for bearer name

2022-06-02 Thread Hoang Le
syzbot reported uninit-value:
=
BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline]
BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725
 string_nocheck lib/vsprintf.c:644 [inline]
 string+0x4f9/0x6f0 lib/vsprintf.c:725
 vsnprintf+0x/0x3650 lib/vsprintf.c:2806
 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158
 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256
 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283
 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50
 _printk+0x18d/0x1cf kernel/printk/printk.c:2293
 tipc_enable_bearer net/tipc/bearer.c:371 [inline]
 __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033
 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042
 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline]

- Do sanity check the attribute length for TIPC_NLA_BEARER_NAME.
- Do not use 'illegal name' in printing message.

Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com
Fixes: cb30a63384bc ("tipc: refactor function tipc_enable_bearer()")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
v3: add Fixes tag in commit message.
v2: remove unnecessary sanity check as Jakub's comment.
---
 net/tipc/bearer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6d39ca05f249..932c87b98eca 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
u32 i;
 
if (!bearer_name_validate(name, &b_names)) {
-   errstr = "illegal name";
NL_SET_ERR_MSG(extack, "Illegal name");
-   goto rejected;
+   return res;
}
 
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net v3] tipc: check attribute length for bearer name

2022-06-01 Thread Hoang Le
syzbot reported uninit-value:
=
BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline]
BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725
 string_nocheck lib/vsprintf.c:644 [inline]
 string+0x4f9/0x6f0 lib/vsprintf.c:725
 vsnprintf+0x/0x3650 lib/vsprintf.c:2806
 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158
 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256
 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283
 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50
 _printk+0x18d/0x1cf kernel/printk/printk.c:2293
 tipc_enable_bearer net/tipc/bearer.c:371 [inline]
 __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033
 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042
 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline]

- Do sanity check the attribute length for TIPC_NLA_BEARER_NAME.
- Do not use 'illegal name' in printing message.

v3: add Fixes tag in commit message.
v2: remove unnecessary sanity check as Jakub's comment.

Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com
Fixes: cb30a63384bc ("tipc: refactor function tipc_enable_bearer()")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6d39ca05f249..932c87b98eca 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
u32 i;
 
if (!bearer_name_validate(name, &b_names)) {
-   errstr = "illegal name";
NL_SET_ERR_MSG(extack, "Illegal name");
-   goto rejected;
+   return res;
}
 
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net v2] tipc: check attribute length for bearer name

2022-06-01 Thread Hoang Le
syzbot reported uninit-value:
=
BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline]
BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725
 string_nocheck lib/vsprintf.c:644 [inline]
 string+0x4f9/0x6f0 lib/vsprintf.c:725
 vsnprintf+0x/0x3650 lib/vsprintf.c:2806
 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158
 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256
 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283
 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50
 _printk+0x18d/0x1cf kernel/printk/printk.c:2293
 tipc_enable_bearer net/tipc/bearer.c:371 [inline]
 __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033
 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042
 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline]

- Do sanity check the attribute length for TIPC_NLA_BEARER_NAME.
- Do not use 'illegal name' in printing message.

v2: remove unnecessary sanity check as Jakub's comment

Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6d39ca05f249..932c87b98eca 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
u32 i;
 
if (!bearer_name_validate(name, &b_names)) {
-   errstr = "illegal name";
NL_SET_ERR_MSG(extack, "Illegal name");
-   goto rejected;
+   return res;
}
 
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: check attribute length for bearer name

2022-05-31 Thread Hoang Le
syzbot reported uninit-value:
=
BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline]
BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725
 string_nocheck lib/vsprintf.c:644 [inline]
 string+0x4f9/0x6f0 lib/vsprintf.c:725
 vsnprintf+0x/0x3650 lib/vsprintf.c:2806
 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158
 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256
 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283
 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50
 _printk+0x18d/0x1cf kernel/printk/printk.c:2293
 tipc_enable_bearer net/tipc/bearer.c:371 [inline]
 __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033
 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042
 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline]

- Do sanity check the attribute length for TIPC_NLA_BEARER_NAME.
- Do not use 'illegal name' in printing message.

Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6d39ca05f249..0fd7554c7cde 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -258,10 +258,10 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
char *errstr = "";
u32 i;
 
-   if (!bearer_name_validate(name, &b_names)) {
-   errstr = "illegal name";
+   if (strlen(name) > TIPC_MAX_BEARER_NAME ||
+   !bearer_name_validate(name, &b_names)) {
NL_SET_ERR_MSG(extack, "Illegal name");
-   goto rejected;
+   return res;
}
 
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: check attribute length for bearer name

2022-05-26 Thread Hoang Le
syzbot reported uninit-value:
=
BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline]
BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725
 string_nocheck lib/vsprintf.c:644 [inline]
 string+0x4f9/0x6f0 lib/vsprintf.c:725
 vsnprintf+0x/0x3650 lib/vsprintf.c:2806
 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158
 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256
 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283
 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50
 _printk+0x18d/0x1cf kernel/printk/printk.c:2293
 tipc_enable_bearer net/tipc/bearer.c:371 [inline]
 __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033
 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042
 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline]

- Do sanity check the attribute length for TIPC_NLA_BEARER_NAME.
- Do not use 'illegal name' in printing message.

Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6d39ca05f249..0fd7554c7cde 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -258,10 +258,10 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
char *errstr = "";
u32 i;
 
-   if (!bearer_name_validate(name, &b_names)) {
-   errstr = "illegal name";
+   if (strlen(name) > TIPC_MAX_BEARER_NAME ||
+   !bearer_name_validate(name, &b_names)) {
NL_SET_ERR_MSG(extack, "Illegal name");
-   goto rejected;
+   return res;
}
 
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: fix the timer expires after interval 100ms

2022-03-20 Thread Hoang Le
In the timer callback function tipc_sk_timeout(), we try to reschedule
another timeout to retransmit a setup request if the destination link is
congested. But we pass an incorrect timeout value
(msecs_to_jiffies(100)) instead of (jiffies + msecs_to_jiffies(100)),
so the timer expires immediately rather than after the intended 100 ms
interval.

In this commit we correct the timeout value in sk_reset_timer()

Fixes: 6787927475e5 ("tipc: buffer overflow handling in listener socket")
Acked-by: Ying Xue 
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7545321c3440..17f8c523e33b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct 
sk_buff_head *list)
 
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
-   sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+   sk_reset_timer(sk, &sk->sk_timer,
+  jiffies + msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: fix the timer expires after interval 100ms

2022-03-17 Thread Hoang Le
In the timer callback function tipc_sk_timeout(), we try to reschedule
another timeout to retransmit a setup request if the destination link is
congested. But we pass an incorrect timeout value
(msecs_to_jiffies(100)) instead of (jiffies + msecs_to_jiffies(100)),
so the timer expires immediately rather than after the intended 100 ms
interval.

In this commit we correct the timeout value in sk_reset_timer()

Fixes: 6787927475e5 ("tipc: buffer overflow handling in listener socket")
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7545321c3440..17f8c523e33b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct 
sk_buff_head *list)
 
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
-   sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+   sk_reset_timer(sk, &sk->sk_timer,
+  jiffies + msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] Revert "tipc: use consistent GFP flags"

2021-12-16 Thread Hoang Le
This reverts commit 86c3a3e964d910a62eeb277d60b2a60ebefa9feb.

The tipc_aead_init() function can be called from an interrupt routine.
The allocation might sleep when the GFP_KERNEL flag is used, hence the
following BUG is reported.

[   17.657509] BUG: sleeping function called from invalid context at 
include/linux/sched/mm.h:230
[   17.660916] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: 
swapper/3
[   17.664093] preempt_count: 302, expected: 0
[   17.665619] RCU nest depth: 2, expected: 0
[   17.667163] Preemption disabled at:
[   17.667165] [<>] 0x0
[   17.669753] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: GW  
   5.16.0-rc4+ #1
[   17.673006] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.12.0-1 04/01/2014
[   17.675540] Call Trace:
[   17.676285]  
[   17.676913]  dump_stack_lvl+0x34/0x44
[   17.678033]  __might_resched.cold+0xd6/0x10f
[   17.679311]  kmem_cache_alloc_trace+0x14d/0x220
[   17.680663]  tipc_crypto_start+0x4a/0x2b0 [tipc]
[   17.682146]  ? kmem_cache_alloc_trace+0xd3/0x220
[   17.683545]  tipc_node_create+0x2f0/0x790 [tipc]
[   17.684956]  tipc_node_check_dest+0x72/0x680 [tipc]
[   17.686706]  ? ___cache_free+0x31/0x350
[   17.688008]  ? skb_release_data+0x128/0x140
[   17.689431]  tipc_disc_rcv+0x479/0x510 [tipc]
[   17.690904]  tipc_rcv+0x71c/0x730 [tipc]
[   17.692219]  ? __netif_receive_skb_core+0xb7/0xf60
[   17.693856]  tipc_l2_rcv_msg+0x5e/0x90 [tipc]
[   17.695333]  __netif_receive_skb_list_core+0x20b/0x260
[   17.697072]  netif_receive_skb_list_internal+0x1bf/0x2e0
[   17.698870]  ? dev_gro_receive+0x4c2/0x680
[   17.700255]  napi_complete_done+0x6f/0x180
[   17.701657]  virtnet_poll+0x29c/0x42e [virtio_net]
[   17.703262]  __napi_poll+0x2c/0x170
[   17.704429]  net_rx_action+0x22f/0x280
[   17.705706]  __do_softirq+0xfd/0x30a
[   17.706921]  common_interrupt+0xa4/0xc0
[   17.708206]  
[   17.708922]  
[   17.709651]  asm_common_interrupt+0x1e/0x40
[   17.711078] RIP: 0010:default_idle+0x18/0x20

Fixes: 86c3a3e964d9 ("tipc: use consistent GFP flags")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/crypto.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 81116312b753..9325479295b8 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -524,7 +524,7 @@ static int tipc_aead_init(struct tipc_aead **aead, struct 
tipc_aead_key *ukey,
return -EEXIST;
 
/* Allocate a new AEAD */
-   tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+   tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
if (unlikely(!tmp))
return -ENOMEM;
 
@@ -1463,7 +1463,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct 
net *net,
return -EEXIST;
 
/* Allocate crypto */
-   c = kzalloc(sizeof(*c), GFP_KERNEL);
+   c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return -ENOMEM;
 
@@ -1477,7 +1477,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct 
net *net,
}
 
/* Allocate statistic structure */
-   c->stats = alloc_percpu(struct tipc_crypto_stats);
+   c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
if (!c->stats) {
if (c->wq)
destroy_workqueue(c->wq);
@@ -2450,7 +2450,7 @@ static void tipc_crypto_work_tx(struct work_struct *work)
}
 
/* Lets duplicate it first */
-   skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_KERNEL);
+   skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC);
rcu_read_unlock();
 
/* Now, generate new key, initiate & distribute it */
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] Revert "tipc: use consistent GFP flags"

2021-12-09 Thread Hoang Le
This reverts commit 86c3a3e964d910a62eeb277d60b2a60ebefa9feb.

The tipc_aead_init() function can be called from an interrupt routine.
The allocation might sleep when the GFP_KERNEL flag is used, hence the
following BUG is reported.

[   17.657509] BUG: sleeping function called from invalid context at 
include/linux/sched/mm.h:230
[   17.660916] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: 
swapper/3
[   17.664093] preempt_count: 302, expected: 0
[   17.665619] RCU nest depth: 2, expected: 0
[   17.667163] Preemption disabled at:
[   17.667165] [<>] 0x0
[   17.669753] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: GW  
   5.16.0-rc4+ #1
[   17.673006] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.12.0-1 04/01/2014
[   17.675540] Call Trace:
[   17.676285]  
[   17.676913]  dump_stack_lvl+0x34/0x44
[   17.678033]  __might_resched.cold+0xd6/0x10f
[   17.679311]  kmem_cache_alloc_trace+0x14d/0x220
[   17.680663]  tipc_crypto_start+0x4a/0x2b0 [tipc]
[   17.682146]  ? kmem_cache_alloc_trace+0xd3/0x220
[   17.683545]  tipc_node_create+0x2f0/0x790 [tipc]
[   17.684956]  tipc_node_check_dest+0x72/0x680 [tipc]
[   17.686706]  ? ___cache_free+0x31/0x350
[   17.688008]  ? skb_release_data+0x128/0x140
[   17.689431]  tipc_disc_rcv+0x479/0x510 [tipc]
[   17.690904]  tipc_rcv+0x71c/0x730 [tipc]
[   17.692219]  ? __netif_receive_skb_core+0xb7/0xf60
[   17.693856]  tipc_l2_rcv_msg+0x5e/0x90 [tipc]
[   17.695333]  __netif_receive_skb_list_core+0x20b/0x260
[   17.697072]  netif_receive_skb_list_internal+0x1bf/0x2e0
[   17.698870]  ? dev_gro_receive+0x4c2/0x680
[   17.700255]  napi_complete_done+0x6f/0x180
[   17.701657]  virtnet_poll+0x29c/0x42e [virtio_net]
[   17.703262]  __napi_poll+0x2c/0x170
[   17.704429]  net_rx_action+0x22f/0x280
[   17.705706]  __do_softirq+0xfd/0x30a
[   17.706921]  common_interrupt+0xa4/0xc0
[   17.708206]  
[   17.708922]  
[   17.709651]  asm_common_interrupt+0x1e/0x40
[   17.711078] RIP: 0010:default_idle+0x18/0x20

Signed-off-by: Hoang Le 
---
 net/tipc/crypto.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 81116312b753..9325479295b8 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -524,7 +524,7 @@ static int tipc_aead_init(struct tipc_aead **aead, struct 
tipc_aead_key *ukey,
return -EEXIST;
 
/* Allocate a new AEAD */
-   tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+   tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
if (unlikely(!tmp))
return -ENOMEM;
 
@@ -1463,7 +1463,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct 
net *net,
return -EEXIST;
 
/* Allocate crypto */
-   c = kzalloc(sizeof(*c), GFP_KERNEL);
+   c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return -ENOMEM;
 
@@ -1477,7 +1477,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct 
net *net,
}
 
/* Allocate statistic structure */
-   c->stats = alloc_percpu(struct tipc_crypto_stats);
+   c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
if (!c->stats) {
if (c->wq)
destroy_workqueue(c->wq);
@@ -2450,7 +2450,7 @@ static void tipc_crypto_work_tx(struct work_struct *work)
}
 
/* Lets duplicate it first */
-   skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_KERNEL);
+   skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC);
rcu_read_unlock();
 
/* Now, generate new key, initiate & distribute it */
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: increase timeout in tipc_sk_enqueue()

2021-09-13 Thread Hoang Le
In tipc_sk_enqueue() we use hardcoded 2 jiffies to extract
socket buffer from generic queue to particular socket.
The 2 jiffies is too short in case there are other high priority
tasks get CPU cycles for multiple jiffies update. As result, no
buffer could be enqueued to particular socket.

To solve this, we switch to a constant timeout of 20 msecs.
The function will then expire after between 2 jiffies (CONFIG_100HZ)
and 20 jiffies (CONFIG_1000HZ).

Fixes: c637c1035534 ("tipc: resolve race problem at unicast message reception")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a0a27d87f631..ad570c2450be 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct 
sk_buff *skb)
 static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
 {
-   unsigned long time_limit = jiffies + 2;
+   unsigned long time_limit = jiffies + usecs_to_jiffies(2);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: increase timeout in tipc_sk_enqueue()

2021-09-10 Thread Hoang Le
In tipc_sk_enqueue() we use hardcoded 2 jiffies to extract
socket buffer from generic queue to particular socket.
The 2 jiffies is too short in case other high priority
tasks get CPU cycles across multiple jiffies updates. As a result, no
buffer could be enqueued to the particular socket.

To solve this, we switch to a constant timeout of 20 msecs.
The function will then time out after between 2 jiffies (CONFIG_HZ_100)
and 20 jiffies (CONFIG_HZ_1000).

Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a0a27d87f631..ad570c2450be 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct 
sk_buff *skb)
 static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
 {
-   unsigned long time_limit = jiffies + 2;
+   unsigned long time_limit = jiffies + usecs_to_jiffies(2);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] Revert "tipc: Return the correct errno code"

2021-08-10 Thread Hoang Le
This reverts commit 0efea3c649f0 because of:
- Returning the -ENOBUFS error is fine on socket buffer allocation failure.
- There is a side effect in the calling path
tipc_node_xmit()->tipc_link_xmit() when checking the returned error code.

Fixes: 0efea3c649f0 ("tipc: Return the correct errno code")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/link.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf586840caeb..1b7a487c8841 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct 
tipc_msg *hdr)
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
  dnode, l->addr, dport, 0, 0);
if (!skb)
-   return -ENOMEM;
+   return -ENOBUFS;
msg_set_dest_droppable(buf_msg(skb), true);
TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
skb_queue_tail(>wakeupq, skb);
@@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l)
  *
  * Consumes the buffer chain.
  * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
  */
 int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
   struct sk_buff_head *xmitq)
@@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct 
sk_buff_head *list,
if (!_skb) {
kfree_skb(skb);
__skb_queue_purge(list);
-   return -ENOMEM;
+   return -ENOBUFS;
}
__skb_queue_tail(transmq, skb);
tipc_link_set_skb_retransmit_time(skb, l);
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] Revert "tipc: Return the correct errno code"

2021-08-10 Thread Hoang Le
This reverts commit 0efea3c649f0 because of:
- Returning the -ENOBUFS error is fine on socket buffer allocation failure.
- There is a side effect in the calling path
tipc_node_xmit()->tipc_link_xmit() when checking the returned error code.

Fixes: 0efea3c649f0 ("tipc: Return the correct errno code")
Signed-off-by: Hoang Le 
---
 net/tipc/link.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf586840caeb..1b7a487c8841 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct 
tipc_msg *hdr)
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
  dnode, l->addr, dport, 0, 0);
if (!skb)
-   return -ENOMEM;
+   return -ENOBUFS;
msg_set_dest_droppable(buf_msg(skb), true);
TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
skb_queue_tail(>wakeupq, skb);
@@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l)
  *
  * Consumes the buffer chain.
  * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
  */
 int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
   struct sk_buff_head *xmitq)
@@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct 
sk_buff_head *list,
if (!_skb) {
kfree_skb(skb);
__skb_queue_purge(list);
-   return -ENOMEM;
+   return -ENOBUFS;
}
__skb_queue_tail(transmq, skb);
tipc_link_set_skb_retransmit_time(skb, l);
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: fix sleeping in tipc accept routine

2021-07-22 Thread Hoang Le
release_sock() is a blocking function; it would change the state
after sleeping. In order to evaluate the stated condition outside
the socket lock context, switch to using wait_woken() instead.

Fixes: 6398e23cdb1d8 ("tipc: standardize accept routine")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9b0b311c7ec1..2c71828b7e5c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2664,7 +2664,7 @@ static int tipc_listen(struct socket *sock, int len)
 static int tipc_wait_for_accept(struct socket *sock, long timeo)
 {
struct sock *sk = sock->sk;
-   DEFINE_WAIT(wait);
+   DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
 
/* True wake-one mechanism for incoming connections: only
@@ -2673,12 +2673,12 @@ static int tipc_wait_for_accept(struct socket *sock, 
long timeo)
 * anymore, the common case will execute the loop only once.
*/
for (;;) {
-   prepare_to_wait_exclusive(sk_sleep(sk), ,
- TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(>sk_receive_queue)) {
+   add_wait_queue(sk_sleep(sk), );
release_sock(sk);
-   timeo = schedule_timeout(timeo);
+   timeo = wait_woken(, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
+   remove_wait_queue(sk_sleep(sk), );
}
err = 0;
if (!skb_queue_empty(>sk_receive_queue))
@@ -2690,7 +2690,6 @@ static int tipc_wait_for_accept(struct socket *sock, long 
timeo)
if (signal_pending(current))
break;
}
-   finish_wait(sk_sleep(sk), );
return err;
 }
 
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: fix sleeping in tipc accept routine

2021-07-21 Thread Hoang Le
release_sock() is a blocking function; it would change the state
after sleeping. In order to evaluate the stated condition outside
the socket lock context, switch to using wait_woken() instead.

Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9b0b311c7ec1..2c71828b7e5c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2664,7 +2664,7 @@ static int tipc_listen(struct socket *sock, int len)
 static int tipc_wait_for_accept(struct socket *sock, long timeo)
 {
struct sock *sk = sock->sk;
-   DEFINE_WAIT(wait);
+   DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
 
/* True wake-one mechanism for incoming connections: only
@@ -2673,12 +2673,12 @@ static int tipc_wait_for_accept(struct socket *sock, 
long timeo)
 * anymore, the common case will execute the loop only once.
*/
for (;;) {
-   prepare_to_wait_exclusive(sk_sleep(sk), ,
- TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(>sk_receive_queue)) {
+   add_wait_queue(sk_sleep(sk), );
release_sock(sk);
-   timeo = schedule_timeout(timeo);
+   timeo = wait_woken(, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
+   remove_wait_queue(sk_sleep(sk), );
}
err = 0;
if (!skb_queue_empty(>sk_receive_queue))
@@ -2690,7 +2690,6 @@ static int tipc_wait_for_accept(struct socket *sock, long 
timeo)
if (signal_pending(current))
break;
}
-   finish_wait(sk_sleep(sk), );
return err;
 }
 
-- 
2.30.2



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"

2021-05-13 Thread Hoang Le
This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046.
The above fix is not correct and caused a memory leak issue.

Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
Acked-by: Jon Maloy 
Acked-by: Tung Nguyen 
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 58935cd0d068..53af72824c9c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct 
sk_buff_head *arrvq,
spin_lock_bh(>lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(, inputq);
-   __skb_dequeue(arrvq);
+   /* Decrease the skb's refcnt as increasing in the
+* function tipc_skb_peek
+*/
+   kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(>lock);
__skb_queue_purge();
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] tipc: make node link identity publish thread safe

2021-05-09 Thread Hoang Le
The use of the node address and node link identity is not thread safe,
meaning that two publications may be published with the same values; as a
result, one of them will fail because it already exists in the name table.
To avoid this we have to use the node address and node link identity values
from inside the node item's write lock protection.

Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8217905348f4..81af92954c6c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(>lock);
 
if (flags & TIPC_NOTIFY_NODE_DOWN)
-   tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
+   tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
 
if (flags & TIPC_NOTIFY_NODE_UP)
-   tipc_named_node_up(net, n->addr, n->capabilities);
+   tipc_named_node_up(net, sk.node, n->capabilities);
 
if (flags & TIPC_NOTIFY_LINK_UP) {
-   tipc_mon_peer_up(net, n->addr, bearer_id);
-   tipc_nametbl_publish(net, , , n->link_id);
+   tipc_mon_peer_up(net, sk.node, bearer_id);
+   tipc_nametbl_publish(net, , , sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
-   tipc_mon_peer_down(net, n->addr, bearer_id);
-   tipc_nametbl_withdraw(net, , , n->link_id);
+   tipc_mon_peer_down(net, sk.node, bearer_id);
+   tipc_nametbl_withdraw(net, , , sk.ref);
}
 }
 
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2] tipc: call a sub-routine in separate socket

2021-05-05 Thread Hoang Le
When receiving a result from the first query to netlink, we may execute
another query inside the callback. If this sub-routine is called
on the same socket, the result from the previous execution will be
discarded.
To avoid this we perform the nested query in a separate socket.

Fixes: 202102830663 ("tipc: use the libmnl functions in lib/mnl_utils.c")
Signed-off-by: Hoang Le 
Acked-by: Jon Maloy 
---
 tipc/bearer.c | 50 +-
 tipc/link.c   | 15 +--
 tipc/socket.c | 17 +++--
 3 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 2afc48b9b108..968293bc9160 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -20,7 +20,9 @@
 #include 
 #include 
 #include 
+#include 
 
+#include "mnl_utils.h"
 #include "utils.h"
 #include "cmdl.h"
 #include "msg.h"
@@ -98,16 +100,28 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void 
*data)
 
 static int generate_multicast(short af, char *buf, int bufsize)
 {
+   struct mnlu_gen_socket bearer_nlg;
struct nlmsghdr *nlh;
int netid;
+   int err = 0;
 
-   nlh = msg_init(TIPC_NL_NET_GET);
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+
+   nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_NET_GET,
+ NLM_F_REQUEST | NLM_F_DUMP);
if (!nlh) {
fprintf(stderr, "error, message initialization failed\n");
+   mnlu_gen_socket_close(_nlg);
return -1;
}
-   if (msg_dumpit(nlh, get_netid_cb, )) {
+
+   err = mnlu_gen_socket_sndrcv(_nlg, nlh, get_netid_cb, );
+   if (err) {
fprintf(stderr, "error, failed to fetch TIPC network id from 
kernel\n");
+   mnlu_gen_socket_close(_nlg);
return -EINVAL;
}
if (af == AF_INET)
@@ -115,6 +129,7 @@ static int generate_multicast(short af, char *buf, int 
bufsize)
else
snprintf(buf, bufsize, "ff02::%u", netid);
 
+   mnlu_gen_socket_close(_nlg);
return 0;
 }
 
@@ -794,10 +809,35 @@ static int bearer_get_udp_cb(const struct nlmsghdr *nlh, 
void *data)
if ((cb_data->attr == TIPC_NLA_UDP_REMOTE) &&
(cb_data->prop == UDP_PROP_IP) &&
opts[TIPC_NLA_UDP_MULTI_REMOTEIP]) {
-   struct genlmsghdr *genl = mnl_nlmsg_get_payload(cb_data->nlh);
+   struct mnlu_gen_socket bearer_nlg;
+   struct nlattr *attr;
+   struct nlmsghdr *h;
+   const char *bname;
+   int err = 0;
+
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+
+   h = mnlu_gen_socket_cmd_prepare(_nlg,
+   TIPC_NL_UDP_GET_REMOTEIP,
+   NLM_F_REQUEST | NLM_F_DUMP);
+   if (!h) {
+   fprintf(stderr, "error, message initialization 
failed\n");
+   mnlu_gen_socket_close(_nlg);
+   return -1;
+   }
 
-   genl->cmd = TIPC_NL_UDP_GET_REMOTEIP;
-   return msg_dumpit(cb_data->nlh, bearer_dump_udp_cb, NULL);
+   attr = mnl_attr_nest_start(h, TIPC_NLA_BEARER);
+   bname = mnl_attr_get_str(attrs[TIPC_NLA_BEARER_NAME]);
+   mnl_attr_put_strz(h, TIPC_NLA_BEARER_NAME, bname);
+   mnl_attr_nest_end(h, attr);
+
+   err = mnlu_gen_socket_sndrcv(_nlg, h,
+bearer_dump_udp_cb, NULL);
+   mnlu_gen_socket_close(_nlg);
+   return err;
}
 
addr = mnl_attr_get_payload(opts[cb_data->attr]);
diff --git a/tipc/link.c b/tipc/link.c
index 2123f109c694..9994ada2a367 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -17,7 +17,9 @@
 #include 
 #include 
 #include 
+#include 
 
+#include "mnl_utils.h"
 #include "cmdl.h"
 #include "msg.h"
 #include "link.h"
@@ -993,13 +995,20 @@ exit:
 
 static int link_mon_peer_list(uint32_t mon_ref)
 {
+   struct mnlu_gen_socket link_nlg;
struct nlmsghdr *nlh;
struct nlattr *nest;
int err = 0;
 
-   nlh = msg_init(TIPC_NL_MON_PEER_GET);
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+   nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_MON_PEER_GET,
+ NLM_F_REQUEST | NLM_F_DUMP);
if (!nlh) {
fprintf

[tipc-discussion] [iproute2-next v2] tipc: call a sub-routine in separate socket

2021-05-03 Thread Hoang Le
When receiving a result from the first query to netlink, we may execute
another query inside the callback. If this sub-routine is called
on the same socket, the result from the previous execution will be
discarded.
To avoid this we perform the nested query in a separate socket.

Fixes: 202102830663 ("tipc: use the libmnl functions in lib/mnl_utils.c")
Signed-off-by: Hoang Le 
---
 tipc/bearer.c | 50 +-
 tipc/link.c   | 15 +--
 tipc/socket.c | 17 +++--
 3 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 2afc48b9b108..968293bc9160 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -20,7 +20,9 @@
 #include 
 #include 
 #include 
+#include 
 
+#include "mnl_utils.h"
 #include "utils.h"
 #include "cmdl.h"
 #include "msg.h"
@@ -98,16 +100,28 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void 
*data)
 
 static int generate_multicast(short af, char *buf, int bufsize)
 {
+   struct mnlu_gen_socket bearer_nlg;
struct nlmsghdr *nlh;
int netid;
+   int err = 0;
 
-   nlh = msg_init(TIPC_NL_NET_GET);
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+
+   nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_NET_GET,
+ NLM_F_REQUEST | NLM_F_DUMP);
if (!nlh) {
fprintf(stderr, "error, message initialization failed\n");
+   mnlu_gen_socket_close(_nlg);
return -1;
}
-   if (msg_dumpit(nlh, get_netid_cb, )) {
+
+   err = mnlu_gen_socket_sndrcv(_nlg, nlh, get_netid_cb, );
+   if (err) {
fprintf(stderr, "error, failed to fetch TIPC network id from 
kernel\n");
+   mnlu_gen_socket_close(_nlg);
return -EINVAL;
}
if (af == AF_INET)
@@ -115,6 +129,7 @@ static int generate_multicast(short af, char *buf, int 
bufsize)
else
snprintf(buf, bufsize, "ff02::%u", netid);
 
+   mnlu_gen_socket_close(_nlg);
return 0;
 }
 
@@ -794,10 +809,35 @@ static int bearer_get_udp_cb(const struct nlmsghdr *nlh, 
void *data)
if ((cb_data->attr == TIPC_NLA_UDP_REMOTE) &&
(cb_data->prop == UDP_PROP_IP) &&
opts[TIPC_NLA_UDP_MULTI_REMOTEIP]) {
-   struct genlmsghdr *genl = mnl_nlmsg_get_payload(cb_data->nlh);
+   struct mnlu_gen_socket bearer_nlg;
+   struct nlattr *attr;
+   struct nlmsghdr *h;
+   const char *bname;
+   int err = 0;
+
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+
+   h = mnlu_gen_socket_cmd_prepare(_nlg,
+   TIPC_NL_UDP_GET_REMOTEIP,
+   NLM_F_REQUEST | NLM_F_DUMP);
+   if (!h) {
+   fprintf(stderr, "error, message initialization 
failed\n");
+   mnlu_gen_socket_close(_nlg);
+   return -1;
+   }
 
-   genl->cmd = TIPC_NL_UDP_GET_REMOTEIP;
-   return msg_dumpit(cb_data->nlh, bearer_dump_udp_cb, NULL);
+   attr = mnl_attr_nest_start(h, TIPC_NLA_BEARER);
+   bname = mnl_attr_get_str(attrs[TIPC_NLA_BEARER_NAME]);
+   mnl_attr_put_strz(h, TIPC_NLA_BEARER_NAME, bname);
+   mnl_attr_nest_end(h, attr);
+
+   err = mnlu_gen_socket_sndrcv(_nlg, h,
+bearer_dump_udp_cb, NULL);
+   mnlu_gen_socket_close(_nlg);
+   return err;
}
 
addr = mnl_attr_get_payload(opts[cb_data->attr]);
diff --git a/tipc/link.c b/tipc/link.c
index 2123f109c694..9994ada2a367 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -17,7 +17,9 @@
 #include 
 #include 
 #include 
+#include 
 
+#include "mnl_utils.h"
 #include "cmdl.h"
 #include "msg.h"
 #include "link.h"
@@ -993,13 +995,20 @@ exit:
 
 static int link_mon_peer_list(uint32_t mon_ref)
 {
+   struct mnlu_gen_socket link_nlg;
struct nlmsghdr *nlh;
struct nlattr *nest;
int err = 0;
 
-   nlh = msg_init(TIPC_NL_MON_PEER_GET);
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+   nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_MON_PEER_GET,
+ NLM_F_REQUEST | NLM_F_DUMP);
if (!nlh) {
fprintf(stderr, "error, m

[tipc-discussion] [[re-send] net] tipc: make node link identity publish thread safe

2021-04-19 Thread Hoang Le
The use of the node address and node link identity is not thread safe,
meaning that two publications may be published with the same values; as a
result, one of them will fail because it already exists in the name table.
To avoid this we have to use the node address and node link identity values
from inside the node item's write lock protection.

Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()")
Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8217905348f4..81af92954c6c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(>lock);
 
if (flags & TIPC_NOTIFY_NODE_DOWN)
-   tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
+   tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
 
if (flags & TIPC_NOTIFY_NODE_UP)
-   tipc_named_node_up(net, n->addr, n->capabilities);
+   tipc_named_node_up(net, sk.node, n->capabilities);
 
if (flags & TIPC_NOTIFY_LINK_UP) {
-   tipc_mon_peer_up(net, n->addr, bearer_id);
-   tipc_nametbl_publish(net, , , n->link_id);
+   tipc_mon_peer_up(net, sk.node, bearer_id);
+   tipc_nametbl_publish(net, , , sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
-   tipc_mon_peer_down(net, n->addr, bearer_id);
-   tipc_nametbl_withdraw(net, , , n->link_id);
+   tipc_mon_peer_down(net, sk.node, bearer_id);
+   tipc_nametbl_withdraw(net, , , sk.ref);
}
 }
 
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net v2] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"

2021-04-19 Thread Hoang Le
This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046.
The above fix is not correct and caused a memory leak issue.

Acked-by: Tung Nguyen 
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 58935cd0d068..53af72824c9c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct 
sk_buff_head *arrvq,
spin_lock_bh(>lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(, inputq);
-   __skb_dequeue(arrvq);
+   /* Decrease the skb's refcnt as increasing in the
+* function tipc_skb_peek
+*/
+   kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(>lock);
__skb_queue_purge();
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"

2021-04-12 Thread Hoang Le
This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046.

The above fix is not correct and caused a memory leak issue:
the function tipc_skb_peek increases the skb's refcnt, so
we have to call kfree_skb twice to decrease the skb's refcnt and
free the skb.

Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 58935cd0d068..f21162aa0cf7 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1262,7 +1262,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct 
sk_buff_head *arrvq,
spin_lock_bh(>lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(, inputq);
-   __skb_dequeue(arrvq);
+   kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(>lock);
__skb_queue_purge();
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next] tipc: call a sub-routine in separate socket

2021-04-11 Thread Hoang Le
When receiving a result from the first query to netlink, we may execute
another query inside the callback. If this sub-routine is called
on the same socket, the result from the previous execution will be
discarded.
To avoid this we perform the nested query in a separate socket.

Fixes: 202102830663 ("tipc: use the libmnl functions in lib/mnl_utils.c")
Signed-off-by: Hoang Le 
---
 tipc/link.c   | 15 +--
 tipc/socket.c | 17 +++--
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/tipc/link.c b/tipc/link.c
index 2123f109c694..9994ada2a367 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -17,7 +17,9 @@
 #include 
 #include 
 #include 
+#include 
 
+#include "mnl_utils.h"
 #include "cmdl.h"
 #include "msg.h"
 #include "link.h"
@@ -993,13 +995,20 @@ exit:
 
 static int link_mon_peer_list(uint32_t mon_ref)
 {
+   struct mnlu_gen_socket link_nlg;
struct nlmsghdr *nlh;
struct nlattr *nest;
int err = 0;
 
-   nlh = msg_init(TIPC_NL_MON_PEER_GET);
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+   nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_MON_PEER_GET,
+ NLM_F_REQUEST | NLM_F_DUMP);
if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
+   mnlu_gen_socket_close(_nlg);
return -1;
}
 
@@ -1007,7 +1016,9 @@ static int link_mon_peer_list(uint32_t mon_ref)
mnl_attr_put_u32(nlh, TIPC_NLA_MON_REF, mon_ref);
mnl_attr_nest_end(nlh, nest);
 
-   err = msg_dumpit(nlh, link_mon_peer_list_cb, NULL);
+   err = mnlu_gen_socket_sndrcv(_nlg, nlh, link_mon_peer_list_cb,
+NULL);
+   mnlu_gen_socket_close(_nlg);
return err;
 }
 
diff --git a/tipc/socket.c b/tipc/socket.c
index deae12af4409..597ffd91af52 100644
--- a/tipc/socket.c
+++ b/tipc/socket.c
@@ -15,7 +15,9 @@
 #include 
 #include 
 #include 
+#include 
 
+#include "mnl_utils.h"
 #include "cmdl.h"
 #include "msg.h"
 #include "socket.h"
@@ -44,12 +46,21 @@ static int publ_list_cb(const struct nlmsghdr *nlh, void 
*data)
 
 static int publ_list(uint32_t sock)
 {
+   struct mnlu_gen_socket sock_nlg;
struct nlmsghdr *nlh;
struct nlattr *nest;
+   int err;
 
-   nlh = msg_init(TIPC_NL_PUBL_GET);
+   err = mnlu_gen_socket_open(_nlg, TIPC_GENL_V2_NAME,
+  TIPC_GENL_V2_VERSION);
+   if (err)
+   return -1;
+
+   nlh = mnlu_gen_socket_cmd_prepare(_nlg, TIPC_NL_PUBL_GET,
+ NLM_F_REQUEST | NLM_F_DUMP);
if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
+   mnlu_gen_socket_close(_nlg);
return -1;
}
 
@@ -57,7 +68,9 @@ static int publ_list(uint32_t sock)
mnl_attr_put_u32(nlh, TIPC_NLA_SOCK_REF, sock);
mnl_attr_nest_end(nlh, nest);
 
-   return msg_dumpit(nlh, publ_list_cb, NULL);
+   err = mnlu_gen_socket_sndrcv(_nlg, nlh, publ_list_cb, NULL);
+   mnlu_gen_socket_close(_nlg);
+   return err;
 }
 
 static int sock_list_cb(const struct nlmsghdr *nlh, void *data)
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next] tipc: use the libmnl functions in lib/mnl_utils.c

2021-03-31 Thread Hoang Le
To avoid code duplication, tipc should be converted to use the helper
functions for working with libmnl in lib/mnl_utils.c

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 tipc/bearer.c|  38 ++
 tipc/cmdl.c  |   2 -
 tipc/link.c  |  37 +
 tipc/media.c |  15 +++---
 tipc/msg.c   | 132 +++
 tipc/msg.h   |   2 +-
 tipc/nametable.c |   5 +-
 tipc/node.c  |  33 +---
 tipc/peer.c  |   8 ++-
 tipc/socket.c|  10 ++--
 tipc/tipc.c  |  21 +++-
 11 files changed, 83 insertions(+), 220 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 4470819e4a96..2afc48b9b108 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -21,9 +21,6 @@
 #include 
 #include 
 
-#include 
-#include 
-
 #include "utils.h"
 #include "cmdl.h"
 #include "msg.h"
@@ -101,11 +98,11 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void 
*data)
 
 static int generate_multicast(short af, char *buf, int bufsize)
 {
-   int netid;
-   char mnl_msg[MNL_SOCKET_BUFFER_SIZE];
struct nlmsghdr *nlh;
+   int netid;
 
-   if (!(nlh = msg_init(mnl_msg, TIPC_NL_NET_GET))) {
+   nlh = msg_init(TIPC_NL_NET_GET);
+   if (!nlh) {
fprintf(stderr, "error, message initialization failed\n");
return -1;
}
@@ -399,7 +396,6 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
 {
int err;
char *media;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct opt *opt;
struct nlattr *attrs;
struct opt opts[] = {
@@ -435,7 +431,8 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_ADD))) {
+   nlh = msg_init(TIPC_NL_BEARER_ADD);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -482,7 +479,6 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
int err;
struct opt *opt;
struct nlattr *nest;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct opt opts[] = {
{ "device", OPT_KEYVAL, NULL },
{ "domain", OPT_KEYVAL, NULL },
@@ -508,7 +504,8 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_ENABLE))) {
+   nlh = msg_init(TIPC_NL_BEARER_ENABLE);
+   if (!nlh) {
fprintf(stderr, "error: message initialisation failed\n");
return -1;
}
@@ -563,7 +560,6 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
  struct cmdl *cmdl, void *data)
 {
int err;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct nlattr *nest;
struct opt opts[] = {
{ "device", OPT_KEYVAL, NULL },
@@ -584,7 +580,8 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_DISABLE))) {
+   nlh = msg_init(TIPC_NL_BEARER_DISABLE);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -628,7 +625,6 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
int err;
int val;
int prop;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct nlattr *props;
struct nlattr *attrs;
struct opt opts[] = {
@@ -675,7 +671,8 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
}
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_SET))) {
+   nlh = msg_init(TIPC_NL_BEARER_SET);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -876,7 +873,6 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
 {
int err;
char *media;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct opt *opt;
struct cb_data cb_data = {0};
struct nlattr *attrs;
@@ -918,7 +914,8 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_GET))) {
+   nlh = msg_init(TIPC_NL_BEARER_GET);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -956,7 +953,6 @@ static int cmd_bearer_get_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
 {
 

[tipc-discussion] [net] tipc: fix unique bearer names sanity check

2021-03-31 Thread Hoang Le
When enabling a bearer by name, we don't sanity check its name with
higher slot in bearer list. This may have the effect that the name
of an already enabled bearer bypasses the check.

To fix the above issue, we just perform an extra checking with all
existing bearers.

Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()")
Cc: sta...@vger.kernel.org
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 46 +++---
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d47e0b940ac9..443f8e5b9477 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -256,6 +256,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
int bearer_id = 0;
int res = -EINVAL;
char *errstr = "";
+   u32 i;
 
if (!bearer_name_validate(name, _names)) {
errstr = "illegal name";
@@ -280,31 +281,38 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
prio = m->priority;
 
/* Check new bearer vs existing ones and find free bearer id if any */
-   while (bearer_id < MAX_BEARERS) {
-   b = rtnl_dereference(tn->bearer_list[bearer_id]);
-   if (!b)
-   break;
+   bearer_id = MAX_BEARERS;
+   i = MAX_BEARERS;
+   while (i-- != 0) {
+   b = rtnl_dereference(tn->bearer_list[i]);
+   if (!b) {
+   bearer_id = i;
+   continue;
+   }
if (!strcmp(name, b->name)) {
errstr = "already enabled";
NL_SET_ERR_MSG(extack, "Already enabled");
goto rejected;
}
-   bearer_id++;
-   if (b->priority != prio)
-   continue;
-   if (++with_this_prio <= 2)
-   continue;
-   pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
-   name, prio);
-   if (prio == TIPC_MIN_LINK_PRI) {
-   errstr = "cannot adjust to lower";
-   NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
-   goto rejected;
+
+   if (b->priority == prio &&
+   (++with_this_prio > 2)) {
+   pr_warn("Bearer <%s>: already 2 bearers with priority 
%u\n",
+   name, prio);
+
+   if (prio == TIPC_MIN_LINK_PRI) {
+   errstr = "cannot adjust to lower";
+   NL_SET_ERR_MSG(extack, "Cannot adjust to 
lower");
+   goto rejected;
+   }
+
+   pr_warn("Bearer <%s>: trying with adjusted priority\n",
+   name);
+   prio--;
+   bearer_id = MAX_BEARERS;
+   i = MAX_BEARERS;
+   with_this_prio = 1;
}
-   pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
-   prio--;
-   bearer_id = 0;
-   with_this_prio = 1;
}
 
if (bearer_id >= MAX_BEARERS) {
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net v2] tipc: fix unique bearer names sanity check

2021-03-29 Thread Hoang Le
When enabling a bearer identified by name, we don't sanity check
its name against bearers in higher slots of the bearer list. This lets
duplicate bearer names bypass the check.

To fix the above issue, we perform an extra check against all
existing bearers.

Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()")
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 45 ++---
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d47e0b940ac9..94eddc67d52e 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -256,6 +256,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
int bearer_id = 0;
int res = -EINVAL;
char *errstr = "";
+   u32 i;
 
if (!bearer_name_validate(name, &b_names)) {
errstr = "illegal name";
@@ -280,31 +281,37 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
prio = m->priority;
 
/* Check new bearer vs existing ones and find free bearer id if any */
-   while (bearer_id < MAX_BEARERS) {
-   b = rtnl_dereference(tn->bearer_list[bearer_id]);
-   if (!b)
-   break;
+   bearer_id = MAX_BEARERS;
+   i = MAX_BEARERS;
+   while (i-- != 0) {
+   b = rtnl_dereference(tn->bearer_list[i]);
+   if (!b) {
+   bearer_id = i;
+   continue;
+   }
if (!strcmp(name, b->name)) {
errstr = "already enabled";
NL_SET_ERR_MSG(extack, "Already enabled");
goto rejected;
}
-   bearer_id++;
-   if (b->priority != prio)
-   continue;
-   if (++with_this_prio <= 2)
-   continue;
-   pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
-   name, prio);
-   if (prio == TIPC_MIN_LINK_PRI) {
-   errstr = "cannot adjust to lower";
-   NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
-   goto rejected;
+
+   if (b->priority == prio &&
+   (++with_this_prio > 2)) {
+   pr_warn("Bearer <%s>: already 2 bearers with priority 
%u\n",
+   name, prio);
+
+   if (prio == TIPC_MIN_LINK_PRI) {
+   errstr = "cannot adjust to lower";
+   NL_SET_ERR_MSG(extack, "Cannot adjust to 
lower");
+   goto rejected;
+   }
+
+   pr_warn("Bearer <%s>: trying with adjusted priority\n", 
name);
+   prio--;
+   bearer_id = MAX_BEARERS;
+   i = MAX_BEARERS;
+   with_this_prio = 1;
}
-   pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
-   prio--;
-   bearer_id = 0;
-   with_this_prio = 1;
}
 
if (bearer_id >= MAX_BEARERS) {
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next] tipc: use the libmnl functions in lib/mnl_utils.c

2021-03-29 Thread Hoang Le
To avoid duplication, tipc should be converted to use the helper
functions for working with libmnl in lib/mnl_utils.c

Signed-off-by: Hoang Le 
---
 tipc/bearer.c|  38 ++
 tipc/cmdl.c  |   2 -
 tipc/link.c  |  37 +
 tipc/media.c |  15 +++---
 tipc/msg.c   | 132 +++
 tipc/msg.h   |   2 +-
 tipc/nametable.c |   5 +-
 tipc/node.c  |  33 +---
 tipc/peer.c  |   8 ++-
 tipc/socket.c|  10 ++--
 tipc/tipc.c  |  21 +++-
 11 files changed, 83 insertions(+), 220 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 4470819e4a96..2afc48b9b108 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -21,9 +21,6 @@
 #include 
 #include 
 
-#include 
-#include 
-
 #include "utils.h"
 #include "cmdl.h"
 #include "msg.h"
@@ -101,11 +98,11 @@ static int get_netid_cb(const struct nlmsghdr *nlh, void 
*data)
 
 static int generate_multicast(short af, char *buf, int bufsize)
 {
-   int netid;
-   char mnl_msg[MNL_SOCKET_BUFFER_SIZE];
struct nlmsghdr *nlh;
+   int netid;
 
-   if (!(nlh = msg_init(mnl_msg, TIPC_NL_NET_GET))) {
+   nlh = msg_init(TIPC_NL_NET_GET);
+   if (!nlh) {
fprintf(stderr, "error, message initialization failed\n");
return -1;
}
@@ -399,7 +396,6 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
 {
int err;
char *media;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct opt *opt;
struct nlattr *attrs;
struct opt opts[] = {
@@ -435,7 +431,8 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_ADD))) {
+   nlh = msg_init(TIPC_NL_BEARER_ADD);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -482,7 +479,6 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
int err;
struct opt *opt;
struct nlattr *nest;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct opt opts[] = {
{ "device", OPT_KEYVAL, NULL },
{ "domain", OPT_KEYVAL, NULL },
@@ -508,7 +504,8 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_ENABLE))) {
+   nlh = msg_init(TIPC_NL_BEARER_ENABLE);
+   if (!nlh) {
fprintf(stderr, "error: message initialisation failed\n");
return -1;
}
@@ -563,7 +560,6 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
  struct cmdl *cmdl, void *data)
 {
int err;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct nlattr *nest;
struct opt opts[] = {
{ "device", OPT_KEYVAL, NULL },
@@ -584,7 +580,8 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_DISABLE))) {
+   nlh = msg_init(TIPC_NL_BEARER_DISABLE);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -628,7 +625,6 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
int err;
int val;
int prop;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct nlattr *props;
struct nlattr *attrs;
struct opt opts[] = {
@@ -675,7 +671,8 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
}
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_SET))) {
+   nlh = msg_init(TIPC_NL_BEARER_SET);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -876,7 +873,6 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
 {
int err;
char *media;
-   char buf[MNL_SOCKET_BUFFER_SIZE];
struct opt *opt;
struct cb_data cb_data = {0};
struct nlattr *attrs;
@@ -918,7 +914,8 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return -EINVAL;
}
 
-   if (!(nlh = msg_init(buf, TIPC_NL_BEARER_GET))) {
+   nlh = msg_init(TIPC_NL_BEARER_GET);
+   if (!nlh) {
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
@@ -956,7 +953,6 @@ static int cmd_bearer_get_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
 {
in

[tipc-discussion] [net] tipc: fix unique bearer names sanity check

2021-03-29 Thread Hoang Le
When enabling a bearer identified by name, we don't sanity check
its name against bearers in higher slots of the bearer list. This lets
duplicate bearer names bypass the check.

To fix the above issue, we perform an extra check against all
existing bearers.

Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()")
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d47e0b940ac9..6fae68f0e654 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -256,6 +256,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
int bearer_id = 0;
int res = -EINVAL;
char *errstr = "";
+   u32 i;
 
if (!bearer_name_validate(name, &b_names)) {
errstr = "illegal name";
@@ -313,6 +314,18 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
goto rejected;
}
 
+   /* Check new bearer from free slot up to MAX_BEARERS */
+   for (i = bearer_id + 1; i <= MAX_BEARERS; ++i) {
+   b = rtnl_dereference(tn->bearer_list[i]);
+   if (!b)
+   continue;
+   if (!strcmp(name, b->name)) {
+   errstr = "already enabled";
+   NL_SET_ERR_MSG(extack, "Already enabled");
+   goto rejected;
+   }
+   }
+
b = kzalloc(sizeof(*b), GFP_ATOMIC);
if (!b)
return -ENOMEM;
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: fix kernel-doc warnings

2021-03-26 Thread Hoang Le
Fix kernel-doc warning introduced in
commit b83e214b2e04 ("tipc: add extack messages for bearer/media failure"):

net/tipc/bearer.c:248: warning: Function parameter or member 'extack' not 
described in 'tipc_enable_bearer'

Fixes: b83e214b2e04 ("tipc: add extack messages for bearer/media failure")
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 1090f21fcfac..d47e0b940ac9 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -240,6 +240,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 
bearer_id, u32 dest)
  * @disc_domain: bearer domain
  * @prio: bearer priority
  * @attr: nlattr array
+ * @extack: netlink extended ack
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
  u32 disc_domain, u32 prio,
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next] tipc: add support for the netlink extack

2021-03-24 Thread Hoang Le
Add extack support to tipc so that it dumps the netlink extack error
messages (e.g. for -EINVAL) sent from the kernel.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 tipc/msg.c | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/tipc/msg.c b/tipc/msg.c
index dc09d05048f3..f29b2f8d35ad 100644
--- a/tipc/msg.c
+++ b/tipc/msg.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 
+#include "libnetlink.h"
 #include "msg.h"
 
 int parse_attrs(const struct nlattr *attr, void *data)
@@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 {
int ret;
struct mnl_socket *nl;
+   int one = 1;
 
nl = mnl_socket_open(NETLINK_GENERIC);
if (nl == NULL) {
@@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
return NULL;
}
 
+   /* support to get extended ACK */
+   mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, &one, sizeof(one));
ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
if (ret < 0) {
perror("mnl_socket_bind");
@@ -73,21 +77,32 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 
 static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int 
seq)
 {
-   int ret;
unsigned int portid;
char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlmsghdr *h;
+   size_t num_bytes;
+   int is_err = 0;
+   int ret = 0;
 
portid = mnl_socket_get_portid(nl);
 
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
-   while (ret > 0) {
-   ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   while (num_bytes > 0) {
+   ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data);
if (ret <= 0)
break;
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   }
+
+   if (ret == -1) {
+   if (num_bytes > 0) {
+   h = (struct nlmsghdr *)buf;
+   is_err = nl_dump_ext_ack(h, NULL);
+   }
+
+   if (!is_err)
+   perror("error");
}
-   if (ret == -1)
-   perror("error");
 
mnl_socket_close(nl);
 
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: add extack messages for bearer/media failure

2021-03-24 Thread Hoang Le
Add extack error messages for -EINVAL errors when enabling a bearer or
getting/setting properties for a media/bearer.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 50 +--
 1 file changed, 40 insertions(+), 10 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a4389ef08a98..1090f21fcfac 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -243,7 +243,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 
bearer_id, u32 dest)
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
  u32 disc_domain, u32 prio,
- struct nlattr *attr[])
+ struct nlattr *attr[],
+ struct netlink_ext_ack *extack)
 {
struct tipc_net *tn = tipc_net(net);
struct tipc_bearer_names b_names;
@@ -257,17 +258,20 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
 
if (!bearer_name_validate(name, &b_names)) {
errstr = "illegal name";
+   NL_SET_ERR_MSG(extack, "Illegal name");
goto rejected;
}
 
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
errstr = "illegal priority";
+   NL_SET_ERR_MSG(extack, "Illegal priority");
goto rejected;
}
 
m = tipc_media_find(b_names.media_name);
if (!m) {
errstr = "media not registered";
+   NL_SET_ERR_MSG(extack, "Media not registered");
goto rejected;
}
 
@@ -281,6 +285,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
break;
if (!strcmp(name, b->name)) {
errstr = "already enabled";
+   NL_SET_ERR_MSG(extack, "Already enabled");
goto rejected;
}
bearer_id++;
@@ -292,6 +297,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
name, prio);
if (prio == TIPC_MIN_LINK_PRI) {
errstr = "cannot adjust to lower";
+   NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
goto rejected;
}
pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
@@ -302,6 +308,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
 
if (bearer_id >= MAX_BEARERS) {
errstr = "max 3 bearers permitted";
+   NL_SET_ERR_MSG(extack, "Max 3 bearers permitted");
goto rejected;
}
 
@@ -315,6 +322,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
if (res) {
kfree(b);
errstr = "failed to enable media";
+   NL_SET_ERR_MSG(extack, "Failed to enable media");
goto rejected;
}
 
@@ -331,6 +339,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
if (res) {
bearer_disable(net, b);
errstr = "failed to create discoverer";
+   NL_SET_ERR_MSG(extack, "Failed to create discoverer");
goto rejected;
}
 
@@ -909,6 +918,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct 
genl_info *info)
bearer = tipc_bearer_find(net, name);
if (!bearer) {
err = -EINVAL;
+   NL_SET_ERR_MSG(info->extack, "Bearer not found");
goto err_out;
}
 
@@ -948,8 +958,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct 
genl_info *info)
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
bearer = tipc_bearer_find(net, name);
-   if (!bearer)
+   if (!bearer) {
+   NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
+   }
 
bearer_disable(net, bearer);
 
@@ -1007,7 +1019,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct 
genl_info *info)
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
}
 
-   return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+   return tipc_enable_bearer(net, bearer, domain, prio, attrs,
+ info->extack);
 }
 
 int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
@@ -1046,6 +1059,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct 
genl_info *info)
b = tipc_bearer_find(net, name);
if (!b) {
rtnl_unlock();
+   NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL

[tipc-discussion] [iproute2-next v2] tipc: add support for the netlink extack

2021-03-23 Thread Hoang Le
Add extack support to tipc so that it dumps the netlink extack error
messages (e.g. for -EINVAL) sent from the kernel.

Signed-off-by: Hoang Le 
---
 tipc/msg.c | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/tipc/msg.c b/tipc/msg.c
index dc09d05048f3..f29b2f8d35ad 100644
--- a/tipc/msg.c
+++ b/tipc/msg.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 
+#include "libnetlink.h"
 #include "msg.h"
 
 int parse_attrs(const struct nlattr *attr, void *data)
@@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 {
int ret;
struct mnl_socket *nl;
+   int one = 1;
 
nl = mnl_socket_open(NETLINK_GENERIC);
if (nl == NULL) {
@@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
return NULL;
}
 
+   /* support to get extended ACK */
+   mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, &one, sizeof(one));
ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
if (ret < 0) {
perror("mnl_socket_bind");
@@ -73,21 +77,32 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 
 static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int 
seq)
 {
-   int ret;
unsigned int portid;
char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlmsghdr *h;
+   size_t num_bytes;
+   int is_err = 0;
+   int ret = 0;
 
portid = mnl_socket_get_portid(nl);
 
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
-   while (ret > 0) {
-   ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   while (num_bytes > 0) {
+   ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data);
if (ret <= 0)
break;
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   }
+
+   if (ret == -1) {
+   if (num_bytes > 0) {
+   h = (struct nlmsghdr *)buf;
+   is_err = nl_dump_ext_ack(h, NULL);
+   }
+
+   if (!is_err)
+   perror("error");
}
-   if (ret == -1)
-   perror("error");
 
mnl_socket_close(nl);
 
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next v2] tipc: add support for the netlink extack

2021-03-23 Thread Hoang Le
Add extack support to tipc so that it dumps the netlink extack error
messages (e.g. for -EINVAL) sent from the kernel.

Signed-off-by: Hoang Le 
---
 tipc/msg.c | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/tipc/msg.c b/tipc/msg.c
index dc09d05048f3..f29b2f8d35ad 100644
--- a/tipc/msg.c
+++ b/tipc/msg.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 
+#include "libnetlink.h"
 #include "msg.h"
 
 int parse_attrs(const struct nlattr *attr, void *data)
@@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 {
int ret;
struct mnl_socket *nl;
+   int one = 1;
 
nl = mnl_socket_open(NETLINK_GENERIC);
if (nl == NULL) {
@@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
return NULL;
}
 
+   /* support to get extended ACK */
+   mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, &one, sizeof(one));
ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
if (ret < 0) {
perror("mnl_socket_bind");
@@ -73,21 +77,32 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 
 static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int 
seq)
 {
-   int ret;
unsigned int portid;
char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlmsghdr *h;
+   size_t num_bytes;
+   int is_err = 0;
+   int ret = 0;
 
portid = mnl_socket_get_portid(nl);
 
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
-   while (ret > 0) {
-   ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   while (num_bytes > 0) {
+   ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data);
if (ret <= 0)
break;
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   }
+
+   if (ret == -1) {
+   if (num_bytes > 0) {
+   h = (struct nlmsghdr *)buf;
+   is_err = nl_dump_ext_ack(h, NULL);
+   }
+
+   if (!is_err)
+   perror("error");
}
-   if (ret == -1)
-   perror("error");
 
mnl_socket_close(nl);
 
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next] tipc: add support for the netlink extack

2021-03-22 Thread Hoang Le
Add extack support to tipc so that it dumps the netlink extack error
messages (e.g. for -EINVAL) sent from the kernel.

Signed-off-by: Hoang Le 
---
 tipc/msg.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/tipc/msg.c b/tipc/msg.c
index dc09d05048f3..57ece0d13194 100644
--- a/tipc/msg.c
+++ b/tipc/msg.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 
+#include "libnetlink.h"
 #include "msg.h"
 
 int parse_attrs(const struct nlattr *attr, void *data)
@@ -49,6 +50,7 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 {
int ret;
struct mnl_socket *nl;
+   int one = 1;
 
nl = mnl_socket_open(NETLINK_GENERIC);
if (nl == NULL) {
@@ -56,6 +58,8 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
return NULL;
}
 
+   /* support to get extended ACK */
+   mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, &one, sizeof(one));
ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
if (ret < 0) {
perror("mnl_socket_bind");
@@ -73,21 +77,28 @@ static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
 
 static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int 
seq)
 {
-   int ret;
unsigned int portid;
char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlmsghdr *h;
+   size_t num_bytes;
+   int ret = 0;
 
portid = mnl_socket_get_portid(nl);
 
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
-   while (ret > 0) {
-   ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   while (num_bytes > 0) {
+   ret = mnl_cb_run(buf, num_bytes, seq, portid, callback, data);
if (ret <= 0)
break;
-   ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+   num_bytes = mnl_socket_recvfrom(nl, buf, sizeof(buf));
}
-   if (ret == -1)
+
+   if (num_bytes > 0 && ret <= 0) {
+   h = (struct nlmsghdr *)buf;
+   nl_dump_ext_ack(h, NULL);
+   } else {
perror("error");
+   }
 
mnl_socket_close(nl);
 
-- 
2.25.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: add extack messages for bearer/media failure

2021-03-22 Thread Hoang Le
Add extack error messages for -EINVAL errors when enabling a bearer or
getting/setting properties for a media/bearer.

Signed-off-by: Hoang Le 
---
 net/tipc/bearer.c | 50 +--
 1 file changed, 40 insertions(+), 10 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a4389ef08a98..1090f21fcfac 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -243,7 +243,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 
bearer_id, u32 dest)
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
  u32 disc_domain, u32 prio,
- struct nlattr *attr[])
+ struct nlattr *attr[],
+ struct netlink_ext_ack *extack)
 {
struct tipc_net *tn = tipc_net(net);
struct tipc_bearer_names b_names;
@@ -257,17 +258,20 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
 
if (!bearer_name_validate(name, &b_names)) {
errstr = "illegal name";
+   NL_SET_ERR_MSG(extack, "Illegal name");
goto rejected;
}
 
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
errstr = "illegal priority";
+   NL_SET_ERR_MSG(extack, "Illegal priority");
goto rejected;
}
 
m = tipc_media_find(b_names.media_name);
if (!m) {
errstr = "media not registered";
+   NL_SET_ERR_MSG(extack, "Media not registered");
goto rejected;
}
 
@@ -281,6 +285,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
break;
if (!strcmp(name, b->name)) {
errstr = "already enabled";
+   NL_SET_ERR_MSG(extack, "Already enabled");
goto rejected;
}
bearer_id++;
@@ -292,6 +297,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
name, prio);
if (prio == TIPC_MIN_LINK_PRI) {
errstr = "cannot adjust to lower";
+   NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
goto rejected;
}
pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
@@ -302,6 +308,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
 
if (bearer_id >= MAX_BEARERS) {
errstr = "max 3 bearers permitted";
+   NL_SET_ERR_MSG(extack, "Max 3 bearers permitted");
goto rejected;
}
 
@@ -315,6 +322,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
if (res) {
kfree(b);
errstr = "failed to enable media";
+   NL_SET_ERR_MSG(extack, "Failed to enable media");
goto rejected;
}
 
@@ -331,6 +339,7 @@ static int tipc_enable_bearer(struct net *net, const char 
*name,
if (res) {
bearer_disable(net, b);
errstr = "failed to create discoverer";
+   NL_SET_ERR_MSG(extack, "Failed to create discoverer");
goto rejected;
}
 
@@ -909,6 +918,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct 
genl_info *info)
bearer = tipc_bearer_find(net, name);
if (!bearer) {
err = -EINVAL;
+   NL_SET_ERR_MSG(info->extack, "Bearer not found");
goto err_out;
}
 
@@ -948,8 +958,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct 
genl_info *info)
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
bearer = tipc_bearer_find(net, name);
-   if (!bearer)
+   if (!bearer) {
+   NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
+   }
 
bearer_disable(net, bearer);
 
@@ -1007,7 +1019,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct 
genl_info *info)
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
}
 
-   return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+   return tipc_enable_bearer(net, bearer, domain, prio, attrs,
+ info->extack);
 }
 
 int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
@@ -1046,6 +1059,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct 
genl_info *info)
b = tipc_bearer_find(net, name);
if (!b) {
rtnl_unlock();
+   NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
}
 
@@ -1086,8 +1100,10 @@ in

[tipc-discussion] [net-next] tipc: Add a missing case of TIPC_DIRECT_MSG type

2020-03-25 Thread Hoang Le
In commit f73b12812a3d
("tipc: improve throughput between nodes in netns"), we missed a check
to handle the TIPC_DIRECT_MSG type, so the old sending mechanism is still
used for this message type. As a result, the throughput improvement is not
as significant as expected.

Besides that, when sending a large message of that type, we also
use the wrong receive queue: the message should be enqueued in the socket
receive queue instead of being handled as a multicast message.

Fix this by adding the missing case for TIPC_DIRECT_MSG.

Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns")
Reported-by: Tuong Lien 
Signed-off-by: Hoang Le 
---
 net/tipc/msg.h| 5 +
 net/tipc/node.c   | 3 ++-
 net/tipc/socket.c | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 6d466ebdb64f..871feadbbc19 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -394,6 +394,11 @@ static inline u32 msg_connected(struct tipc_msg *m)
return msg_type(m) == TIPC_CONN_MSG;
 }
 
+static inline u32 msg_direct(struct tipc_msg *m)
+{
+   return msg_type(m) == TIPC_DIRECT_MSG;
+}
+
 static inline u32 msg_errcode(struct tipc_msg *m)
 {
return msg_bits(m, 1, 25, 0xf);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0c88778c88b5..10292c942384 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1586,7 +1586,8 @@ static void tipc_lxc_xmit(struct net *peer_net, struct 
sk_buff_head *list)
case TIPC_MEDIUM_IMPORTANCE:
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
-   if (msg_connected(hdr) || msg_named(hdr)) {
+   if (msg_connected(hdr) || msg_named(hdr) ||
+   msg_direct(hdr)) {
tipc_loopback_trace(peer_net, list);
spin_lock_init(&list->lock);
tipc_sk_rcv(peer_net, list);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 693e8902161e..87466607097f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1461,7 +1461,7 @@ static int __tipc_sendmsg(struct socket *sock, struct 
msghdr *m, size_t dlen)
}
 
__skb_queue_head_init(&pkts);
-   mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
+   mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: simplify trivial boolean return

2020-02-20 Thread Hoang Le
Checking a condition only to return 'true' is useless, as 'true' is
returned at the end of the function anyway.

Signed-off-by: Hoang Le 
---
 net/tipc/msg.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 0d515d20b056..4d0e0bdd997b 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -736,9 +736,6 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff 
*skb, int *err)
msg_set_destport(msg, dport);
*err = TIPC_OK;
 
-   if (!skb_cloned(skb))
-   return true;
-
return true;
 }
 
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: update replicast capability for broadcast send link

2019-11-20 Thread Hoang Le
When setting up a cluster that mixes nodes with and without replicast
capability, this capability is disabled for the broadcast send link
in order to be backwards compatible.

However, when the nodes lacking this capability leave and are removed from
the cluster, we don't update the capability on the broadcast send link. As a
result, some features based on this capability remain unexpectedly disabled.

In this commit, we make sure the broadcast send link capabilities are
re-calculated as soon as a node is removed from or rejoins a cluster.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c | 4 ++--
 net/tipc/bcast.h | 2 +-
 net/tipc/link.c  | 2 +-
 net/tipc/node.c  | 8 +++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index f41096a759fa..55aeba681cf4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -87,9 +87,9 @@ int tipc_bcast_get_mtu(struct net *net)
return tipc_link_mss(tipc_bc_sndlink(net));
 }
 
-void tipc_bcast_disable_rcast(struct net *net)
+void tipc_bcast_toggle_rcast(struct net *net, bool supp)
 {
-   tipc_bc_base(net)->rcast_support = false;
+   tipc_bc_base(net)->rcast_support = supp;
 }
 
 static void tipc_bcbase_calc_bc_threshold(struct net *net)
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dadad953e2be..9e847d9617d3 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -85,7 +85,7 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link 
*rcv_bcl);
 void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
 void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
 int  tipc_bcast_get_mtu(struct net *net);
-void tipc_bcast_disable_rcast(struct net *net);
+void tipc_bcast_toggle_rcast(struct net *net, bool supp);
 int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_mc_method *method, struct tipc_nlist *dests,
u16 *cong_link_cnt);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index fb72031228c9..24d4d10756d3 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -550,7 +550,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 
peer,
 
/* Disable replicast if even a single peer doesn't support it */
if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST))
-   tipc_bcast_disable_rcast(net);
+   tipc_bcast_toggle_rcast(net, false);
 
return true;
 }
diff --git a/net/tipc/node.c b/net/tipc/node.c
index aaf595613e6e..ab04e00cb95b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -496,6 +496,9 @@ struct tipc_node *tipc_node_create(struct net *net, u32 
addr, u8 *peer_id,
tn->capabilities &= temp_node->capabilities;
}
 
+   tipc_bcast_toggle_rcast(net,
+   (tn->capabilities & TIPC_BCAST_RCAST));
+
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -557,6 +560,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 
addr, u8 *peer_id,
list_for_each_entry_rcu(temp_node, >node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+   tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
trace_tipc_node_create(n, true, " ");
 exit:
spin_unlock_bh(>node_list_lock);
@@ -740,7 +744,8 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
list_for_each_entry_rcu(temp_node, >node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
-
+   tipc_bcast_toggle_rcast(peer->net,
+   (tn->capabilities & TIPC_BCAST_RCAST));
spin_unlock_bh(>node_list_lock);
return deleted;
 }
@@ -2198,6 +2203,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info 
*info)
list_for_each_entry_rcu(temp_node, >node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+   tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
err = 0;
 err_out:
tipc_node_put(peer);
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: update replicast capability for broadcast send link

2019-11-20 Thread Hoang Le
When setting up a cluster that contains both nodes that support the
replicast capability and nodes that do not, this capability is disabled
for the broadcast send link in order to be backwards compatible.

However, when these non-supporting nodes leave and are removed from the
cluster, we don't update this capability on the broadcast send link. As a
result, features that are based on this capability unexpectedly stay
disabled.

In this commit, we make sure the broadcast send link capabilities are
re-calculated as soon as a node is removed from or rejoins a cluster.

Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c | 4 ++--
 net/tipc/bcast.h | 2 +-
 net/tipc/link.c  | 2 +-
 net/tipc/node.c  | 8 +++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 44ed481fec47..3d14e60ef642 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -87,9 +87,9 @@ int tipc_bcast_get_mtu(struct net *net)
return tipc_link_mss(tipc_bc_sndlink(net));
 }
 
-void tipc_bcast_disable_rcast(struct net *net)
+void tipc_bcast_toggle_rcast(struct net *net, bool supp)
 {
-   tipc_bc_base(net)->rcast_support = false;
+   tipc_bc_base(net)->rcast_support = supp;
 }
 
 static void tipc_bcbase_calc_bc_threshold(struct net *net)
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dadad953e2be..9e847d9617d3 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -85,7 +85,7 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link 
*rcv_bcl);
 void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
 void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
 int  tipc_bcast_get_mtu(struct net *net);
-void tipc_bcast_disable_rcast(struct net *net);
+void tipc_bcast_toggle_rcast(struct net *net, bool supp);
 int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_mc_method *method, struct tipc_nlist *dests,
u16 *cong_link_cnt);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a2e9a64d5a0f..5153b9bb7b3f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -550,7 +550,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 
peer,
 
/* Disable replicast if even a single peer doesn't support it */
if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST))
-   tipc_bcast_disable_rcast(net);
+   tipc_bcast_toggle_rcast(net, false);
 
return true;
 }
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b058647fa78b..b9f6b5dfdb5b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -496,6 +496,9 @@ struct tipc_node *tipc_node_create(struct net *net, u32 
addr, u8 *peer_id,
tn->capabilities &= temp_node->capabilities;
}
 
+   tipc_bcast_toggle_rcast(net,
+   (tn->capabilities & TIPC_BCAST_RCAST));
+
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -557,6 +560,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 
addr, u8 *peer_id,
list_for_each_entry_rcu(temp_node, >node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+   tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
trace_tipc_node_create(n, true, " ");
 exit:
spin_unlock_bh(>node_list_lock);
@@ -740,7 +744,8 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
list_for_each_entry_rcu(temp_node, >node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
-
+   tipc_bcast_toggle_rcast(peer->net,
+   (tn->capabilities & TIPC_BCAST_RCAST));
spin_unlock_bh(>node_list_lock);
return deleted;
 }
@@ -2198,6 +2203,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info 
*info)
list_for_each_entry_rcu(temp_node, >node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+   tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
err = 0;
 err_out:
tipc_node_put(peer);
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net-next] tipc: update a binding service via broadcast

2019-11-14 Thread Hoang Le
Hi Jon,

Please take a look at v2. The mechanism looks the same as I did before in 
commit:
c55c8edafa91 ("tipc: smooth change between replicast and broadcast")
However, in this case we handle only one direction:  replicast -> broadcast.
Then, it is still backward compatible.

[...]
>From ae2ee6a7064de3ec1dc2c7df2db241d22b0d129f Mon Sep 17 00:00:00 2001
From: Hoang Le 
Date: Wed, 13 Nov 2019 14:01:03 +0700
Subject: [PATCH] tipc: update a binding service via broadcast

Currently, binding table updates (adding a service binding to the
name table or withdrawing a service binding) are sent over replicast.
However, when clusters scale up to > 100 nodes/containers, this
method is less efficient because it loops through the nodes in the
cluster one by one.

It is worthwhile to use broadcast to update a service binding. The binding
table is then updated on all nodes in one shot.

The mechanism is backward compatible because only the sending side changes.

v2: resolve synchronization problem when switching from unicast to
broadcast

Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c  | 13 +
 net/tipc/bcast.h  |  2 ++
 net/tipc/link.c   | 16 
 net/tipc/name_distr.c |  8 
 net/tipc/name_table.c |  9 ++---
 5 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index f41096a759fa..18431fa897ab 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct 
sk_buff_head *defq,
__skb_queue_tail(inputq, _skb);
}
 }
+
+int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
+{
+   struct sk_buff_head xmitq;
+   u16 cong_link_cnt;
+   int rc = 0;
+
+   __skb_queue_head_init();
+   __skb_queue_tail(, skb);
+   rc = tipc_bcast_xmit(net, , _link_cnt);
+   __skb_queue_purge();
+   return rc;
+}
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dadad953e2be..a100da3800fc 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
 u32 tipc_bcast_get_broadcast_mode(struct net *net);
 u32 tipc_bcast_get_broadcast_ratio(struct net *net);

+int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
+
 void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
   struct sk_buff_head *inputq);

diff --git a/net/tipc/link.c b/net/tipc/link.c
index fb72031228c9..22f1854435df 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -187,6 +187,9 @@ struct tipc_link {
struct tipc_link *bc_sndlink;
u8 nack_state;
bool bc_peer_is_up;
+   bool named_sync;
+   struct sk_buff_head defer_namedq;
+

/* Statistics */
struct tipc_stats stats;
@@ -363,6 +366,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
tipc_link_reset(rcv_l);
rcv_l->state = LINK_RESET;
+   rcv_l->named_sync = false;
if (!snd_l->ackers) {
trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!");
tipc_link_reset(snd_l);
@@ -508,6 +512,7 @@ bool tipc_link_create(struct net *net, char *if_name, int 
bearer_id,
__skb_queue_head_init(>failover_deferdq);
skb_queue_head_init(>wakeupq);
skb_queue_head_init(l->inputq);
+   __skb_queue_head_init(>defer_namedq);
return true;
 }

@@ -932,6 +937,8 @@ void tipc_link_reset(struct tipc_link *l)
l->silent_intv_cnt = 0;
l->rst_cnt = 0;
l->bc_peer_is_up = false;
+   l->named_sync = false;
+   __skb_queue_purge(>defer_namedq);
memset(>mon_state, 0, sizeof(l->mon_state));
tipc_link_reset_stats(l);
 }
@@ -1210,6 +1217,15 @@ static bool tipc_data_input(struct tipc_link *l, struct 
sk_buff *skb,
return true;
case NAME_DISTRIBUTOR:
l->bc_rcvlink->state = LINK_ESTABLISHED;
+   if (msg_is_syn(hdr)) {
+   l->bc_rcvlink->named_sync = true;
+   skb_queue_splice_tail_init(>defer_namedq, l->namedq);
+   return true;
+   }
+   if (msg_is_rcast(hdr) && !l->bc_rcvlink->named_sync) {
+   skb_queue_tail(>defer_namedq, skb);
+   return true;
+   }
skb_queue_tail(l->namedq, skb);
return true;
case MSG_BUNDLER:
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 5feaf3b67380..419b3f0f102d 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -180,6 +180,14 @@ static void named_distribute(struct net *net, struct 
sk_buff_head *list,
skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
__

[tipc-discussion] [net-next] tipc: update a binding service via broadcast

2019-11-12 Thread Hoang Le
Currently, binding table updates (adding a service binding to the
name table or withdrawing a service binding) are sent over replicast.
However, when clusters scale up to > 100 nodes/containers, this
method is less efficient because it loops through the nodes in the
cluster one by one.

It is worthwhile to use broadcast to update a service binding. The binding
table is then updated on all nodes in one shot.

The mechanism is backward compatible because only the sending side changes.

Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c  | 13 +
 net/tipc/bcast.h  |  2 ++
 net/tipc/name_table.c |  4 ++--
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index f41096a759fa..18431fa897ab 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct 
sk_buff_head *defq,
__skb_queue_tail(inputq, _skb);
}
 }
+
+int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
+{
+   struct sk_buff_head xmitq;
+   u16 cong_link_cnt;
+   int rc = 0;
+
+   __skb_queue_head_init();
+   __skb_queue_tail(, skb);
+   rc = tipc_bcast_xmit(net, , _link_cnt);
+   __skb_queue_purge();
+   return rc;
+}
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dadad953e2be..a100da3800fc 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
 u32 tipc_bcast_get_broadcast_mode(struct net *net);
 u32 tipc_bcast_get_broadcast_ratio(struct net *net);
 
+int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
+
 void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
   struct sk_buff_head *inputq);
 
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 66a65c2cdb23..9e9c61f7c999 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -633,7 +633,7 @@ struct publication *tipc_nametbl_publish(struct net *net, 
u32 type, u32 lower,
spin_unlock_bh(>nametbl_lock);
 
if (skb)
-   tipc_node_broadcast(net, skb);
+   tipc_bcast_named_publish(net, skb);
return p;
 }
 
@@ -664,7 +664,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 
lower,
spin_unlock_bh(>nametbl_lock);
 
if (skb) {
-   tipc_node_broadcast(net, skb);
+   tipc_bcast_named_publish(net, skb);
return 1;
}
return 0;
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net-next] tipc: update mon's self addr when node addr generated

2019-11-12 Thread Hoang Le
Yeah, but I think we will have the same result with both of them. Because link 
monitoring is still 'idle' during this period time.

Regards,
Hoang
-Original Message-
From: Jon Maloy  
Sent: Wednesday, November 13, 2019 8:35 AM
To: Hoang Huu Le ; ma...@donjonn.com; 
tipc-discussion@lists.sourceforge.net
Subject: RE: [net-next] tipc: update mon's self addr when node addr generated

Thinking about it, wouldn't it be better to add the node to the monitor at the 
moment it really has an address, and not earlier?
To add it to the monitor with address 0 is pretty meaningless.

///jon

> -Original Message-
> From: Jon Maloy
> Sent: 11-Nov-19 09:10
> To: Hoang Le ; ma...@donjonn.com; 
> tipc-discussion@lists.sourceforge.net
> Subject: RE: [net-next] tipc: update mon's self addr when node addr generated
> 
> Acked.
> 
> ///jon
> 
> > -----Original Message-
> > From: Hoang Le 
> > Sent: 11-Nov-19 04:24
> > To: Jon Maloy ; ma...@donjonn.com; 
> > tipc-discussion@lists.sourceforge.net
> > Subject: [net-next] tipc: update mon's self addr when node addr generated
> >
> > In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
> > hash values"), the 32-bit node address only generated after one second
> > trial period expired. However the self's addr in struct tipc_monitor do
> > not update according to node address generated. This lead to it is
> > always zero as initial value. As result, sorting algorithm using this
> > value does not work as expected, neither neighbor monitoring framework.
> >
> > In this commit, we add a fix to update self's addr when 32-bit node
> > address generated.
> >
> > Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash 
> > values")
> > Signed-off-by: Hoang Le 
> > ---
> >  net/tipc/monitor.c | 15 +++
> >  net/tipc/monitor.h |  1 +
> >  net/tipc/net.c |  2 ++
> >  3 files changed, 18 insertions(+)
> >
> > diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
> > index 6a6eae88442f..58708b4c7719 100644
> > --- a/net/tipc/monitor.c
> > +++ b/net/tipc/monitor.c
> > @@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
> > kfree(mon);
> >  }
> >
> > +void tipc_mon_reinit_self(struct net *net)
> > +{
> > +   struct tipc_monitor *mon;
> > +   int bearer_id;
> > +
> > +   for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
> > +   mon = tipc_monitor(net, bearer_id);
> > +   if (!mon)
> > +   continue;
> > +   write_lock_bh(>lock);
> > +   mon->self->addr = tipc_own_addr(net);
> > +   write_unlock_bh(>lock);
> > +   }
> > +}
> > +
> >  int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
> >  {
> > struct tipc_net *tn = tipc_net(net);
> > diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
> > index 2a21b93e0d04..ed63d2e650b0 100644
> > --- a/net/tipc/monitor.h
> > +++ b/net/tipc/monitor.h
> > @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct 
> > tipc_nl_msg *msg,
> >   u32 bearer_id);
> >  int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
> >  u32 bearer_id, u32 *prev_node);
> > +void tipc_mon_reinit_self(struct net *net);
> >
> >  extern const int tipc_max_domain_size;
> >  #endif
> > diff --git a/net/tipc/net.c b/net/tipc/net.c
> > index 85707c185360..2de3cec9929d 100644
> > --- a/net/tipc/net.c
> > +++ b/net/tipc/net.c
> > @@ -42,6 +42,7 @@
> >  #include "node.h"
> >  #include "bcast.h"
> >  #include "netlink.h"
> > +#include "monitor.h"
> >
> >  /*
> >   * The TIPC locking policy is designed to ensure a very fine locking
> > @@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
> > tipc_set_node_addr(net, addr);
> > tipc_named_reinit(net);
> > tipc_sk_reinit(net);
> > +   tipc_mon_reinit_self(net);
> > tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
> >  TIPC_CLUSTER_SCOPE, 0, addr);
> >  }
> > --
> > 2.20.1




___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: update mon's self addr when node addr generated

2019-11-11 Thread Hoang Le
In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
hash values"), the 32-bit node address is only generated after the
one-second trial period has expired. However, the self addr in struct
tipc_monitor is not updated when the node address is generated, so it
always keeps its initial value of zero. As a result, the sorting
algorithm that uses this value does not work as expected, and neither
does the neighbor monitoring framework.

In this commit, we add a fix to update the self addr when the 32-bit node
address is generated.

Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash 
values")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/monitor.c | 15 +++
 net/tipc/monitor.h |  1 +
 net/tipc/net.c |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 6a6eae88442f..58708b4c7719 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
kfree(mon);
 }
 
+void tipc_mon_reinit_self(struct net *net)
+{
+   struct tipc_monitor *mon;
+   int bearer_id;
+
+   for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+   mon = tipc_monitor(net, bearer_id);
+   if (!mon)
+   continue;
+   write_lock_bh(>lock);
+   mon->self->addr = tipc_own_addr(net);
+   write_unlock_bh(>lock);
+   }
+}
+
 int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
 {
struct tipc_net *tn = tipc_net(net);
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
index 2a21b93e0d04..ed63d2e650b0 100644
--- a/net/tipc/monitor.h
+++ b/net/tipc/monitor.h
@@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg 
*msg,
  u32 bearer_id);
 int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
 u32 bearer_id, u32 *prev_node);
+void tipc_mon_reinit_self(struct net *net);
 
 extern const int tipc_max_domain_size;
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 85707c185360..2de3cec9929d 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -42,6 +42,7 @@
 #include "node.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "monitor.h"
 
 /*
  * The TIPC locking policy is designed to ensure a very fine locking
@@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
tipc_set_node_addr(net, addr);
tipc_named_reinit(net);
tipc_sk_reinit(net);
+   tipc_mon_reinit_self(net);
tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
 TIPC_CLUSTER_SCOPE, 0, addr);
 }
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: update mon's self addr when node addr generated

2019-11-11 Thread Hoang Le
In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
hash values"), the 32-bit node address is only generated after the
one-second trial period has expired. However, the self addr in struct
tipc_monitor is not updated when the node address is generated, so it
always keeps its initial value of zero. As a result, the sorting
algorithm that uses this value does not work as expected, and neither
does the neighbor monitoring framework.

In this commit, we add a fix to update the self addr when the 32-bit node
address is generated.

Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash 
values")
Signed-off-by: Hoang Le 
---
 net/tipc/monitor.c | 15 +++
 net/tipc/monitor.h |  1 +
 net/tipc/net.c |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 6a6eae88442f..58708b4c7719 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
kfree(mon);
 }
 
+void tipc_mon_reinit_self(struct net *net)
+{
+   struct tipc_monitor *mon;
+   int bearer_id;
+
+   for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+   mon = tipc_monitor(net, bearer_id);
+   if (!mon)
+   continue;
+   write_lock_bh(>lock);
+   mon->self->addr = tipc_own_addr(net);
+   write_unlock_bh(>lock);
+   }
+}
+
 int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
 {
struct tipc_net *tn = tipc_net(net);
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
index 2a21b93e0d04..ed63d2e650b0 100644
--- a/net/tipc/monitor.h
+++ b/net/tipc/monitor.h
@@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg 
*msg,
  u32 bearer_id);
 int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
 u32 bearer_id, u32 *prev_node);
+void tipc_mon_reinit_self(struct net *net);
 
 extern const int tipc_max_domain_size;
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 85707c185360..2de3cec9929d 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -42,6 +42,7 @@
 #include "node.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "monitor.h"
 
 /*
  * The TIPC locking policy is designed to ensure a very fine locking
@@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
tipc_set_node_addr(net, addr);
tipc_named_reinit(net);
tipc_sk_reinit(net);
+   tipc_mon_reinit_self(net);
tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
 TIPC_CLUSTER_SCOPE, 0, addr);
 }
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: eliminate checking netns if node established

2019-11-07 Thread Hoang Le
Currently, we scan over all network namespaces for each received
discovery message in order to check whether the sending peer might be
present in a host-local namespace.

This is unnecessary since we can assume that a peer will not change its
location during an established session.

We now improve the condition for this testing so that we don't perform
any redundant scans.

Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 1f1584518221..b66d2f67b1dd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -472,10 +472,6 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
 tipc_bc_sndlink(net),
 >bc_entry.link)) {
pr_warn("Broadcast rcv link creation failed, no memory\n");
-   if (n->peer_net) {
-   n->peer_net = NULL;
-   n->peer_hash_mix = 0;
-   }
kfree(n);
n = NULL;
goto exit;
@@ -1073,6 +1069,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
if (sign_match && addr_match && link_up) {
/* All is fine. Do nothing. */
reset = false;
+   /* Peer node is not a container/local namespace */
+   if (!n->peer_hash_mix)
+   n->peer_hash_mix = hash_mixes;
} else if (sign_match && addr_match && !link_up) {
/* Respond. The link will come up in due time */
*respond = true;
@@ -1398,11 +1397,8 @@ static void node_lost_contact(struct tipc_node *n,
 
/* Notify publications from this node */
n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
-
-   if (n->peer_net) {
-   n->peer_net = NULL;
-   n->peer_hash_mix = 0;
-   }
+   n->peer_net = NULL;
+   n->peer_hash_mix = 0;
/* Notify sockets connected to node */
list_for_each_entry_safe(conn, safe, conns, list) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next 2/2] tipc: reduce sensitive to retransmit failures

2019-11-05 Thread Hoang Le
With a huge cluster (e.g. >200 nodes), the flow
gap -> retransmit packet -> acked takes a long time when STATE_MSG
messages are dropped or delayed because of heavy traffic. As a result,
the 1.5 sec tolerance criterion makes the link fail too easily, around
the 2nd or 3rd failed retransmission attempt.

Instead of re-introducing the criterion of 99 failed retransmissions to fix
the issue, we increase the failure detection timer to ten times the
tolerance value.

Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/link.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 038861bad72b..2aed7a958a8c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1087,7 +1087,7 @@ static bool link_retransmit_failure(struct tipc_link *l, 
struct tipc_link *r,
return false;
 
if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
-   msecs_to_jiffies(r->tolerance)))
+   msecs_to_jiffies(r->tolerance * 10)))
return false;
 
hdr = buf_msg(skb);
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next 1/2] tipc: update cluster capabilities if node deleted

2019-11-05 Thread Hoang Le
There are two improvements to how cluster capabilities are re-calculated:

- When a specific down node is deleted, the capabilities need to be
re-calculated.
- In tipc_node_cleanup(), there is no need to re-calculate if the node
still exists in the cluster.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 742c04756d72..a20fabd09e7e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -665,6 +665,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
}
tipc_node_write_unlock(peer);
 
+   if (!deleted) {
+   spin_unlock_bh(>node_list_lock);
+   return deleted;
+   }
+
/* Calculate cluster capabilities */
tn->capabilities = TIPC_NODE_CAPABILITIES;
list_for_each_entry_rcu(temp_node, >node_list, list) {
@@ -2041,7 +2046,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info 
*info)
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
-   struct tipc_node *peer;
+   struct tipc_node *peer, *temp_node;
u32 addr;
int err;
 
@@ -2082,6 +2087,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
genl_info *info)
tipc_node_write_unlock(peer);
tipc_node_delete(peer);
 
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
err = 0;
 err_out:
tipc_node_put(peer);
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: eliminate checking netns if node acknowledge

2019-11-05 Thread Hoang Le
Currently, we check for a host-local netns on every neighbor discovery
message that is sent from a node in an external netns. This is
unnecessary for a node that has already been acknowledged.

We now restrict this check to the cases where a peer node comes back or
a discovery message is sent from an unacknowledged node.

Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns")
Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4b60928049ea..742c04756d72 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -472,10 +472,8 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
 tipc_bc_sndlink(net),
 >bc_entry.link)) {
pr_warn("Broadcast rcv link creation failed, no memory\n");
-   if (n->peer_net) {
-   n->peer_net = NULL;
-   n->peer_hash_mix = 0;
-   }
+   n->peer_net = NULL;
+   n->peer_hash_mix = 0;
kfree(n);
n = NULL;
goto exit;
@@ -1068,6 +1066,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
if (sign_match && addr_match && link_up) {
/* All is fine. Do nothing. */
reset = false;
+   /* Peer node is not a container/netns local */
+   if (!n->peer_hash_mix)
+   n->peer_hash_mix = hash_mixes;
} else if (sign_match && addr_match && !link_up) {
/* Respond. The link will come up in due time */
*respond = true;
@@ -1393,11 +1394,8 @@ static void node_lost_contact(struct tipc_node *n,
 
/* Notify publications from this node */
n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
-
-   if (n->peer_net) {
-   n->peer_net = NULL;
-   n->peer_hash_mix = 0;
-   }
+   n->peer_net = NULL;
+   n->peer_hash_mix = 0;
/* Notify sockets connected to node */
list_for_each_entry_safe(conn, safe, conns, list) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next 2/2] tipc: add column to indicate netns-local

2019-11-05 Thread Hoang Le
Example:
Node Identity  Hash      Is container?  State
1001002        01001002  no             up
1001010        31000101  no             up
1001011        31010101  no             up
1001012        31020101  no             up
1001003        31030001  yes            up
1001013        31030101  no             up
1001004        31040001  yes            up
1001014        31040101  no             up
1001015        31050101  no             up
1001006        31060001  yes            up
1001016        31060101  no             up
1001007        31070001  yes            up
1001008        31080001  yes            up
1001009        31090001  yes            up
100100a        31510001  yes            up
100100b        31520001  yes            up
100100c        31530001  yes            up
100100d        31540001  no             up
100100e        31550001  no             up
100100f        31560001  no             up

Signed-off-by: Hoang Le 
---
 include/uapi/linux/tipc_netlink.h | 1 +
 tipc/node.c   | 7 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/tipc_netlink.h 
b/include/uapi/linux/tipc_netlink.h
index efb958fd167d..1a071268bf5d 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -160,6 +160,7 @@ enum {
TIPC_NLA_NODE_UNSPEC,
TIPC_NLA_NODE_ADDR, /* u32 */
TIPC_NLA_NODE_UP,   /* flag */
+   TIPC_NLA_NODE_LOCAL,/* flag */
 
__TIPC_NLA_NODE_MAX,
TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1
diff --git a/tipc/node.c b/tipc/node.c
index 2fec6753c974..b4203af014d3 100644
--- a/tipc/node.c
+++ b/tipc/node.c
@@ -42,6 +42,11 @@ static int node_list_cb(const struct nlmsghdr *nlh, void 
*data)
addr = mnl_attr_get_u32(attrs[TIPC_NLA_NODE_ADDR]);
hash2nodestr(addr, str);
printf("%-32s %08x ", str, addr);
+   if (attrs[TIPC_NLA_NODE_LOCAL])
+   printf("%-12s  ", "yes");
+   else
+   printf("%-12s  ", "no");
+
if (attrs[TIPC_NLA_NODE_UP])
printf("up\n");
else
@@ -63,7 +68,7 @@ static int cmd_node_list(struct nlmsghdr *nlh, const struct 
cmd *cmd,
fprintf(stderr, "error, message initialisation failed\n");
return -1;
}
-   printf("Node IdentityHash State\n");
+   printf("Node IdentityHash Is container? 
State\n");
return msg_dumpit(nlh, node_list_cb, NULL);
 }
 
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next 1/2] tipc: support 128bit node identity for peer removing

2019-11-05 Thread Hoang Le
Add support for removing a specific down node by its 128-bit node
identity, as an alternative to the legacy 32-bit node address.

v2: improve usage for 'tipc peer remove' command

Signed-off-by: Hoang Le 
---
 tipc/peer.c | 53 -
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/tipc/peer.c b/tipc/peer.c
index f6380777033d..f14ec35e6f71 100644
--- a/tipc/peer.c
+++ b/tipc/peer.c
@@ -59,17 +59,68 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return msg_doit(nlh, NULL, NULL);
 }
 
+static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd,
+ struct cmdl *cmdl, void *data)
+{
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   __u8 id[16] = {0,};
+   __u64 *w0 = (__u64 *)&id[0];
+   __u64 *w1 = (__u64 *)&id[8];
+   struct nlattr *nest;
+   char *str;
+
+   if (cmdl->argc != cmdl->optind + 1) {
+   fprintf(stderr, "Usage: %s peer remove identity NODEID\n",
+   cmdl->argv[0]);
+   return -EINVAL;
+   }
+
+   str = shift_cmdl(cmdl);
+   if (str2nodeid(str, id)) {
+   fprintf(stderr, "Invalid node identity\n");
+   return -EINVAL;
+   }
+
+   nlh = msg_init(buf, TIPC_NL_PEER_REMOVE);
+   if (!nlh) {
+   fprintf(stderr, "error, message initialisation failed\n");
+   return -1;
+   }
+
+   nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1);
+   mnl_attr_nest_end(nlh, nest);
+
+   return msg_doit(nlh, NULL, NULL);
+}
+
 static void cmd_peer_rm_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s peer remove PROPERTY\n\n"
+   "PROPERTIES\n"
+   " identity NODEID - Remove peer node identity\n",
+   cmdl->argv[0]);
+}
+
+static void cmd_peer_rm_addr_help(struct cmdl *cmdl)
 {
fprintf(stderr, "Usage: %s peer remove address ADDRESS\n",
cmdl->argv[0]);
 }
 
+static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s peer remove identity NODEID\n",
+   cmdl->argv[0]);
+}
+
 static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd,
struct cmdl *cmdl, void *data)
 {
const struct cmd cmds[] = {
-   { "address",cmd_peer_rm_addr,   cmd_peer_rm_help },
+   { "address",  cmd_peer_rm_addr,   cmd_peer_rm_addr_help },
+   { "identity", cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help },
{ NULL }
};
 
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: update cluster capabilities if node deleted

2019-10-30 Thread Hoang Le
There are two improvements when re-calculating cluster capabilities:

- When deleting a specific down node, we need to re-calculate.
- In tipc_node_cleanup(), we do not need to re-calculate if the node
still exists in the cluster.

Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4b60928049ea..1f1584518221 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -667,6 +667,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
}
tipc_node_write_unlock(peer);
 
+   if (!deleted) {
+   spin_unlock_bh(&tn->node_list_lock);
+   return deleted;
+   }
+
/* Calculate cluster capabilities */
tn->capabilities = TIPC_NODE_CAPABILITIES;
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -2043,7 +2048,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info 
*info)
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
-   struct tipc_node *peer;
+   struct tipc_node *peer, *temp_node;
u32 addr;
int err;
 
@@ -2084,6 +2089,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
genl_info *info)
tipc_node_write_unlock(peer);
tipc_node_delete(peer);
 
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
err = 0;
 err_out:
tipc_node_put(peer);
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: reduce sensitive to retransmit failures

2019-10-30 Thread Hoang Le
In a huge cluster (e.g. >200 nodes), the sequence
gap -> retransmit packet -> acked takes a long time when STATE_MSGs are
dropped or delayed because of heavy traffic. With the 1.5 sec tolerance
value as the failure criterion, a link is then easily declared failed
around the 2nd or 3rd failed retransmission attempt.

Instead of re-introducing the criterion of 99 failed retransmissions to fix
the issue, we increase the failure detection timer to ten times the
tolerance value.

Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria")
Signed-off-by: Hoang Le 
---
 net/tipc/link.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 7d7a66178607..9f524c325c0d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1084,7 +1084,7 @@ static bool link_retransmit_failure(struct tipc_link *l, 
struct tipc_link *r,
return false;
 
if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
-   msecs_to_jiffies(r->tolerance)))
+   msecs_to_jiffies(r->tolerance * 10)))
return false;
 
hdr = buf_msg(skb);
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next v2] tipc: improve throughput between nodes in netns

2019-10-28 Thread Hoang Le
Currently, TIPC transports intra-node user data messages directly
socket to socket, hence shortcutting all the lower layers of the
communication stack. This gives TIPC very good intra node performance,
both regarding throughput and latency.

We now introduce a similar mechanism for TIPC data traffic across
network namespaces located in the same kernel. On the send path, the
call chain is as always accompanied by the sending node's network
namespace pointer. However, once we have reliably established that the
receiving node is represented by a namespace on the same host, we just
replace the namespace pointer with the receiving node/namespace's
ditto, and follow the regular socket receive path through the receiving
node. This technique gives us a throughput similar to the node internal
throughput, several times larger than if we let the traffic go through
the full network stacks. As a comparison, max throughput for 64k
messages is four times larger than TCP throughput for the same type of
traffic.

To meet any security concerns, the following should be noted.

- All nodes joining a cluster are supposed to have been certified
and authenticated by mechanisms outside TIPC. This is no different for
nodes/namespaces on the same host; they have to auto discover each
other using the attached interfaces, and establish links which are
supervised via the regular link monitoring mechanism. Hence, a kernel
local node has no other way to join a cluster than any other node, and
has to obey the policies set in the IP or device layers of the stack.

- Only when a sender has established with 100% certainty that the peer
node is located in a kernel local namespace does it choose to let user
data messages, and only those, take the crossover path to the receiving
node/namespace.

- If the receiving node/namespace is removed, its namespace pointer
is invalidated at all peer nodes, and their neighbor link monitoring
will eventually note that this node is gone.

- To ensure the "100% certainty" criterion, and prevent any possible
spoofing, received discovery messages must contain a proof that the
sender knows a common secret. We use the hash mix of the sending
node/namespace for this purpose, since it can be accessed directly by
all other namespaces in the kernel. Upon reception of a discovery
message, the receiver checks this proof against all the local
namespaces' hash_mix:es. If it finds a match, that, along with a
matching node id and cluster id, is deemed sufficient proof that
the peer node in question is in a local namespace, and a wormhole can
be opened.

- We should also consider that TIPC is intended to be a cluster local
IPC mechanism (just like e.g. UNIX sockets) rather than a network
protocol, and hence we think it can be justified to allow it to shortcut
the lower protocol layers.

Regarding traceability, we should notice that since commit 6c9081a3915d
("tipc: add loopback device tracking") it is possible to follow the node
internal packet flow by just activating tcpdump on the loopback
interface. This will be true even for this mechanism; by activating
tcpdump on the involved nodes' loopback interfaces their inter-name
space messaging can easily be tracked.

v2:
- update 'net' pointer when node left/rejoined
v3:
- grab read/write lock when using node ref obj
v4:
- clone traffics between netns to loopback

Suggested-by: Jon Maloy 
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c   |  16 +
 net/tipc/core.h   |   6 ++
 net/tipc/discover.c   |   4 +-
 net/tipc/msg.h|  14 
 net/tipc/name_distr.c |   2 +-
 net/tipc/node.c   | 155 --
 net/tipc/node.h   |   5 +-
 net/tipc/socket.c |   6 +-
 8 files changed, 197 insertions(+), 11 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..ab648dd150ee 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net)
tipc_sk_rht_destroy(net);
 }
 
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+   tipc_node_pre_cleanup_net(net);
+}
+
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+   .pre_exit = tipc_pernet_pre_exit,
+};
+
 static struct pernet_operations tipc_net_ops = {
.init = tipc_init_net,
.exit = tipc_exit_net,
@@ -151,6 +160,10 @@ static int __init tipc_init(void)
if (err)
goto out_pernet_topsrv;
 
+   err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+   if (err)
+   goto out_register_pernet_subsys;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -158,6 +171,8 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
 out_bearer:
+   unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
unregister_pernet_device(_topsr

[tipc-discussion] [net-next v4] tipc: improve throughput between nodes in netns

2019-10-28 Thread Hoang Le
Currently, TIPC transports intra-node user data messages directly
socket to socket, hence shortcutting all the lower layers of the
communication stack. This gives TIPC very good intra node performance,
both regarding throughput and latency.

We now introduce a similar mechanism for TIPC data traffic across
network namespaces located in the same kernel. On the send path, the
call chain is as always accompanied by the sending node's network
namespace pointer. However, once we have reliably established that the
receiving node is represented by a namespace on the same host, we just
replace the namespace pointer with the receiving node/namespace's
ditto, and follow the regular socket receive path through the receiving
node. This technique gives us a throughput similar to the node internal
throughput, several times larger than if we let the traffic go through
the full network stacks. As a comparison, max throughput for 64k
messages is four times larger than TCP throughput for the same type of
traffic.

To meet any security concerns, the following should be noted.

- All nodes joining a cluster are supposed to have been certified
and authenticated by mechanisms outside TIPC. This is no different for
nodes/namespaces on the same host; they have to auto discover each
other using the attached interfaces, and establish links which are
supervised via the regular link monitoring mechanism. Hence, a kernel
local node has no other way to join a cluster than any other node, and
has to obey the policies set in the IP or device layers of the stack.

- Only when a sender has established with 100% certainty that the peer
node is located in a kernel local namespace does it choose to let user
data messages, and only those, take the crossover path to the receiving
node/namespace.

- If the receiving node/namespace is removed, its namespace pointer
is invalidated at all peer nodes, and their neighbor link monitoring
will eventually note that this node is gone.

- To ensure the "100% certainty" criterion, and prevent any possible
spoofing, received discovery messages must contain a proof that the
sender knows a common secret. We use the hash mix of the sending
node/namespace for this purpose, since it can be accessed directly by
all other namespaces in the kernel. Upon reception of a discovery
message, the receiver checks this proof against all the local
namespaces' hash_mix:es. If it finds a match, that, along with a
matching node id and cluster id, is deemed sufficient proof that
the peer node in question is in a local namespace, and a wormhole can
be opened.

- We should also consider that TIPC is intended to be a cluster local
IPC mechanism (just like e.g. UNIX sockets) rather than a network
protocol, and hence we think it can be justified to allow it to shortcut
the lower protocol layers.

Regarding traceability, we should notice that since commit 6c9081a3915d
("tipc: add loopback device tracking") it is possible to follow the node
internal packet flow by just activating tcpdump on the loopback
interface. This will be true even for this mechanism; by activating
tcpdump on the involved nodes' loopback interfaces their inter-name
space messaging can easily be tracked.

v2:
- update 'net' pointer when node left/rejoined
v3:
- grab read/write lock when using node ref obj
v4:
- clone traffics between netns to loopback

Suggested-by: Jon Maloy 
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c   |  16 +
 net/tipc/core.h   |   6 ++
 net/tipc/discover.c   |   4 +-
 net/tipc/msg.h|  14 
 net/tipc/name_distr.c |   2 +-
 net/tipc/node.c   | 155 --
 net/tipc/node.h   |   5 +-
 net/tipc/socket.c |   6 +-
 8 files changed, 197 insertions(+), 11 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..ab648dd150ee 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net)
tipc_sk_rht_destroy(net);
 }
 
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+   tipc_node_pre_cleanup_net(net);
+}
+
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+   .pre_exit = tipc_pernet_pre_exit,
+};
+
 static struct pernet_operations tipc_net_ops = {
.init = tipc_init_net,
.exit = tipc_exit_net,
@@ -151,6 +160,10 @@ static int __init tipc_init(void)
if (err)
goto out_pernet_topsrv;
 
+   err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+   if (err)
+   goto out_register_pernet_subsys;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -158,6 +171,8 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
 out_bearer:
+   unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
unregister_pernet_device(_topsr

[tipc-discussion] [net-next v3] tipc: improve throughput between nodes in netns

2019-10-27 Thread Hoang Le
Currently, TIPC transports intra-node user data messages directly
socket to socket, hence shortcutting all the lower layers of the
communication stack. This gives TIPC very good intra node performance,
both regarding throughput and latency.

We now introduce a similar mechanism for TIPC data traffic across
network namespaces located in the same kernel. On the send path, the
call chain is as always accompanied by the sending node's network
namespace pointer. However, once we have reliably established that the
receiving node is represented by a namespace on the same host, we just
replace the namespace pointer with the receiving node/namespace's
ditto, and follow the regular socket receive path through the receiving
node. This technique gives us a throughput similar to the node internal
throughput, several times larger than if we let the traffic go through
the full network stacks. As a comparison, max throughput for 64k
messages is four times larger than TCP throughput for the same type of
traffic.

To meet any security concerns, the following should be noted.

- All nodes joining a cluster are supposed to have been certified
and authenticated by mechanisms outside TIPC. This is no different for
nodes/namespaces on the same host; they have to auto discover each
other using the attached interfaces, and establish links which are
supervised via the regular link monitoring mechanism. Hence, a kernel
local node has no other way to join a cluster than any other node, and
has to obey the policies set in the IP or device layers of the stack.

- Only when a sender has established with 100% certainty that the peer
node is located in a kernel local namespace does it choose to let user
data messages, and only those, take the crossover path to the receiving
node/namespace.

- If the receiving node/namespace is removed, its namespace pointer
is invalidated at all peer nodes, and their neighbor link monitoring
will eventually note that this node is gone.

- To ensure the "100% certainty" criterion, and prevent any possible
spoofing, received discovery messages must contain a proof that the
sender knows a common secret. We use the hash mix of the sending
node/namespace for this purpose, since it can be accessed directly by
all other namespaces in the kernel. Upon reception of a discovery
message, the receiver checks this proof against all the local
namespaces' hash_mix:es. If it finds a match, that, along with a
matching node id and cluster id, is deemed sufficient proof that
the peer node in question is in a local namespace, and a wormhole can
be opened.

- We should also consider that TIPC is intended to be a cluster local
IPC mechanism (just like e.g. UNIX sockets) rather than a network
protocol, and hence we think it can be justified to allow it to shortcut
the lower protocol layers.

Regarding traceability, we should notice that since commit 6c9081a3915d
("tipc: add loopback device tracking") it is possible to follow the node
internal packet flow by just activating tcpdump on the loopback
interface. This will be true even for this mechanism; by activating
tcpdump on the involved nodes' loopback interfaces their inter-name
space messaging can easily be tracked.

v2:
- update 'net' pointer when node left/rejoined
v3:
- grab read/write lock when using node ref obj

Suggested-by: Jon Maloy 
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c   |  16 +
 net/tipc/core.h   |   6 ++
 net/tipc/discover.c   |   4 +-
 net/tipc/msg.h|  14 
 net/tipc/name_distr.c |   2 +-
 net/tipc/node.c   | 151 --
 net/tipc/node.h   |   5 +-
 net/tipc/socket.c |   6 +-
 8 files changed, 193 insertions(+), 11 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..ab648dd150ee 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net)
tipc_sk_rht_destroy(net);
 }
 
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+   tipc_node_pre_cleanup_net(net);
+}
+
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+   .pre_exit = tipc_pernet_pre_exit,
+};
+
 static struct pernet_operations tipc_net_ops = {
.init = tipc_init_net,
.exit = tipc_exit_net,
@@ -151,6 +160,10 @@ static int __init tipc_init(void)
if (err)
goto out_pernet_topsrv;
 
+   err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+   if (err)
+   goto out_register_pernet_subsys;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -158,6 +171,8 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
 out_bearer:
+   unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
unregister_pernet_device(&tipc_topsrv_net_ops);
 out_pernet_topsrv:
tipc_socket_stop();

[tipc-discussion] [net-next v2] tipc: improve throughput between nodes in netns

2019-10-24 Thread Hoang Le
Currently, TIPC transports intra-node user data messages directly
socket to socket, hence shortcutting all the lower layers of the
communication stack. This gives TIPC very good intra node performance,
both regarding throughput and latency.

We now introduce a similar mechanism for TIPC data traffic across
network namespaces located in the same kernel. On the send path, the
call chain is as always accompanied by the sending node's network
namespace pointer. However, once we have reliably established that the
receiving node is represented by a namespace on the same host, we just
replace the namespace pointer with the receiving node/namespace's
ditto, and follow the regular socket receive path through the receiving
node. This technique gives us a throughput similar to the node internal
throughput, several times larger than if we let the traffic go through
the full network stacks. As a comparison, max throughput for 64k
messages is four times larger than TCP throughput for the same type of
traffic.

To meet any security concerns, the following should be noted.

- All nodes joining a cluster are supposed to have been certified
and authenticated by mechanisms outside TIPC. This is no different for
nodes/namespaces on the same host; they have to auto discover each
other using the attached interfaces, and establish links which are
supervised via the regular link monitoring mechanism. Hence, a kernel
local node has no other way to join a cluster than any other node, and
has to obey the policies set in the IP or device layers of the stack.

- Only when a sender has established with 100% certainty that the peer
node is located in a kernel local namespace does it choose to let user
data messages, and only those, take the crossover path to the receiving
node/namespace.

- If the receiving node/namespace is removed, its namespace pointer
is invalidated at all peer nodes, and their neighbor link monitoring
will eventually note that this node is gone.

- To ensure the "100% certainty" criterion, and prevent any possible
spoofing, received discovery messages must contain a proof that the
sender knows a common secret. We use the hash mix of the sending
node/namespace for this purpose, since it can be accessed directly by
all other namespaces in the kernel. Upon reception of a discovery
message, the receiver checks this proof against all the local
namespaces' hash_mix:es. If it finds a match, that, along with a
matching node id and cluster id, is deemed sufficient proof that
the peer node in question is in a local namespace, and a wormhole can
be opened.

- We should also consider that TIPC is intended to be a cluster local
IPC mechanism (just like e.g. UNIX sockets) rather than a network
protocol, and hence we think it can be justified to allow it to shortcut
the lower protocol layers.

Regarding traceability, we should notice that since commit 6c9081a3915d
("tipc: add loopback device tracking") it is possible to follow the node
internal packet flow by just activating tcpdump on the loopback
interface. This will be true even for this mechanism; by activating
tcpdump on the involved nodes' loopback interfaces their inter-name
space messaging can easily be tracked.

v2:
- update 'net' pointer when node left/rejoined

Suggested-by: Jon Maloy 
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c   |  16 +
 net/tipc/core.h   |   6 ++
 net/tipc/discover.c   |   4 +-
 net/tipc/msg.h|  14 
 net/tipc/name_distr.c |   2 +-
 net/tipc/node.c   | 148 --
 net/tipc/node.h   |   5 +-
 net/tipc/socket.c |   6 +-
 8 files changed, 190 insertions(+), 11 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..ab648dd150ee 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,15 @@ static void __net_exit tipc_exit_net(struct net *net)
tipc_sk_rht_destroy(net);
 }
 
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+   tipc_node_pre_cleanup_net(net);
+}
+
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+   .pre_exit = tipc_pernet_pre_exit,
+};
+
 static struct pernet_operations tipc_net_ops = {
.init = tipc_init_net,
.exit = tipc_exit_net,
@@ -151,6 +160,10 @@ static int __init tipc_init(void)
if (err)
goto out_pernet_topsrv;
 
+   err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+   if (err)
+   goto out_register_pernet_subsys;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -158,6 +171,8 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
 out_bearer:
+   unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
unregister_pernet_device(&tipc_topsrv_net_ops);
 out_pernet_topsrv:
tipc_socket_stop();
@@ -177,6 +192,7 @@ static int __init tipc_init(void)

Re: [tipc-discussion] [net-next] tipc: improve throughput between nodes in netns

2019-10-21 Thread Hoang Le
Hi Eric,

Thanks for quick feedback.
See my inline answer.

Regards,
Hoang
-Original Message-
From: Eric Dumazet  
Sent: Tuesday, October 22, 2019 9:41 AM
To: Hoang Le ; jon.ma...@ericsson.com; 
ma...@donjonn.com; tipc-discussion@lists.sourceforge.net; net...@vger.kernel.org
Subject: Re: [net-next] tipc: improve throughput between nodes in netns


On 10/21/19 7:20 PM, Hoang Le wrote:
>   n->net = net;
>   n->capabilities = capabilities;
> + n->pnet = NULL;
> + for_each_net_rcu(tmp) {

This does not scale well, if say you have a thousand netns ?
[Hoang] This check executes only once, at the setup step, so a large number 
of namespaces is not a problem.

> + tn_peer = net_generic(tmp, tipc_net_id);
> + if (!tn_peer)
> + continue;
> + /* Integrity checking whether node exists in namespace or not */
> + if (tn_peer->net_id != tn->net_id)
> + continue;
> + if (memcmp(peer_id, tn_peer->node_id, NODE_ID_LEN))
> + continue;
> +
> + hash_chk = tn_peer->random;
> + hash_chk ^= net_hash_mix(&init_net);

Why is the xor with net_hash_mix(&init_net) needed?
[Hoang] We are trying to prevent an attacker from sniffing the network and 
injecting discovery messages. Mixing in as many hashes as possible makes it 
harder to fake a discovery message.

> + hash_chk ^= net_hash_mix(tmp);
> + if (hash_chk ^ hash_mixes)
> + continue;
> + n->pnet = tmp;
> + break;
> + }


How can we set n->pnet without increasing netns ->count ?
Using check_net() later might trigger an use-after-free.

[Hoang] In this case, peer node is down. I assume the tipc xmit function 
already bypassed these lines.



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: improve throughput between nodes in netns

2019-10-21 Thread Hoang Le
Currently, TIPC transports intra-node user data messages directly
socket to socket, hence shortcutting all the lower layers of the
communication stack. This gives TIPC very good intra node performance,
both regarding throughput and latency.

We now introduce a similar mechanism for TIPC data traffic across
network namespaces located in the same kernel. On the send path, the
call chain is as always accompanied by the sending node's network
namespace pointer. However, once we have reliably established that the
receiving node is represented by a namespace on the same host, we just
replace the namespace pointer with the receiving node/namespace's
ditto, and follow the regular socket receive path through the receiving
node. This technique gives us a throughput similar to the node internal
throughput, several times larger than if we let the traffic go through
the full network stacks. As a comparison, max throughput for 64k
messages is four times larger than TCP throughput for the same type of
traffic.

To meet any security concerns, the following should be noted.

- All nodes joining a cluster are supposed to have been certified
and authenticated by mechanisms outside TIPC. This is no different for
nodes/namespaces on the same host; they have to auto discover each
other using the attached interfaces, and establish links which are
supervised via the regular link monitoring mechanism. Hence, a kernel
local node has no other way to join a cluster than any other node, and
has to obey the policies set in the IP or device layers of the stack.

- Only when a sender has established with 100% certainty that the peer
node is located in a kernel local namespace does it choose to let user
data messages, and only those, take the crossover path to the receiving
node/namespace.

- If the receiving node/namespace is removed, its namespace pointer
is invalidated at all peer nodes, and their neighbor link monitoring
will eventually note that this node is gone.

- To ensure the "100% certainty" criterion, and prevent any possible
spoofing, received discovery messages must contain a proof that the
sender knows a common secret. We use the hash mix of the sending
node/namespace for this purpose, since it can be accessed directly by
all other namespaces in the kernel. Upon reception of a discovery
message, the receiver checks this proof against all the local
namespaces' hash_mix:es. If it finds a match, that, along with a
matching node id and cluster id, is deemed sufficient proof that
the peer node in question is in a local namespace, and a wormhole can
be opened.

- We should also consider that TIPC is intended to be a cluster local
IPC mechanism (just like e.g. UNIX sockets) rather than a network
protocol, and hence we think it can be justified to allow it to shortcut
the lower protocol layers.

Regarding traceability, we should notice that since commit 6c9081a3915d
("tipc: add loopback device tracking") it is possible to follow the node
internal packet flow by just activating tcpdump on the loopback
interface. This will be true even for this mechanism; by activating
tcpdump on the involved nodes' loopback interfaces their inter-name
space messaging can easily be tracked.

Suggested-by: Jon Maloy 
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/discover.c   |  10 -
 net/tipc/msg.h|  10 +
 net/tipc/name_distr.c |   2 +-
 net/tipc/node.c   | 100 --
 net/tipc/node.h   |   4 +-
 net/tipc/socket.c |   6 +--
 6 files changed, 121 insertions(+), 11 deletions(-)

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..338d402fcf39 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -38,6 +38,8 @@
 #include "node.h"
 #include "discover.h"
 
+#include 
+
 /* min delay during bearer start up */
 #define TIPC_DISC_INIT msecs_to_jiffies(125)
 /* max delay if bearer has no links */
@@ -83,6 +85,7 @@ static void tipc_disc_init_msg(struct net *net, struct 
sk_buff *skb,
struct tipc_net *tn = tipc_net(net);
u32 dest_domain = b->domain;
struct tipc_msg *hdr;
+   u32 hash;
 
hdr = buf_msg(skb);
tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
@@ -94,6 +97,10 @@ static void tipc_disc_init_msg(struct net *net, struct 
sk_buff *skb,
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+   hash = tn->random;
+   hash ^= net_hash_mix(&init_net);
+   hash ^= net_hash_mix(net);
+   msg_set_peer_net_hash(hdr, hash);
msg_set_node_id(hdr, tipc_own_id(net));
 }
 
@@ -242,7 +249,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
if (!tipc_in_scope(legacy, b->domain, src))
return;
tipc_node_check_dest(net, src, peer_id, b, caps, signature,
-  

Re: [tipc-discussion] [net-next] tipc: improve throughput between nodes in netns

2019-10-20 Thread Hoang Le
Hi Jon,

I have submitted the new code change in separate email.
Please help to review again.

Thanks,
Hoang
-Original Message-
From: Jon Maloy  
Sent: Friday, October 18, 2019 9:21 PM
To: Hoang Huu Le ; ma...@donjonn.com; 
tipc-...@dektech.com.au; tipc-discussion@lists.sourceforge.net
Subject: RE: [net-next] tipc: improve throughput between nodes in netns

Hi Hoang,
Our task is to establish that the message really came from the same node we 
have found in a local name space.
Imagine somebody is sniffing on a network, and finds there is a remote peer 
with proof(hash_mix)= M node id X and cluster id Y.
He then creates an illegitimate local name space with the proof(hash_mix)= N , 
node id X, but cluster id Z, so that all its
discovery messages are dropped by the receiver.
He may then create fake discovery messages with proof(hash_mix)= N, node id X 
and cluster id Y, which will be accepted by the
receiver and compared to the fake node's data.
Alas, they all match, and he has succeeded in hijacking traffic to the remote 
node, and this may happen even if the traffic was
meant to be encrypted.

Admittedly there are some weaknesses in this scenario, e.g., he cannot do this 
unless the remote node is temporarily down (maybe
he can kill it with a fake RESET message?), and there are other reasons why 
this might be very hard to do. But, better safe than
sorry, if we can avoid this with just a simple extra test that costs nothing.

Regards
///jon

> -Original Message-
> From: Hoang Le 
> Sent: 18-Oct-19 04:24
> To: Jon Maloy ; ma...@donjonn.com; tipc-
> d...@dektech.com.au; tipc-discussion@lists.sourceforge.net
> Subject: RE: [net-next] tipc: improve throughput between nodes in netns
> 
> Hi Jon,
> 
> Thanks for good description.
> However, w.r.t your comment  "We even need to verify cluster ids.",  I'm still
> unclear why we need to isolate cluster ids here.
> I guess the node had been accepted already when bypassed at function
> tipc_disc_rcv. Then, we just check to apply new mechanism for kernel local
> namespaces.
> 
> Regards,
> Hoang
> -Original Message-
> From: Jon Maloy 
> Sent: Friday, October 18, 2019 2:20 AM
> To: Hoang Huu Le ; ma...@donjonn.com;
> tipc-...@dektech.com.au; tipc-discussion@lists.sourceforge.net
> Subject: RE: [net-next] tipc: improve throughput between nodes in netns
> 
> Hi Hoang,
> We need a very good log text to justify this.
> 
> My proposal:
> 
> "Currently, TIPC transports intra-node user data messages directly socket to
> socket, hence shortcutting all the lower layers of the communication stack.
> This gives TIPC very good intra node performance, both regarding throughput
> and latency.
> 
> We now introduce a similar mechanism for TIPC data traffic across network
> name spaces located in the same kernel. On the send path, the call chain is as
> always accompanied by the sending node's network name space pointer.
> However, once we have reliably established that the receiving node is
> represented by a name space on the same host, we just replace the name
> space pointer with the receiving node/name space's ditto, and follow the
> regular socket receive path through the receiving node. This technique gives
> us a throughput similar to the node internal throughput, several times larger
> than if we let the traffic go through the full network stack. As a comparison,
> max throughput for 64k messages is four times larger than TCP throughput for
> the same type of traffic.
> 
> To meet any security concerns, the following should be noted.
> 
> - All nodes joining a cluster are supposed to have been certified and
> authenticated by mechanisms outside TIPC. This is no different for
> nodes/name spaces on the same host; they have to auto discover each other
> using the attached interfaces, and establish links which are supervised via 
> the
> regular link monitoring mechanism. Hence, a kernel local node has no other
> way to join a cluster than any other node, and has to obey policies set in
> the IP or device layers of the stack.
> 
> - Only when a sender has established with 100% certainty that the peer node
> is located in a kernel local name space does it choose to let user data 
> messages,
> and only those, take the crossover path to the receiving node/name space.
> 
> - If the receiving node/name space  is removed, its name space pointer is
> invalidated at all peer nodes, and their neighbor link monitoring will 
> eventually
> note that this node is gone.
> 
> - To ensure the "100% certainty" criteria, and prevent any possible spoofing,
> received discovery messages must contain a proof that they know a common
> secret. We use the hash_mix of the sending node/name space for this
> purpose, since it can be a

[tipc-discussion] [net-next v2] tipc: improve throughput between nodes in netns

2019-10-20 Thread Hoang Le
Currently, TIPC transports intra-node user data messages directly
socket to socket, hence shortcutting all the lower layers of the
communication stack. This gives TIPC very good intra node performance,
both regarding throughput and latency.

We now introduce a similar mechanism for TIPC data traffic across
network name spaces located in the same kernel. On the send path, the
call chain is as always accompanied by the sending node's network name
space pointer. However, once we have reliably established that the
receiving node is represented by a name space on the same host, we just
replace the name space pointer with the receiving node/name space's
ditto, and follow the regular socket receive path through the receiving
node. This technique gives us a throughput similar to the node internal
throughput, several times larger than if we let the traffic go through
the full network stack. As a comparison, max throughput for 64k
messages is four times larger than TCP throughput for the same type of
traffic.

To meet any security concerns, the following should be noted.

- All nodes joining a cluster are supposed to have been certified
and authenticated by mechanisms outside TIPC. This is no different for
nodes/name spaces on the same host; they have to auto discover each
other using the attached interfaces, and establish links which are
supervised via the regular link monitoring mechanism. Hence, a kernel
local node has no other way to join a cluster than any other node, and
has to obey policies set in the IP or device layers of the stack.

- Only when a sender has established with 100% certainty that the peer
node is located in a kernel local name space does it choose to let user
data messages, and only those, take the crossover path to the receiving
node/name space.

- If the receiving node/name space  is removed, its name space pointer
is invalidated at all peer nodes, and their neighbor link monitoring
will eventually note that this node is gone.

- To ensure the "100% certainty" criteria, and prevent any possible
spoofing, received discovery messages must contain a proof that they
know a common secret. We use the hash_mix of the sending node/name
space for this purpose, since it can be accessed directly by all other
name spaces in the kernel. Upon reception of a discovery message, the
receiver checks this proof against all the local name spaces'
hash_mix:es.  If it finds a match, that, along with a matching node id
and cluster id, is deemed sufficient proof that the peer node in
question is in a local name space, and a wormhole can be opened.

- We should also consider that TIPC is intended to be a cluster local
IPC mechanism (just like e.g. UNIX sockets)  rather than a network
protocol, and hence should be given more freedom to shortcut the lower
protocol than other protocols.

Regarding traceability, we should notice that since commit 6c9081a3915d
("tipc: add loopback device tracking") it is possible to follow the node
internal packet flow by just activating tcpdump on the loopback
interface. This will be true even for this mechanism; by activating
tcpdump on the involved nodes' loopback interfaces their inter-name
space messaging can easily be tracked.

Suggested-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/discover.c   |  10 -
 net/tipc/msg.h|  10 +
 net/tipc/name_distr.c |   2 +-
 net/tipc/node.c   | 100 --
 net/tipc/node.h   |   4 +-
 net/tipc/socket.c |   6 +--
 6 files changed, 121 insertions(+), 11 deletions(-)

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..338d402fcf39 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -38,6 +38,8 @@
 #include "node.h"
 #include "discover.h"
 
+#include 
+
 /* min delay during bearer start up */
 #define TIPC_DISC_INIT msecs_to_jiffies(125)
 /* max delay if bearer has no links */
@@ -83,6 +85,7 @@ static void tipc_disc_init_msg(struct net *net, struct 
sk_buff *skb,
struct tipc_net *tn = tipc_net(net);
u32 dest_domain = b->domain;
struct tipc_msg *hdr;
+   u32 hash;
 
hdr = buf_msg(skb);
tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
@@ -94,6 +97,10 @@ static void tipc_disc_init_msg(struct net *net, struct 
sk_buff *skb,
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), >addr);
+   hash = tn->random;
+   hash ^= net_hash_mix(_net);
+   hash ^= net_hash_mix(net);
+   msg_set_peer_net_hash(hdr, hash);
msg_set_node_id(hdr, tipc_own_id(net));
 }
 
@@ -242,7 +249,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
if (!tipc_in_scope(legacy, b->domain, src))
return;
tipc_node_check_dest(net, src, peer_id, b, caps, signature,
-  

Re: [tipc-discussion] [net-next] tipc: improve throughput between nodes in netns

2019-10-18 Thread Hoang Le
Hi Jon,

Thanks for good description. 
However, w.r.t your comment  "We even need to verify cluster ids.",  I'm still 
unclear why we need to isolate cluster ids here.
I guess the node had been accepted already when bypassed at function 
tipc_disc_rcv. Then, we just check to apply new mechanism for
kernel local namespaces. 

Regards,
Hoang
-Original Message-
From: Jon Maloy  
Sent: Friday, October 18, 2019 2:20 AM
To: Hoang Huu Le ; ma...@donjonn.com; 
tipc-...@dektech.com.au; tipc-discussion@lists.sourceforge.net
Subject: RE: [net-next] tipc: improve throughput between nodes in netns

Hi Hoang,
We need a very good log text to justify this.

My proposal:

"Currently, TIPC transports intra-node user data messages directly socket to 
socket, hence shortcutting all the lower layers of the
communication stack. This gives TIPC very good intra node performance, both 
regarding throughput and latency.

We now introduce a similar mechanism for TIPC data traffic across network name 
spaces located in the same kernel. On the send path,
the call chain is as always accompanied by the sending node's network name 
space pointer. However, once we have reliably established
that the receiving node is represented by a name space on the same host, we 
just replace the name space pointer with the receiving
node/name space's ditto, and follow the regular socket receive path through the 
receiving node. This technique gives us a throughput
similar to the node internal throughput, several times larger than if we let 
the traffic go through the full network stack. As a
comparison, max throughput for 64k messages is four times larger than TCP 
throughput for the same type of traffic.

To meet any security concerns, the following should be noted. 

- All nodes joining a cluster are supposed to have been certified and 
authenticated by mechanisms outside TIPC. This is no
different for nodes/name spaces on the same host; they have to auto discover 
each other using the attached interfaces, and establish
links which are supervised via the regular link monitoring mechanism. Hence, a 
kernel local node has no other way to join a cluster
than any other node, and has to obey policies set in the IP or device 
layers of the stack. 

- Only when a sender has established with 100% certainty that the peer node is 
located in a kernel local name space does it choose
to let user data messages, and only those, take the crossover path to the 
receiving node/name space.

- If the receiving node/name space  is removed, its name space pointer is 
invalidated at all peer nodes, and their neighbor link
monitoring will eventually note that this node is gone.

- To ensure the "100% certainty" criteria, and prevent any possible spoofing, 
received discovery messages must contain a proof that
they know a common secret. We use the hash_mix of the sending node/name space 
for this purpose, since it can be accessed directly by
all other name spaces in the kernel. Upon reception of a discovery message, the 
receiver checks this proof against all the local
name spaces' hash_mix:es.  If it finds a match, that, along with a matching 
node id and cluster id, is deemed sufficient proof
that the peer node in question is in a local name space, and a wormhole can be 
opened.

- We should also consider that TIPC is intended to be a cluster local IPC 
mechanism (just like e.g. UNIX sockets)  rather than a
network protocol, and hence should be given more freedom to shortcut the lower 
protocol than other protocols.

Regarding traceability, we should notice that since commit 6c9081a3915d ("add 
loopback device tracing") it is possible to follow the
node internal packet flow by just activating tcpdump on the loopback interface. 
This will be true even for this mechanism; by
activating tcpdump on the involved nodes' loopback interfaces their inter-name 
space messaging can easily be tracked."

I also think there should be a "Suggested-by: Jon Maloy 
" at the bottom of the patch.

See more comments below.


> -Original Message-
> From: Hoang Le 
> Sent: 17-Oct-19 06:10
> To: Jon Maloy ; ma...@donjonn.com; tipc-
> d...@dektech.com.au
> Subject: [net-next] tipc: improve throughput between nodes in netns
> 
> Introduce cross-namespace traffic transmission handled as intranode traffic.
> This way, throughput between nodes in namespaces is as fast as local.
> Looking through the architectural view of TIPC, the new TIPC mechanism for
> containers will not introduce any security issues or break the current policies at
> all:
> 
> 1/ Extranode:
> 
>   Node A Node B
> +-+   +-+
> |  TIPC   |   |  TIPC   |
> |   Application   |   |   Application   |
> |-|  

[tipc-discussion] [iproute2-next v2] tipc: support 128bit node identity for peer removing

2019-10-13 Thread Hoang Le
We add the support to remove a specific node down with 128bit
node identifier, as an alternative to legacy 32-bit node address.

v2: improve usage for 'tipc peer remove' command

Signed-off-by: Hoang Le 
---
 tipc/peer.c | 53 -
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/tipc/peer.c b/tipc/peer.c
index f6380777033d..f14ec35e6f71 100644
--- a/tipc/peer.c
+++ b/tipc/peer.c
@@ -59,17 +59,68 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return msg_doit(nlh, NULL, NULL);
 }
 
+static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd,
+ struct cmdl *cmdl, void *data)
+{
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   __u8 id[16] = {0,};
+   __u64 *w0 = (__u64 *)[0];
+   __u64 *w1 = (__u64 *)[8];
+   struct nlattr *nest;
+   char *str;
+
+   if (cmdl->argc != cmdl->optind + 1) {
+   fprintf(stderr, "Usage: %s peer remove identity NODEID\n",
+   cmdl->argv[0]);
+   return -EINVAL;
+   }
+
+   str = shift_cmdl(cmdl);
+   if (str2nodeid(str, id)) {
+   fprintf(stderr, "Invalid node identity\n");
+   return -EINVAL;
+   }
+
+   nlh = msg_init(buf, TIPC_NL_PEER_REMOVE);
+   if (!nlh) {
+   fprintf(stderr, "error, message initialisation failed\n");
+   return -1;
+   }
+
+   nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1);
+   mnl_attr_nest_end(nlh, nest);
+
+   return msg_doit(nlh, NULL, NULL);
+}
+
 static void cmd_peer_rm_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s peer remove PROPERTY\n\n"
+   "PROPERTIES\n"
+   " identity NODEID - Remove peer node identity\n",
+   cmdl->argv[0]);
+}
+
+static void cmd_peer_rm_addr_help(struct cmdl *cmdl)
 {
fprintf(stderr, "Usage: %s peer remove address ADDRESS\n",
cmdl->argv[0]);
 }
 
+static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s peer remove identity NODEID\n",
+   cmdl->argv[0]);
+}
+
 static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd,
struct cmdl *cmdl, void *data)
 {
const struct cmd cmds[] = {
-   { "address",cmd_peer_rm_addr,   cmd_peer_rm_help },
+   { "address",  cmd_peer_rm_addr,   cmd_peer_rm_addr_help },
+   { "identity", cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help },
{ NULL }
};
 
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next v2] tipc: support 128bit node identity for peer removing

2019-10-13 Thread Hoang Le
We add the support to remove a specific node down with 128bit
node identifier, as an alternative to legacy 32-bit node address.

v2: improve usage for 'tipc peer remove' command

Signed-off-by: Hoang Le 
---
 tipc/peer.c | 55 -
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/tipc/peer.c b/tipc/peer.c
index f6380777033d..e1517743f80f 100644
--- a/tipc/peer.c
+++ b/tipc/peer.c
@@ -59,17 +59,70 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return msg_doit(nlh, NULL, NULL);
 }
 
+static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd,
+ struct cmdl *cmdl, void *data)
+{
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   __u8 id[16] = {0,};
+   __u64 *w0 = (__u64 *)[0];
+   __u64 *w1 = (__u64 *)[8];
+   struct nlattr *nest;
+   char *str;
+
+   if (cmdl->argc != cmdl->optind + 1) {
+   fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n",
+   cmdl->argv[0]);
+   return -EINVAL;
+   }
+
+   str = shift_cmdl(cmdl);
+   if (str2nodeid(str, id)) {
+   fprintf(stderr, "Invalid node identity\n");
+   return -EINVAL;
+   }
+
+   nlh = msg_init(buf, TIPC_NL_PEER_REMOVE);
+   if (!nlh) {
+   fprintf(stderr, "error, message initialisation failed\n");
+   return -1;
+   }
+
+   nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1);
+   mnl_attr_nest_end(nlh, nest);
+
+   return msg_doit(nlh, NULL, NULL);
+}
+
 static void cmd_peer_rm_help(struct cmdl *cmdl)
+{
+   fprintf(stderr,
+   "Usage: %s peer remove PROPERTY\n\n"
+   "PROPERTIES\n"
+   " address  - Remove peer node address\n"
+   " nodeid   - Remove peer node identity\n",
+   cmdl->argv[0]);
+}
+
+static void cmd_peer_rm_addr_help(struct cmdl *cmdl)
 {
fprintf(stderr, "Usage: %s peer remove address ADDRESS\n",
cmdl->argv[0]);
 }
 
+static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n",
+   cmdl->argv[0]);
+}
+
 static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd,
struct cmdl *cmdl, void *data)
 {
const struct cmd cmds[] = {
-   { "address",cmd_peer_rm_addr,   cmd_peer_rm_help },
+   { "address", cmd_peer_rm_addr,   cmd_peer_rm_addr_help },
+   { "nodeid",  cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help },
{ NULL }
};
 
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: support 128bit node identity for peer removing

2019-10-13 Thread Hoang Le
We add the support to remove a specific node down with 128bit
node identifier, as an alternative to legacy 32-bit node address.

Signed-off-by: Hoang Le 
---
 net/tipc/node.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index c8f6177dd5a2..152b98b2e8f5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1926,8 +1926,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
genl_info *info)
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+   u8 node_id[NODE_ID_LEN];
+   u64 *w0 = (u64 *)_id[0];
+   u64 *w1 = (u64 *)_id[8];
struct tipc_node *peer;
-   u32 addr;
+   u32 addr = 0;
int err;
 
/* We identify the peer by its net */
@@ -1940,16 +1943,26 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
genl_info *info)
if (err)
return err;
 
-   if (!attrs[TIPC_NLA_NET_ADDR])
-   return -EINVAL;
-
-   addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+   if (attrs[TIPC_NLA_NET_ADDR]) {
+   addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+   if (!addr)
+   return -EINVAL;
+   if (in_own_node(net, addr))
+   return -ENOTSUPP;
+   }
 
-   if (in_own_node(net, addr))
-   return -ENOTSUPP;
+   if (attrs[TIPC_NLA_NET_NODEID]) {
+   if (!attrs[TIPC_NLA_NET_NODEID_W1])
+   return -EINVAL;
+   *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+   *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+   }
 
spin_lock_bh(>node_list_lock);
-   peer = tipc_node_find(net, addr);
+   if (!addr)
+   peer = tipc_node_find_by_id(net, node_id);
+   else
+   peer = tipc_node_find(net, addr);
if (!peer) {
spin_unlock_bh(>node_list_lock);
return -ENXIO;
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next] tipc: support 128bit node identity for peer removing

2019-10-13 Thread Hoang Le
We add the support to remove a specific node down with 128bit
node identifier, as an alternative to legacy 32-bit node address.

Signed-off-by: Hoang Le 
---
 tipc/peer.c | 54 -
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/tipc/peer.c b/tipc/peer.c
index f6380777033d..9f116b257fda 100644
--- a/tipc/peer.c
+++ b/tipc/peer.c
@@ -59,17 +59,69 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return msg_doit(nlh, NULL, NULL);
 }
 
+static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd,
+ struct cmdl *cmdl, void *data)
+{
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   __u8 id[16] = {0,};
+   __u64 *w0 = (__u64 *)[0];
+   __u64 *w1 = (__u64 *)[8];
+   struct nlattr *nest;
+   char *str;
+
+   if (cmdl->argc != cmdl->optind + 1) {
+   fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n",
+   cmdl->argv[0]);
+   return -EINVAL;
+   }
+
+   str = shift_cmdl(cmdl);
+   if (str2nodeid(str, id)) {
+   fprintf(stderr, "Invalid node identity\n");
+   return -EINVAL;
+   }
+
+   nlh = msg_init(buf, TIPC_NL_PEER_REMOVE);
+   if (!nlh) {
+   fprintf(stderr, "error, message initialisation failed\n");
+   return -1;
+   }
+
+   nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0);
+   mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1);
+   mnl_attr_nest_end(nlh, nest);
+
+   return msg_doit(nlh, NULL, NULL);
+}
+
 static void cmd_peer_rm_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s peer remove PROPERTY\n\n",
+   "PROPERTIES\n"
+   " address  - Remove peer node address\n"
+   " nodeid   - Remove peer node identity\n",
+   cmdl->argv[0]);
+}
+
+static void cmd_peer_rm_addr_help(struct cmdl *cmdl)
 {
fprintf(stderr, "Usage: %s peer remove address ADDRESS\n",
cmdl->argv[0]);
 }
 
+static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s peer remove nodeid NODEID\n",
+   cmdl->argv[0]);
+}
+
 static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd,
struct cmdl *cmdl, void *data)
 {
const struct cmd cmds[] = {
-   { "address",cmd_peer_rm_addr,   cmd_peer_rm_help },
+   { "address", cmd_peer_rm_addr,   cmd_peer_rm_addr_help },
+   { "nodeid",  cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help },
{ NULL }
};
 
-- 
2.20.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: improve throughput between nodes in netns

2019-10-02 Thread Hoang Le
Introduce cross-namespace traffic transmission handled as local node traffic.
This way, throughput between nodes in namespaces is as fast as local.

Testcase:
$ip netns exec 1 benchmark_client -c 100
$ip netns exec 2 benchmark_server

Before:
+-+
|  Msg Size  | # |  # Msgs/  |  Elapsed  |Throughput
  |
|  [octets]  | Conns |Conn   |  [ms] 
++
||   |   |   | Total [Msg/s] | Total [Mb/s] | 
Per Conn [Mb/s] |
+-+
|64  |  100  |64000  |13005  |   492103  | 251  |   
   2  |
+-+
|   256  |  100  |32000  | 4964  |   644627  |1320  |   
  13  |
+-+
|  1024  |  100  |16000  | 4524  |   353612  |2896  |   
  28  |
+-+
|  4096  |  100  | 8000  | 3675  |   217644  |7131  |   
  71  |
+-+
| 16384  |  100  | 4000  | 7914  |50540  |6624  |   
  66  |
+-+
| 65536  |  100  | 2000  |13000  |15384  |8065  |   
  80  |
+-+

After:
+-+
|  Msg Size  | # |  # Msgs/  |  Elapsed  |Throughput
  |
|  [octets]  | Conns |Conn   |  [ms] 
++
||   |   |   | Total [Msg/s] | Total [Mb/s] | 
Per Conn [Mb/s] |
+-+
|64  |  100  |64000  | 7842  |   816090  | 417  |   
   4  |
+-+
|   256  |  100  |32000  | 3593  |   890469  |1823  |   
  18  |
+-+
|  1024  |  100  |16000  | 1835  |   871828  |7142  |   
  71  |
+-+
|  4096  |  100  | 8000  | 1134  |   704904  |   23098  |   
 230  |
+-+
| 16384  |  100  | 4000  |  878  |   455295  |   59676  |   
 596  |
+-+
| 65536  |  100  | 2000  | 1007  |   198487  |  104064  |   
1040  |
+-+

Signed-off-by: Hoang Le 
---
 net/tipc/discover.c   |  6 ++-
 net/tipc/msg.h| 10 +
 net/tipc/name_distr.c |  2 +-
 net/tipc/node.c   | 94 +--
 net/tipc/node.h   |  4 +-
 net/tipc/socket.c |  6 +--
 6 files changed, 111 insertions(+), 11 deletions(-)

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..98d4eea97eb7 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -38,6 +38,8 @@
 #include "node.h"
 #include "discover.h"
 
+#include 
+
 /* min delay during bearer start up */
 #define TIPC_DISC_INIT msecs_to_jiffies(125)
 /* max delay if bearer has no links */
@@ -94,6 +96,7 @@ static void tipc_disc_init_msg(struct net *net, struct 
sk_buff *skb,
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), >addr);
+   msg_set_peer_net_hash(hdr, net_hash_mix(net));
msg_set_node_id(hdr, tipc_own_id(net));
 }
 
@@ -200,6 +203,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
u8 peer_id[NODE_ID_LEN] = {0,};
u32 dst = msg_dest_domain(hdr);
u32 net_id = msg_bc_netid(hdr);
+   u32 pnet_hash = msg_peer_net_hash(hdr);
struct tipc_media_addr maddr;
u32 src = msg_prevnode(hdr);
u32 mtyp = msg_type(hdr);
@@ -242,7 +246,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
if (

Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation

2019-07-30 Thread Hoang Le
Hi Jon,

Please ignore the previous results because my test of ICMP being starved by 
TIPC was wrong.

Regards,
Hoang
-Original Message-
From: Hoang Le  
Sent: Tuesday, July 30, 2019 11:24 AM
To: 'tung quang nguyen' ; 
tipc-discussion@lists.sourceforge.net; 'Jon Maloy' ;
'Jon Maloy' ; ying@windriver.com
Subject: Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup 
queue starvation

Hi Jon,

I combine benchmark test with 50 connections and ping cmd from two nodes.
You can compare results from original code, your fix and Tung's fix as 
following:

Original code:
node1 ~ # ping -s 1400 10.0.0.2 -c 300
PING 10.0.0.2 (10.0.0.2): 1400 data bytes
1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.337 ms
1408 bytes from 10.0.0.2: seq=24 ttl=64 time=1.208 ms
1408 bytes from 10.0.0.2: seq=25 ttl=64 time=1.145 ms
1408 bytes from 10.0.0.2: seq=76 ttl=64 time=1.145 ms
1408 bytes from 10.0.0.2: seq=78 ttl=64 time=1.449 ms
1408 bytes from 10.0.0.2: seq=130 ttl=64 time=1.230 ms
1408 bytes from 10.0.0.2: seq=134 ttl=64 time=1.020 ms
1408 bytes from 10.0.0.2: seq=185 ttl=64 time=1.743 ms
1408 bytes from 10.0.0.2: seq=186 ttl=64 time=1.502 ms
1408 bytes from 10.0.0.2: seq=187 ttl=64 time=1.289 ms
1408 bytes from 10.0.0.2: seq=189 ttl=64 time=1.306 ms
1408 bytes from 10.0.0.2: seq=239 ttl=64 time=1.254 ms
1408 bytes from 10.0.0.2: seq=241 ttl=64 time=1.114 ms
1408 bytes from 10.0.0.2: seq=242 ttl=64 time=1.058 ms

--- 10.0.0.2 ping statistics ---
301 packets transmitted, 301 packets received, 0% packet loss
round-trip min/avg/max = 0.077/0.361/1.743 ms

- JON's fix
node1 ~ # ping -s 1400 10.0.0.2 -c 300
1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.013 ms
1408 bytes from 10.0.0.2: seq=87 ttl=64 time=2.468 ms

--- 10.0.0.2 ping statistics ---
300 packets transmitted, 300 packets received, 0% packet loss
round-trip min/avg/max = 0.119/0.323/2.468 ms
node1 ~ #

- Tung's fix
node1 ~ # ping -s 1400 10.0.0.2 -c 300
--- 10.0.0.2 ping statistics ---
300 packets transmitted, 300 packets received, 0% packet loss
round-trip min/avg/max = 0.101/0.303/0.864 ms

From the ping statistics, I could see your solution starved twice and the maximum time 
is 2.468 ms.
So we have not completely solved the issue yet. But in the test results from Tung's 
fix, I don't see any starvation happening.  
So, I think we can go ahead with Tung's code fix. Please give me your opinion.

Regards,
Hoang
-Original Message-
From: tung quang nguyen  
Sent: Thursday, July 25, 2019 5:50 PM
To: 'Jon Maloy' ; 'Jon Maloy' ; 
tipc-discussion@lists.sourceforge.net;
ying@windriver.com
Subject: Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup 
queue starvation

Hi Jon,

Let's go for this way for now.

Thanks.

Best regards,
Tung Nguyen

-Original Message-
From: Jon Maloy  
Sent: Friday, July 19, 2019 10:06 AM
To: Jon Maloy ; Jon Maloy 
Cc: mohan.krishna.ghanta.krishnamur...@ericsson.com;
parthasarathy.bhuvara...@gmail.com; tung.q.ngu...@dektech.com.au;
hoang.h...@dektech.com.au; canh.d@dektech.com.au;
tuong.t.l...@dektech.com.au; gordan.mihalje...@dektech.com.au;
ying@windriver.com; tipc-discussion@lists.sourceforge.net
Subject: [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation

In commit 365ad353c256 ("tipc: reduce risk of user starvation during
link congestion") we allowed senders to add exactly one list of extra
buffers to the link backlog queues during link congestion (aka
"oversubscription"). However, the criteria for when to stop adding
wakeup messages to the input queue when the overload abates is
inaccurate, and may cause starvation problems during very high load.

Currently, we stop adding wakeup messages after 10 total failed attempts
where we find that there is no space left in the backlog queue for a
certain importance level. The counter for this is accumulated across all
levels, which may lead the algorithm to leave the loop prematurely,
although there may still be plenty of space available at some levels.
The result is sometimes that messages near the wakeup queue tail are not
added to the input queue as they should be.

We now introduce a more exact algorithm, where we keep adding wakeup
messages to a level as long as the backlog queue has free slots for
the corresponding level, and stop at the moment there are no more such
slots or when there are no more wakeup messages to dequeue.

Fixes: 365ad35 ("tipc: reduce risk of user starvation during link
congestion")
Reported-by: Tung Nguyen 
Signed-off-by: Jon Maloy 
---
 net/tipc/link.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 66d3a07..f1d2732 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -853,18 +853,31 @@ static int link_schedule_user(struct tipc_link *l,
struct tipc_msg *hdr)
  */
 static void link_prepare_wakeup(struct tipc_link *l)
 {
+   struct sk_buff_head *wakeupq = >wakeupq;
+   struct sk_buff_h

Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation

2019-07-29 Thread Hoang Le
Hi Jon,

I combined a benchmark test with 50 connections and a ping command between two nodes.
You can compare the results from the original code, your fix and Tung's fix as
follows:

Original code:
node1 ~ # ping -s 1400 10.0.0.2 -c 300
PING 10.0.0.2 (10.0.0.2): 1400 data bytes
1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.337 ms
1408 bytes from 10.0.0.2: seq=24 ttl=64 time=1.208 ms
1408 bytes from 10.0.0.2: seq=25 ttl=64 time=1.145 ms
1408 bytes from 10.0.0.2: seq=76 ttl=64 time=1.145 ms
1408 bytes from 10.0.0.2: seq=78 ttl=64 time=1.449 ms
1408 bytes from 10.0.0.2: seq=130 ttl=64 time=1.230 ms
1408 bytes from 10.0.0.2: seq=134 ttl=64 time=1.020 ms
1408 bytes from 10.0.0.2: seq=185 ttl=64 time=1.743 ms
1408 bytes from 10.0.0.2: seq=186 ttl=64 time=1.502 ms
1408 bytes from 10.0.0.2: seq=187 ttl=64 time=1.289 ms
1408 bytes from 10.0.0.2: seq=189 ttl=64 time=1.306 ms
1408 bytes from 10.0.0.2: seq=239 ttl=64 time=1.254 ms
1408 bytes from 10.0.0.2: seq=241 ttl=64 time=1.114 ms
1408 bytes from 10.0.0.2: seq=242 ttl=64 time=1.058 ms

--- 10.0.0.2 ping statistics ---
301 packets transmitted, 301 packets received, 0% packet loss
round-trip min/avg/max = 0.077/0.361/1.743 ms

- JON's fix
node1 ~ # ping -s 1400 10.0.0.2 -c 300
1408 bytes from 10.0.0.2: seq=22 ttl=64 time=1.013 ms
1408 bytes from 10.0.0.2: seq=87 ttl=64 time=2.468 ms

--- 10.0.0.2 ping statistics ---
300 packets transmitted, 300 packets received, 0% packet loss
round-trip min/avg/max = 0.119/0.323/2.468 ms
node1 ~ #

- Tung's fix
node1 ~ # ping -s 1400 10.0.0.2 -c 300
--- 10.0.0.2 ping statistics ---
300 packets transmitted, 300 packets received, 0% packet loss
round-trip min/avg/max = 0.101/0.303/0.864 ms

>From ping statistics, I could see your solution starved twice and maximum time 
>is 2.468 ms.
So we have not completely solved the issue yet. In the test results from Tung's
fix, however, I don't see any starvation happen.
So, I think we can go ahead with Tung's fix. Please let me know what you think.

Regards,
Hoang
-Original Message-
From: tung quang nguyen  
Sent: Thursday, July 25, 2019 5:50 PM
To: 'Jon Maloy' ; 'Jon Maloy' ; 
tipc-discussion@lists.sourceforge.net;
ying@windriver.com
Subject: Re: [tipc-discussion] [net-next v2 1/1] tipc: reduce risk of wakeup 
queue starvation

Hi Jon,

Let's go for this way for now.

Thanks.

Best regards,
Tung Nguyen

-Original Message-
From: Jon Maloy  
Sent: Friday, July 19, 2019 10:06 AM
To: Jon Maloy ; Jon Maloy 
Cc: mohan.krishna.ghanta.krishnamur...@ericsson.com;
parthasarathy.bhuvara...@gmail.com; tung.q.ngu...@dektech.com.au;
hoang.h...@dektech.com.au; canh.d@dektech.com.au;
tuong.t.l...@dektech.com.au; gordan.mihalje...@dektech.com.au;
ying@windriver.com; tipc-discussion@lists.sourceforge.net
Subject: [net-next v2 1/1] tipc: reduce risk of wakeup queue starvation

In commit 365ad353c256 ("tipc: reduce risk of user starvation during
link congestion") we allowed senders to add exactly one list of extra
buffers to the link backlog queues during link congestion (aka
"oversubscription"). However, the criteria for when to stop adding
wakeup messages to the input queue when the overload abates is
inaccurate, and may cause starvation problems during very high load.

Currently, we stop adding wakeup messages after 10 total failed attempts
where we find that there is no space left in the backlog queue for a
certain importance level. The counter for this is accumulated across all
levels, which may lead the algorithm to leave the loop prematurely,
although there may still be plenty of space available at some levels.
The result is sometimes that messages near the wakeup queue tail are not
added to the input queue as they should be.

We now introduce a more exact algorithm, where we keep adding wakeup
messages to a level as long as the backlog queue has free slots for
the corresponding level, and stop at the moment there are no more such
slots or when there are no more wakeup messages to dequeue.

Fixes: 365ad35 ("tipc: reduce risk of user starvation during link
congestion")
Reported-by: Tung Nguyen 
Signed-off-by: Jon Maloy 
---
 net/tipc/link.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 66d3a07..f1d2732 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -853,18 +853,31 @@ static int link_schedule_user(struct tipc_link *l,
struct tipc_msg *hdr)
  */
 static void link_prepare_wakeup(struct tipc_link *l)
 {
+   struct sk_buff_head *wakeupq = >wakeupq;
+   struct sk_buff_head *inputq = l->inputq;
struct sk_buff *skb, *tmp;
-   int imp, i = 0;
+   struct sk_buff_head tmpq;
+   int avail[5] = {0,};
+   int imp = 0;
+
+   __skb_queue_head_init();
 
-   skb_queue_walk_safe(>wakeupq, skb, tmp) {
+   for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
+   avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
+
+   skb_queue_walk_safe(wakeupq, skb, tmp) {

[tipc-discussion] [net-next] tipc: fix retransmission failure when link re-established

2019-07-16 Thread Hoang Le
Currently a link is declared stale and reset if the stale limit time is longer
than the link tolerance time. But this stale limit is not initialized correctly
when the link is reset. This leads to a link being declared as failed,
because the reset criteria are always met even though no packet was
retransmitted while the link was being re-established.

To fix this, we set the stale limit time far into the future in two places:
when a link is reset and when packets are acked by the peer.

Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria")
Signed-off-by: Hoang Le 
---
 net/tipc/link.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 66d3a07bc571..2ba79d451f08 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -909,6 +909,7 @@ void tipc_link_reset(struct tipc_link *l)
l->silent_intv_cnt = 0;
l->rst_cnt = 0;
l->bc_peer_is_up = false;
+   l->stale_limit = msecs_to_jiffies(~0);
memset(>mon_state, 0, sizeof(l->mon_state));
tipc_link_reset_stats(l);
 }
@@ -1510,6 +1511,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff 
*skb,
 
/* Forward queues and wake up waiting users */
if (likely(tipc_link_release_pkts(l, msg_ack(hdr {
+   l->stale_limit = msecs_to_jiffies(~0);
tipc_link_advance_backlog(l, xmitq);
if (unlikely(!skb_queue_empty(>wakeupq)))
link_prepare_wakeup(l);
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next v6] tipc: support interface name when activating UDP bearer

2019-06-24 Thread Hoang Le
Add support for specifying the name of an interface that has an IP address,
as an alternative to specifying the IP address itself, when activating a UDP
bearer. This frees the user from keeping track of the current IP address
for each device.

Old command syntax:
$tipc bearer enable media udp name NAME localip IP

New command syntax:
$tipc bearer enable media udp name NAME [localip IP|dev DEVICE]

v2:
- Removed initial value for fd
- Fixed the returning value for cmd_bearer_validate_and_get_addr
  to make its consistent with using: zero or non-zero
v3: - Switch to use helper 'get_ifname' to retrieve interface name
v4: - Replace legacy SIOCGIFADDR by netlink
v5: - Fix leaky rtnl_handle

Acked-by: Ying Xue 
Signed-off-by: Hoang Le 
---
 tipc/bearer.c | 94 ---
 1 file changed, 89 insertions(+), 5 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 1f3a4d1e..4470819e4a96 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -19,10 +19,12 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 
+#include "utils.h"
 #include "cmdl.h"
 #include "msg.h"
 #include "bearer.h"
@@ -68,7 +70,7 @@ static void cmd_bearer_enable_l2_help(struct cmdl *cmdl, char 
*media)
 static void cmd_bearer_enable_udp_help(struct cmdl *cmdl, char *media)
 {
fprintf(stderr,
-   "Usage: %s bearer enable [OPTIONS] media %s name NAME localip 
IP [UDP OPTIONS]\n\n"
+   "Usage: %s bearer enable [OPTIONS] media %s name NAME [localip 
IP|device DEVICE] [UDP OPTIONS]\n\n"
"OPTIONS\n"
" domain DOMAIN - Discovery domain\n"
" priority PRIORITY - Bearer priority\n\n"
@@ -119,6 +121,76 @@ static int generate_multicast(short af, char *buf, int 
bufsize)
return 0;
 }
 
+static struct ifreq ifr;
+static int nl_dump_req_filter(struct nlmsghdr *nlh, int reqlen)
+{
+   struct ifaddrmsg *ifa = NLMSG_DATA(nlh);
+
+   ifa->ifa_index = ifr.ifr_ifindex;
+
+   return 0;
+}
+
+static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg)
+{
+   struct ifaddrmsg *ifa = NLMSG_DATA(nlh);
+   char *r_addr = (char *)arg;
+   int len = nlh->nlmsg_len;
+   struct rtattr *addr_attr;
+
+   if (ifr.ifr_ifindex != ifa->ifa_index)
+   return 0;
+
+   if (strlen(r_addr) > 0)
+   return 0;
+
+   addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa),
+len - NLMSG_LENGTH(sizeof(*ifa)));
+   if (!addr_attr)
+   return 0;
+
+   if (ifa->ifa_family == AF_INET) {
+   struct sockaddr_in ip4addr;
+   memcpy(_addr, RTA_DATA(addr_attr),
+  sizeof(struct in_addr));
+   inet_ntop(AF_INET, _addr, r_addr,
+ INET_ADDRSTRLEN);
+   } else if (ifa->ifa_family == AF_INET6) {
+   struct sockaddr_in6 ip6addr;
+   memcpy(_addr, RTA_DATA(addr_attr),
+  sizeof(struct in6_addr));
+   inet_ntop(AF_INET6, _addr, r_addr,
+ INET6_ADDRSTRLEN);
+   }
+   return 0;
+}
+
+static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr)
+{
+   struct rtnl_handle rth = { .fd = -1 };
+   int err = -1;
+
+   memset(, 0, sizeof(ifr));
+   if (!name || !r_addr || get_ifname(ifr.ifr_name, name))
+   return err;
+
+   ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name);
+   if (!ifr.ifr_ifindex)
+   return err;
+
+   /* remove from cache */
+   ll_drop_by_index(ifr.ifr_ifindex);
+
+   if ((err = rtnl_open(, 0)) < 0)
+   return err;
+
+   if ((err = rtnl_addrdump_req(, AF_UNSPEC, nl_dump_req_filter)) > 0)
+   err = rtnl_dump_filter(, nl_dump_addr_filter, r_addr);
+
+   rtnl_close();
+   return err;
+}
+
 static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts,
  struct cmdl *cmdl)
 {
@@ -136,13 +208,25 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, 
struct opt *opts,
.ai_family = AF_UNSPEC,
.ai_socktype = SOCK_DGRAM
};
+   char addr[INET6_ADDRSTRLEN] = {0};
 
-   if (!(opt = get_opt(opts, "localip"))) {
-   fprintf(stderr, "error, udp bearer localip missing\n");
-   cmd_bearer_enable_udp_help(cmdl, "udp");
+   opt = get_opt(opts, "device");
+   if (opt && cmd_bearer_validate_and_get_addr(opt->val, addr) < 0) {
+   fprintf(stderr, "error, no device name available\n");
return -EINVAL;
}
-   locip = opt->val;
+
+   if (strlen(addr) > 0) {
+   locip = addr;
+   } else {
+   opt = get_opt(

Re: [tipc-discussion] [iproute2-next v5] tipc: support interface name when activating UDP bearer

2019-06-23 Thread Hoang Le
Thanks David. I will update the code according to your comments.

For the item:
> + /* remove from cache */
> + ll_drop_by_index(ifr.ifr_ifindex);

why the call to ll_drop_by_index? doing so means that ifindex is looked
up again.

[Hoang]
> + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name);
This function stores an ll_cache entry in the hash map table. We have to call
ll_drop_by_index() to prevent a memory leak.

Regards,
Hoang
-Original Message-
From: David Ahern  
Sent: Saturday, June 22, 2019 5:50 AM
To: Hoang Le ; dsah...@gmail.com; 
jon.ma...@ericsson.com; ma...@donjonn.com; ying@windriver.com; 
net...@vger.kernel.org; tipc-discussion@lists.sourceforge.net
Subject: Re: [iproute2-next v5] tipc: support interface name when activating 
UDP bearer

On 6/13/19 2:07 AM, Hoang Le wrote:
> @@ -119,6 +121,74 @@ static int generate_multicast(short af, char *buf, int 
> bufsize)
>   return 0;
>  }
>  
> +static struct ifreq ifr = {};

you don't need to initialize globals, but you could pass a struct as
the arg to the filter here which is both the addr buffer and the ifindex
of interest.

> +static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg)
> +{
> + struct ifaddrmsg *ifa = NLMSG_DATA(nlh);
> + char *r_addr = (char *)arg;
> + int len = nlh->nlmsg_len;
> + struct rtattr *addr_attr;
> +
> + if (ifr.ifr_ifindex != ifa->ifa_index)
> + return 0;
> +
> + if (strlen(r_addr) > 0)
> + return 1;
> +
> + addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa),
> +  len - NLMSG_LENGTH(sizeof(*ifa)));
> + if (!addr_attr)
> + return 0;
> +
> + if (ifa->ifa_family == AF_INET) {
> + struct sockaddr_in ip4addr;
> + memcpy(_addr, RTA_DATA(addr_attr),
> +sizeof(struct in_addr));
> + if (inet_ntop(AF_INET, _addr, r_addr,
> +   INET_ADDRSTRLEN) == NULL)
> + return 0;
> + } else if (ifa->ifa_family == AF_INET6) {
> + struct sockaddr_in6 ip6addr;
> + memcpy(_addr, RTA_DATA(addr_attr),
> +sizeof(struct in6_addr));
> + if (inet_ntop(AF_INET6, _addr, r_addr,
> +   INET6_ADDRSTRLEN) == NULL)
> + return 0;
> + }
> + return 1;
> +}
> +
> +static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr)
> +{
> + struct rtnl_handle rth ={ .fd = -1 };

space between '={'

> +
> + memset(, 0, sizeof(ifr));
> + if (!name || !r_addr || get_ifname(ifr.ifr_name, name))
> + return 0;
> +
> + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name);
> + if (!ifr.ifr_ifindex)
> + return 0;
> +
> + /* remove from cache */
> + ll_drop_by_index(ifr.ifr_ifindex);

why the call to ll_drop_by_index? doing so means that ifindex is looked
up again.

> +
> + if (rtnl_open(, 0) < 0)
> + return 0;
> +
> + if (rtnl_addrdump_req(, AF_UNSPEC, 0) < 0) {

If you pass a filter here to set ifa_index, this command on newer
kernels will be much more efficient. See ipaddr_dump_filter.


> + rtnl_close();
> + return 0;
> + }
> +
> + if (rtnl_dump_filter(, nl_dump_addr_filter, r_addr) < 0) {
> + rtnl_close();
> + return 0;
> + }
> + rtnl_close();
> + return 1;
> +}

it would be better to have one exit with the rtnl_close and return rc based
on the above.



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next v5] tipc: support interface name when activating UDP bearer

2019-06-13 Thread Hoang Le
Add support for specifying the name of an interface that has an IP address,
as an alternative to specifying the IP address itself, when activating a UDP
bearer. This frees the user from keeping track of the current IP address
for each device.

Old command syntax:
$tipc bearer enable media udp name NAME localip IP

New command syntax:
$tipc bearer enable media udp name NAME [localip IP|dev DEVICE]

v2:
- Removed initial value for fd
- Fixed the returning value for cmd_bearer_validate_and_get_addr
  to make its consistent with using: zero or non-zero
v3: - Switch to use helper 'get_ifname' to retrieve interface name
v4: - Replace legacy SIOCGIFADDR by netlink
v5: - Fix leaky rtnl_handle

Acked-by: Ying Xue 
Signed-off-by: Hoang Le 
---
 tipc/bearer.c | 92 ---
 1 file changed, 87 insertions(+), 5 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 1f3a4d1e..e17e2477c1ad 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -19,10 +19,12 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 
+#include "utils.h"
 #include "cmdl.h"
 #include "msg.h"
 #include "bearer.h"
@@ -68,7 +70,7 @@ static void cmd_bearer_enable_l2_help(struct cmdl *cmdl, char 
*media)
 static void cmd_bearer_enable_udp_help(struct cmdl *cmdl, char *media)
 {
fprintf(stderr,
-   "Usage: %s bearer enable [OPTIONS] media %s name NAME localip 
IP [UDP OPTIONS]\n\n"
+   "Usage: %s bearer enable [OPTIONS] media %s name NAME [localip 
IP|device DEVICE] [UDP OPTIONS]\n\n"
"OPTIONS\n"
" domain DOMAIN - Discovery domain\n"
" priority PRIORITY - Bearer priority\n\n"
@@ -119,6 +121,74 @@ static int generate_multicast(short af, char *buf, int 
bufsize)
return 0;
 }
 
+static struct ifreq ifr = {};
+static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg)
+{
+   struct ifaddrmsg *ifa = NLMSG_DATA(nlh);
+   char *r_addr = (char *)arg;
+   int len = nlh->nlmsg_len;
+   struct rtattr *addr_attr;
+
+   if (ifr.ifr_ifindex != ifa->ifa_index)
+   return 0;
+
+   if (strlen(r_addr) > 0)
+   return 1;
+
+   addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa),
+len - NLMSG_LENGTH(sizeof(*ifa)));
+   if (!addr_attr)
+   return 0;
+
+   if (ifa->ifa_family == AF_INET) {
+   struct sockaddr_in ip4addr;
+   memcpy(_addr, RTA_DATA(addr_attr),
+  sizeof(struct in_addr));
+   if (inet_ntop(AF_INET, _addr, r_addr,
+ INET_ADDRSTRLEN) == NULL)
+   return 0;
+   } else if (ifa->ifa_family == AF_INET6) {
+   struct sockaddr_in6 ip6addr;
+   memcpy(_addr, RTA_DATA(addr_attr),
+  sizeof(struct in6_addr));
+   if (inet_ntop(AF_INET6, _addr, r_addr,
+ INET6_ADDRSTRLEN) == NULL)
+   return 0;
+   }
+   return 1;
+}
+
+static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr)
+{
+   struct rtnl_handle rth ={ .fd = -1 };
+
+   memset(, 0, sizeof(ifr));
+   if (!name || !r_addr || get_ifname(ifr.ifr_name, name))
+   return 0;
+
+   ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name);
+   if (!ifr.ifr_ifindex)
+   return 0;
+
+   /* remove from cache */
+   ll_drop_by_index(ifr.ifr_ifindex);
+
+   if (rtnl_open(, 0) < 0)
+   return 0;
+
+   if (rtnl_addrdump_req(, AF_UNSPEC, 0) < 0) {
+   rtnl_close();
+   return 0;
+   }
+
+   if (rtnl_dump_filter(, nl_dump_addr_filter, r_addr) < 0) {
+   rtnl_close();
+   return 0;
+   }
+   rtnl_close();
+   return 1;
+}
+
 static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts,
  struct cmdl *cmdl)
 {
@@ -136,13 +206,25 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, 
struct opt *opts,
.ai_family = AF_UNSPEC,
.ai_socktype = SOCK_DGRAM
};
+   char addr[INET6_ADDRSTRLEN] = {0};
 
-   if (!(opt = get_opt(opts, "localip"))) {
-   fprintf(stderr, "error, udp bearer localip missing\n");
-   cmd_bearer_enable_udp_help(cmdl, "udp");
+   opt = get_opt(opts, "device");
+   if (opt && !cmd_bearer_validate_and_get_addr(opt->val, addr)) {
+   fprintf(stderr, "error, no device name available\n");
return -EINVAL;
}
-   locip = opt->val;
+
+   if (strlen(addr) > 0) {
+   locip = addr;
+   } else {
+   opt = get_opt(opts, "lo

[tipc-discussion] [iproute2-next v4] tipc: support interface name when activating UDP bearer

2019-06-13 Thread Hoang Le
Add support for specifying the name of an interface that has an IP address,
as an alternative to specifying the IP address itself, when activating a UDP
bearer. This frees the user from keeping track of the current IP address
for each device.

Old command syntax:
$tipc bearer enable media udp name NAME localip IP

New command syntax:
$tipc bearer enable media udp name NAME [localip IP|dev DEVICE]

v2:
- Removed initial value for fd
- Fixed the returning value for cmd_bearer_validate_and_get_addr
  to make its consistent with using: zero or non-zero
v3:
- Switch to use helper 'get_ifname' to retrieve interface name

v4:
- Replace legacy SIOCGIFADDR using by netlink

Acked-by: Ying Xue 
Signed-off-by: Hoang Le 
---
 tipc/bearer.c | 89 ---
 1 file changed, 84 insertions(+), 5 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 1f3a4d1e..367ec8a2630f 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -19,10 +19,12 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 
+#include "utils.h"
 #include "cmdl.h"
 #include "msg.h"
 #include "bearer.h"
@@ -68,7 +70,7 @@ static void cmd_bearer_enable_l2_help(struct cmdl *cmdl, char 
*media)
 static void cmd_bearer_enable_udp_help(struct cmdl *cmdl, char *media)
 {
fprintf(stderr,
-   "Usage: %s bearer enable [OPTIONS] media %s name NAME localip 
IP [UDP OPTIONS]\n\n"
+   "Usage: %s bearer enable [OPTIONS] media %s name NAME [localip 
IP|device DEVICE] [UDP OPTIONS]\n\n"
"OPTIONS\n"
" domain DOMAIN - Discovery domain\n"
" priority PRIORITY - Bearer priority\n\n"
@@ -119,6 +121,71 @@ static int generate_multicast(short af, char *buf, int 
bufsize)
return 0;
 }
 
+static struct ifreq ifr;
+static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg)
+{
+   struct ifaddrmsg *ifa = NLMSG_DATA(nlh);
+   char *r_addr = (char *)arg;
+   int len = nlh->nlmsg_len;
+   struct rtattr *addr_attr;
+
+   if (ifr.ifr_ifindex != ifa->ifa_index)
+   return 0;
+
+   if (strlen(r_addr) > 0)
+   return 1;
+
+   addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa),
+len - NLMSG_LENGTH(sizeof(*ifa)));
+   if (!addr_attr)
+   return 0;
+
+   if (ifa->ifa_family == AF_INET) {
+   struct sockaddr_in ip4addr;
+   memcpy(_addr, RTA_DATA(addr_attr),
+  sizeof(struct in_addr));
+   if (inet_ntop(AF_INET, _addr, r_addr,
+ INET_ADDRSTRLEN) == NULL)
+   return 0;
+   } else if (ifa->ifa_family == AF_INET6) {
+   struct sockaddr_in6 ip6addr;
+   memcpy(_addr, RTA_DATA(addr_attr),
+  sizeof(struct in6_addr));
+   if (inet_ntop(AF_INET6, _addr, r_addr,
+ INET6_ADDRSTRLEN) == NULL)
+   return 0;
+   }
+   return 1;
+}
+
+static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr)
+{
+   struct rtnl_handle rth = { .fd = -1 };
+
+   memset(, 0, sizeof(ifr));
+   if (!name || !r_addr || get_ifname(ifr.ifr_name, name))
+   return 0;
+
+   ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name);
+   if (!ifr.ifr_ifindex)
+   return 0;
+
+   /* remove from cache */
+   ll_drop_by_index(ifr.ifr_ifindex);
+
+   if (rtnl_open(, 0) < 0)
+   return 0;
+
+   if (rtnl_addrdump_req(, AF_UNSPEC, 0) < 0)
+   return 0;
+
+   if (rtnl_dump_filter(, nl_dump_addr_filter, r_addr) < 0)
+   return 0;
+
+   rtnl_close();
+   return 1;
+}
+
 static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts,
  struct cmdl *cmdl)
 {
@@ -136,13 +203,25 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, 
struct opt *opts,
.ai_family = AF_UNSPEC,
.ai_socktype = SOCK_DGRAM
};
+   char addr[INET6_ADDRSTRLEN] = {0};
 
-   if (!(opt = get_opt(opts, "localip"))) {
-   fprintf(stderr, "error, udp bearer localip missing\n");
-   cmd_bearer_enable_udp_help(cmdl, "udp");
+   opt = get_opt(opts, "device");
+   if (opt && !cmd_bearer_validate_and_get_addr(opt->val, addr)) {
+   fprintf(stderr, "error, no device name available\n");
return -EINVAL;
}
-   locip = opt->val;
+
+   if (strlen(addr) > 0) {
+   locip = addr;
+   } else {
+   opt = get_opt(opts, "localip");
+   if (!opt) {
+   fprintf(stderr, "error,

[tipc-discussion] [net] tipc: missing entries in name table of publications

2019-04-09 Thread Hoang Le
When binding multiple services with specific types 1Ki, 2Ki, ...,
some entries in the name table of publications are missing
when listed via 'tipc name show'.

The problem lies in how a zero last_type value provided via netlink
is interpreted. In the first case, zero is the initial 'type' when the
name table dump starts. In the second case, the dump is continuing from
type zero (the node state service type). In that case, the lookup function
fails to find the node state service type in the next iteration.

To solve this, add a further condition that marks the type as dirty, so
that the correct service type is looked up for the next iteration instead
of selecting the first service as for the initial 'type' zero.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/name_table.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bff241f03525..89993afe0fbd 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct 
tipc_nl_msg *msg,
for (; i < TIPC_NAMETBL_SIZE; i++) {
head = >nametbl->services[i];
 
-   if (*last_type) {
+   if (*last_type ||
+   (!i && *last_key && (*last_lower == *last_key))) {
service = tipc_service_find(net, *last_type);
if (!service)
return -EPIPE;
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: add NULL pointer check

2019-04-03 Thread Hoang Le
If the skb has somehow been dequeued from inputq before processing, this
causes a NULL pointer dereference and a kernel crash.

Add a check that the skb is valid before using it.

Fixes: c55c8edafa9 ("tipc: smooth change between replicast and broadcast")
Reported-by: Tuong Lien Tong 
Acked-by: Ying Xue 
Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 76e14dc08bb9..6c997d4a6218 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -769,6 +769,9 @@ void tipc_mcast_filter_msg(struct net *net, struct 
sk_buff_head *defq,
u32 node, port;
 
skb = skb_peek(inputq);
+   if (!skb)
+   return;
+
hdr = buf_msg(skb);
 
if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq)))
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next] tipc: add NULL pointer check

2019-04-02 Thread Hoang Le
If the skb has somehow been dequeued from inputq before processing, this
causes a NULL pointer dereference and a kernel crash.

Add a check that the skb is valid before using it.

Fixes: c55c8edafa9 ("tipc: smooth change between replicast and broadcast")
Reported-by: Tuong Lien Tong 
Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 76e14dc08bb9..6c997d4a6218 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -769,6 +769,9 @@ void tipc_mcast_filter_msg(struct net *net, struct 
sk_buff_head *defq,
u32 node, port;
 
skb = skb_peek(inputq);
+   if (!skb)
+   return;
+
hdr = buf_msg(skb);
 
if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq)))
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next v2 1/3] tipc: add link broadcast set method and ratio

2019-03-22 Thread Hoang Le
The command added here makes it possible to forcibly configure the
broadcast link to use either broadcast or replicast, in addition to
the already existing auto selection algorithm.

A sample usage is shown below:
$tipc link set broadcast BROADCAST
$tipc link set broadcast AUTOSELECT ratio 25

$tipc link set broadcast -h
Usage: tipc link set broadcast PROPERTY

PROPERTIES
 BROADCAST - Forces all multicast traffic to be
 transmitted via broadcast only,
 irrespective of cluster size and number
 of destinations

 REPLICAST - Forces all multicast traffic to be
 transmitted via replicast only,
 irrespective of cluster size and number
 of destinations

 AUTOSELECT- Auto switching to broadcast or replicast
 depending on cluster size and destination
 node number

 ratio SIZE- Set the AUTOSELECT criteria, percentage of
 destination nodes vs cluster size

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 include/uapi/linux/tipc_netlink.h |  2 +
 tipc/link.c   | 96 ++-
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/tipc_netlink.h 
b/include/uapi/linux/tipc_netlink.h
index 0ebe02ef1a86..efb958fd167d 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -281,6 +281,8 @@ enum {
TIPC_NLA_PROP_TOL,  /* u32 */
TIPC_NLA_PROP_WIN,  /* u32 */
TIPC_NLA_PROP_MTU,  /* u32 */
+   TIPC_NLA_PROP_BROADCAST,/* u32 */
+   TIPC_NLA_PROP_BROADCAST_RATIO,  /* u32 */
 
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/tipc/link.c b/tipc/link.c
index 43e26da3fa6b..e3b10bb7b3d4 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -28,6 +28,9 @@
 #define PRIORITY_STR "priority"
 #define TOLERANCE_STR "tolerance"
 #define WINDOW_STR "window"
+#define BROADCAST_STR "broadcast"
+
+static const char tipc_bclink_name[] = "broadcast-link";
 
 static int link_list_cb(const struct nlmsghdr *nlh, void *data)
 {
@@ -521,7 +524,8 @@ static void cmd_link_set_help(struct cmdl *cmdl)
"PROPERTIES\n"
" tolerance TOLERANCE   - Set link tolerance\n"
" priority PRIORITY - Set link priority\n"
-   " window WINDOW - Set link window\n",
+   " window WINDOW - Set link window\n"
+   " broadcast BROADCAST   - Set link broadcast\n",
cmdl->argv[0]);
 }
 
@@ -585,6 +589,95 @@ static int cmd_link_set_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return msg_doit(nlh, link_get_cb, );
 }
 
+static void cmd_link_set_bcast_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s link set broadcast PROPERTY\n\n"
+   "PROPERTIES\n"
+   " BROADCAST - Forces all multicast traffic to be\n"
+   " transmitted via broadcast only,\n"
+   " irrespective of cluster size and number\n"
+   " of destinations\n\n"
+   " REPLICAST - Forces all multicast traffic to be\n"
+   " transmitted via replicast only,\n"
+   " irrespective of cluster size and number\n"
+   " of destinations\n\n"
+   " AUTOSELECT- Auto switching to broadcast or 
replicast\n"
+   " depending on cluster size and 
destination\n"
+   " node number\n\n"
+   " ratio SIZE- Set the AUTOSELECT criteria, percentage 
of\n"
+   " destination nodes vs cluster size\n\n",
+   cmdl->argv[0]);
+}
+
+static int cmd_link_set_bcast(struct nlmsghdr *nlh, const struct cmd *cmd,
+struct cmdl *cmdl, void *data)
+{
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlattr *props;
+   struct nlattr *attrs;
+   struct opt *opt;
+   struct opt opts[] = {
+   { "BROADCAST",  OPT_KEY, NULL },
+   { "REPLICAST",  OPT_KEY, NULL },
+   { "AUTOSELECT", OPT_KEY, NULL },
+   { "ratio",  OPT_KEYVAL, NULL },
+   { NULL }
+   };
+   int method = 0;
+
+   if (help_flag) {
+   (cmd->help)(cmdl);
+   return -EINVAL;
+   }
+
+   

[tipc-discussion] [iproute2-next v2 2/3] tipc: add link broadcast get

2019-03-22 Thread Hoang Le
The command prints the actual method that multicast
is using in the system, as well as the 'ratio' value
for the AUTOSELECT method.

A sample usage is shown below:
$tipc link get broadcast
BROADCAST

$tipc link get broadcast
AUTOSELECT ratio:30%

$tipc link get broadcast -j -p
[ {
"method": "AUTOSELECT"
},{
"ratio": 30
} ]

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 tipc/link.c | 85 -
 1 file changed, 84 insertions(+), 1 deletion(-)

diff --git a/tipc/link.c b/tipc/link.c
index e3b10bb7b3d4..e123c1863575 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -175,10 +175,92 @@ static void cmd_link_get_help(struct cmdl *cmdl)
"PROPERTIES\n"
" tolerance - Get link tolerance\n"
" priority  - Get link priority\n"
-   " window- Get link window\n",
+   " window- Get link window\n"
+   " broadcast - Get link broadcast\n",
cmdl->argv[0]);
 }
 
+static int cmd_link_get_bcast_cb(const struct nlmsghdr *nlh, void *data)
+{
+   int *prop = data;
+   int prop_ratio = TIPC_NLA_PROP_BROADCAST_RATIO;
+   struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+   struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+   struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1] = {};
+   struct nlattr *props[TIPC_NLA_PROP_MAX + 1] = {};
+   int bc_mode;
+
+   mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+   if (!info[TIPC_NLA_LINK])
+   return MNL_CB_ERROR;
+
+   mnl_attr_parse_nested(info[TIPC_NLA_LINK], parse_attrs, attrs);
+   if (!attrs[TIPC_NLA_LINK_PROP])
+   return MNL_CB_ERROR;
+
+   mnl_attr_parse_nested(attrs[TIPC_NLA_LINK_PROP], parse_attrs, props);
+   if (!props[*prop])
+   return MNL_CB_ERROR;
+
+   bc_mode = mnl_attr_get_u32(props[*prop]);
+
+   new_json_obj(json);
+   open_json_object(NULL);
+   switch (bc_mode) {
+   case 0x1:
+   print_string(PRINT_ANY, "method", "%s\n", "BROADCAST");
+   break;
+   case 0x2:
+   print_string(PRINT_ANY, "method", "%s\n", "REPLICAST");
+   break;
+   case 0x4:
+   print_string(PRINT_ANY, "method", "%s", "AUTOSELECT");
+   close_json_object();
+   open_json_object(NULL);
+   print_uint(PRINT_ANY, "ratio", " ratio:%u%\n",
+  mnl_attr_get_u32(props[prop_ratio]));
+   break;
+   default:
+   print_string(PRINT_ANY, NULL, "UNKNOWN\n", NULL);
+   break;
+   }
+   close_json_object();
+   delete_json_obj();
+   return MNL_CB_OK;
+}
+
+static void cmd_link_get_bcast_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s link get PPROPERTY\n\n"
+   "PROPERTIES\n"
+   " broadcast - Get link broadcast\n",
+   cmdl->argv[0]);
+}
+
+static int cmd_link_get_bcast(struct nlmsghdr *nlh, const struct cmd *cmd,
+struct cmdl *cmdl, void *data)
+{
+   int prop = TIPC_NLA_PROP_BROADCAST;
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlattr *attrs;
+
+   if (help_flag) {
+   (cmd->help)(cmdl);
+   return -EINVAL;
+   }
+
+   nlh = msg_init(buf, TIPC_NL_LINK_GET);
+   if (!nlh) {
+   fprintf(stderr, "error, message initialisation failed\n");
+   return -1;
+   }
+   attrs = mnl_attr_nest_start(nlh, TIPC_NLA_LINK);
+   /* Direct to broadcast-link setting */
+   mnl_attr_put_strz(nlh, TIPC_NLA_LINK_NAME, tipc_bclink_name);
+   mnl_attr_nest_end(nlh, attrs);
+   return msg_doit(nlh, cmd_link_get_bcast_cb, );
+}
+
 static int cmd_link_get(struct nlmsghdr *nlh, const struct cmd *cmd,
struct cmdl *cmdl, void *data)
 {
@@ -186,6 +268,7 @@ static int cmd_link_get(struct nlmsghdr *nlh, const struct 
cmd *cmd,
{ PRIORITY_STR, cmd_link_get_prop,  cmd_link_get_help },
{ TOLERANCE_STR,cmd_link_get_prop,  
cmd_link_get_help },
{ WINDOW_STR,   cmd_link_get_prop,  cmd_link_get_help },
+   { BROADCAST_STR, cmd_link_get_bcast, cmd_link_get_bcast_help },
{ NULL }
};
 
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next v2 3/3] tipc: add link broadcast man page

2019-03-22 Thread Hoang Le
Add man page text describing the tipc link broadcast get and set commands.

Signed-off-by: Hoang Le 
---
 man/man8/tipc-link.8 | 53 +++-
 1 file changed, 48 insertions(+), 5 deletions(-)

diff --git a/man/man8/tipc-link.8 b/man/man8/tipc-link.8
index 01afa1c3ad9f..47dae25d3626 100644
--- a/man/man8/tipc-link.8
+++ b/man/man8/tipc-link.8
@@ -1,4 +1,4 @@
-.TH TIPC-LINK 8 "02 Jun 2015" "iproute2" "Linux"
+.TH TIPC-LINK 8 "22 Mar 2019" "iproute2" "Linux"
 
 .\" For consistency, please keep padding right aligned.
 .\" For example '.B "foo " bar' and not '.B foo " bar"'
@@ -14,18 +14,36 @@ tipc-link \- show links or modify link properties
 
 .ti -8
 .B tipc link set
-.RB "{ " "priority "
+.br
+.RB "[ " "{ " "priority "
 .IR PRIORITY
 .RB "| " tolerance
 .IR TOLERANCE
 .RB "| " window
 .IR "WINDOW " }
-.BI "link " LINK
+.BI "link " LINK " ]"
+.RB "|"
+.br
+.RB "[ "
+.RB "{ " broadcast " [ "
+.IR BROADCAST
+.RB " | "
+.IR REPLICAST
+.RB " | "
+.IR AUTOSELECT
+.RB "[ " ratio
+.IR SIZE
+.RB "] " ] " } " "]"
 
 .ti -8
 .B tipc link get
-.RB "{ " "priority" " | " tolerance " | " window " } " link
-.I LINK
+.br
+.RB "[ " "{ " "priority" " | " tolerance " | " window " } " link
+.IR LINK " ] "
+.RB "|"
+.br
+.RB "[ " { " broadcast " } " ]"
+.br
 
 .ti -8
 .B tipc link statistics
@@ -306,6 +324,31 @@ They are usually transient and occur during the cluster 
startup phase
 or network reconfiguration.
 Possible status are: U or D. The status U implies up and D down.
 
+.SS Broadcast properties
+.TP
+.B  BROADCAST
+.br
+Forces all multicast traffic to be transmitted via broadcast only,
+irrespective of cluster size and number of destinations.
+
+.TP
+.B REPLICAST
+.br
+Forces all multicast traffic to be transmitted via replicast only,
+irrespective of cluster size and number of destinations.
+
+.TP
+.B AUTOSELECT
+.br
+Auto switching to broadcast or replicast depending on cluster size and
+destination node number.
+
+.TP
+.B ratio SIZE
+.br
+Set the AUTOSELECT criteria, percentage of destination nodes vs cluster
+size.
+
 .SH EXAMPLES
 .PP
 tipc link monitor list
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next v1 2/2] tipc: fix a null pointer deref

2019-03-21 Thread Hoang Le
In commit c55c8edafa91 ("tipc: smooth change between replicast and
broadcast") we introduced a new method to eliminate the risk of message
reordering that can happen between different nodes.
Unfortunately, we forgot to add a check on the receiving side to ignore
intra-node messages.

We fix this by checking the origin node and returning early if the
arriving message is an intra-node message.

syzbot report:
==
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] PREEMPT SMP KASAN
CPU: 0 PID: 7820 Comm: syz-executor418 Not tainted 5.0.0+ #61
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
RIP: 0010:tipc_mcast_filter_msg+0x21b/0x13d0 net/tipc/bcast.c:782
Code: 45 c0 0f 84 39 06 00 00 48 89 5d 98 e8 ce ab a5 fa 49 8d bc
 24 c8 00 00 00 48 b9 00 00 00 00 00 fc ff df 48 89 f8 48 c1 e8 03
 <80> 3c 08 00 0f 85 9a 0e 00 00 49 8b 9c 24 c8 00 00 00 48 be 00 00
RSP: 0018:8880959defc8 EFLAGS: 00010202
RAX: 0019 RBX: 888081258a48 RCX: dc00
RDX:  RSI: 86cab862 RDI: 00c8
RBP: 8880959df030 R08: 8880813d0200 R09: ed1015d05bc8
R10: ed1015d05bc7 R11: 8880ae82de3b R12: 
R13: 002c R14:  R15: 888081258a48
FS:  0106a880() GS:8880ae80()
 knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 20001cc0 CR3: 94a2 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 tipc_sk_filter_rcv+0x182d/0x34f0 net/tipc/socket.c:2168
 tipc_sk_enqueue net/tipc/socket.c:2254 [inline]
 tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305
 tipc_sk_mcast_rcv+0x724/0x1020 net/tipc/socket.c:1209
 tipc_mcast_xmit+0x7fe/0x1200 net/tipc/bcast.c:410
 tipc_sendmcast+0xb36/0xfc0 net/tipc/socket.c:820
 __tipc_sendmsg+0x10df/0x18d0 net/tipc/socket.c:1358
 tipc_sendmsg+0x53/0x80 net/tipc/socket.c:1291
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:661
 ___sys_sendmsg+0x806/0x930 net/socket.c:2260
 __sys_sendmsg+0x105/0x1d0 net/socket.c:2298
 __do_sys_sendmsg net/socket.c:2307 [inline]
 __se_sys_sendmsg net/socket.c:2305 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2305
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4401c9
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8
 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05
 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:7ffd887fa9d8 EFLAGS: 0246 ORIG_RAX: 002e
RAX: ffda RBX: 004002c8 RCX: 004401c9
RDX:  RSI: 20002140 RDI: 0003
RBP: 006ca018 R08:  R09: 004002c8
R10:  R11: 0246 R12: 00401a50
R13: 00401ae0 R14:  R15: 
Modules linked in:
---[ end trace ba79875754e1708f ]---

Reported-by: syzbot+be4bdf2cc3e85e952...@syzkaller.appspotmail.com
Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c  | 5 -
 net/tipc/bcast.h  | 2 +-
 net/tipc/socket.c | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 5264a8ff6e01..88edfb358ae7 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -760,7 +760,7 @@ u32 tipc_bcast_get_broadcast_ratio(struct net *net)
return bb->rc_ratio;
 }
 
-void tipc_mcast_filter_msg(struct sk_buff_head *defq,
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
   struct sk_buff_head *inputq)
 {
struct sk_buff *skb, *_skb, *tmp;
@@ -775,6 +775,9 @@ void tipc_mcast_filter_msg(struct sk_buff_head *defq,
return;
 
node = msg_orignode(hdr);
+   if (node == tipc_own_addr(net))
+   return;
+
port = msg_origport(hdr);
 
/* Has the twin SYN message already arrived ? */
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 484bde289d3a..dadad953e2be 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -101,7 +101,7 @@ int tipc_bclink_reset_stats(struct net *net);
 u32 tipc_bcast_get_broadcast_mode(struct net *net);
 u32 tipc_bcast_get_broadcast_ratio(struct net *net);
 
-void tipc_mcast_filter_msg(struct sk_buff_head *defq,
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
   struct sk_buff_head *inputq);
 
 static inline void tipc_bcast_lock(struct net *net)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a7b3e1a070e4..8ac8ddf1e324 100644
--- a/net/tipc/socket.c

[tipc-discussion] [net-next v1 1/2] tipc: fix use-after-free in tipc_sk_filter_rcv

2019-03-21 Thread Hoang Le
The skb may be freed in either of:
  1/ condition 1: tipc_sk_filter_rcv -> tipc_sk_proto_rcv
  2/ condition 2: tipc_sk_filter_rcv -> tipc_group_filter_msg
This leads to a "use-after-free" access in the next condition.

We fix this by initializing the message type variable at declaration, so
that it is safe to check this variable and continue processing if the
condition matches.

syzbot report:
==
BUG: KASAN: use-after-free in tipc_sk_filter_rcv+0x2166/0x34f0
 net/tipc/socket.c:2167
Read of size 4 at addr 88808ea58534 by task kworker/u4:0/7

CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.0.0+ #61
Hardware name: Google Google Compute Engine/Google Compute Engine,
 BIOS Google 01/01/2011
Workqueue: tipc_send tipc_conn_send_work
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131
 tipc_sk_filter_rcv+0x2166/0x34f0 net/tipc/socket.c:2167
 tipc_sk_enqueue net/tipc/socket.c:2254 [inline]
 tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305
 tipc_topsrv_kern_evt+0x3b7/0x580 net/tipc/topsrv.c:610
 tipc_conn_send_to_sock+0x43e/0x5f0 net/tipc/topsrv.c:283
 tipc_conn_send_work+0x65/0x80 net/tipc/topsrv.c:303
 process_one_work+0x98e/0x1790 kernel/workqueue.c:2269
 worker_thread+0x98/0xe40 kernel/workqueue.c:2415
 kthread+0x357/0x430 kernel/kthread.c:253
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352

Reported-by: syzbot+e863893591cc7a622...@syzkaller.appspotmail.com
Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast")
Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 922b75ff56d3..a7b3e1a070e4 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2151,6 +2151,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct 
sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct net *net = sock_net(sk);
struct sk_buff_head inputq;
+   int mtyp = msg_type(hdr);
int limit, err = TIPC_OK;
 
trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " ");
@@ -2164,7 +2165,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct 
sk_buff *skb,
if (unlikely(grp))
tipc_group_filter_msg(grp, , xmitq);
 
-   if (msg_type(hdr) == TIPC_MCAST_MSG)
+   if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG)
tipc_mcast_filter_msg(>mc_method.deferredq, );
 
/* Validate and add to receive buffer if there is space */
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH 2/2] tipc: fix a null pointer deref

2019-03-21 Thread Hoang Le
In commit c55c8edafa91 ("tipc: smooth change between replicast and
broadcast") we introduced a new method to eliminate the risk of message
reordering that can happen between different nodes.
Unfortunately, we forgot to add a check on the receiving side to ignore
intra-node messages.

We fix this by checking the origin node and returning early if the
arriving message is an intra-node message.

syzbot report:
==
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] PREEMPT SMP KASAN
CPU: 0 PID: 7820 Comm: syz-executor418 Not tainted 5.0.0+ #61
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
RIP: 0010:tipc_mcast_filter_msg+0x21b/0x13d0 net/tipc/bcast.c:782
Code: 45 c0 0f 84 39 06 00 00 48 89 5d 98 e8 ce ab a5 fa 49 8d bc
 24 c8 00 00 00 48 b9 00 00 00 00 00 fc ff df 48 89 f8 48 c1 e8 03
 <80> 3c 08 00 0f 85 9a 0e 00 00 49 8b 9c 24 c8 00 00 00 48 be 00 00
RSP: 0018:8880959defc8 EFLAGS: 00010202
RAX: 0019 RBX: 888081258a48 RCX: dc00
RDX:  RSI: 86cab862 RDI: 00c8
RBP: 8880959df030 R08: 8880813d0200 R09: ed1015d05bc8
R10: ed1015d05bc7 R11: 8880ae82de3b R12: 
R13: 002c R14:  R15: 888081258a48
FS:  0106a880() GS:8880ae80()
 knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 20001cc0 CR3: 94a2 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 tipc_sk_filter_rcv+0x182d/0x34f0 net/tipc/socket.c:2168
 tipc_sk_enqueue net/tipc/socket.c:2254 [inline]
 tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305
 tipc_sk_mcast_rcv+0x724/0x1020 net/tipc/socket.c:1209
 tipc_mcast_xmit+0x7fe/0x1200 net/tipc/bcast.c:410
 tipc_sendmcast+0xb36/0xfc0 net/tipc/socket.c:820
 __tipc_sendmsg+0x10df/0x18d0 net/tipc/socket.c:1358
 tipc_sendmsg+0x53/0x80 net/tipc/socket.c:1291
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:661
 ___sys_sendmsg+0x806/0x930 net/socket.c:2260
 __sys_sendmsg+0x105/0x1d0 net/socket.c:2298
 __do_sys_sendmsg net/socket.c:2307 [inline]
 __se_sys_sendmsg net/socket.c:2305 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2305
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4401c9
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8
 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05
 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:7ffd887fa9d8 EFLAGS: 0246 ORIG_RAX: 002e
RAX: ffda RBX: 004002c8 RCX: 004401c9
RDX:  RSI: 20002140 RDI: 0003
RBP: 006ca018 R08:  R09: 004002c8
R10:  R11: 0246 R12: 00401a50
R13: 00401ae0 R14:  R15: 
Modules linked in:
---[ end trace ba79875754e1708f ]---

Reported-by: syzbot+be4bdf2cc3e85e952...@syzkaller.appspotmail.com
Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast")
Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c  | 6 +-
 net/tipc/bcast.h  | 2 +-
 net/tipc/socket.c | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 5264a8ff6e01..b3e6b4892425 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -760,10 +760,11 @@ u32 tipc_bcast_get_broadcast_ratio(struct net *net)
return bb->rc_ratio;
 }
 
-void tipc_mcast_filter_msg(struct sk_buff_head *defq,
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
   struct sk_buff_head *inputq)
 {
struct sk_buff *skb, *_skb, *tmp;
+   u32 self = tipc_own_addr(net);
struct tipc_msg *hdr, *_hdr;
bool match = false;
u32 node, port;
@@ -775,6 +776,9 @@ void tipc_mcast_filter_msg(struct sk_buff_head *defq,
return;
 
node = msg_orignode(hdr);
+   if (node == self)
+   return;
+
port = msg_origport(hdr);
 
/* Has the twin SYN message already arrived ? */
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 484bde289d3a..dadad953e2be 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -101,7 +101,7 @@ int tipc_bclink_reset_stats(struct net *net);
 u32 tipc_bcast_get_broadcast_mode(struct net *net);
 u32 tipc_bcast_get_broadcast_ratio(struct net *net);
 
-void tipc_mcast_filter_msg(struct sk_buff_head *defq,
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
   struct sk_buff_head *inputq);
 
 static inline void tipc_bcast_lock(struct net *net)
diff --git a/net/tipc

[tipc-discussion] [PATCH 1/2] tipc: fix use-after-free tipc_sk_filter_rcv

2019-03-21 Thread Hoang Le
The skb may be freed in either of:
  1/ condition 1: tipc_sk_filter_rcv -> tipc_sk_proto_rcv
  2/ condition 2: tipc_sk_filter_rcv -> tipc_group_filter_msg
This leads to a "use-after-free" access in the next condition.

We fix this by initializing the message type variable at declaration, so
that it is safe to check this variable and continue processing if the
condition matches.

syzbot report:
==
BUG: KASAN: use-after-free in tipc_sk_filter_rcv+0x2166/0x34f0
 net/tipc/socket.c:2167
Read of size 4 at addr 88808ea58534 by task kworker/u4:0/7

CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.0.0+ #61
Hardware name: Google Google Compute Engine/Google Compute Engine,
 BIOS Google 01/01/2011
Workqueue: tipc_send tipc_conn_send_work
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131
 tipc_sk_filter_rcv+0x2166/0x34f0 net/tipc/socket.c:2167
 tipc_sk_enqueue net/tipc/socket.c:2254 [inline]
 tipc_sk_rcv+0xc45/0x25a0 net/tipc/socket.c:2305
 tipc_topsrv_kern_evt+0x3b7/0x580 net/tipc/topsrv.c:610
 tipc_conn_send_to_sock+0x43e/0x5f0 net/tipc/topsrv.c:283
 tipc_conn_send_work+0x65/0x80 net/tipc/topsrv.c:303
 process_one_work+0x98e/0x1790 kernel/workqueue.c:2269
 worker_thread+0x98/0xe40 kernel/workqueue.c:2415
 kthread+0x357/0x430 kernel/kthread.c:253
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352

Reported-by: syzbot+e863893591cc7a622...@syzkaller.appspotmail.com
Fixes: c55c8eda ("tipc: smooth change between replicast and broadcast")
Signed-off-by: Hoang Le 
---
 net/tipc/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 922b75ff56d3..a7b3e1a070e4 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2151,6 +2151,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct 
sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct net *net = sock_net(sk);
struct sk_buff_head inputq;
+   int mtyp = msg_type(hdr);
int limit, err = TIPC_OK;
 
trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " ");
@@ -2164,7 +2165,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct 
sk_buff *skb,
if (unlikely(grp))
tipc_group_filter_msg(grp, , xmitq);
 
-   if (msg_type(hdr) == TIPC_MCAST_MSG)
+   if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG)
tipc_mcast_filter_msg(>mc_method.deferredq, );
 
/* Validate and add to receive buffer if there is space */
-- 
2.1.4



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next v3 1/3] tipc: support broadcast/replicast configurable for bc-link

2019-03-19 Thread Hoang Le
Currently, a multicast stream uses either broadcast or replicast as
its transmission method, based on the ratio between the number of actual
destination nodes and the cluster size.

However, when an L2 interface (e.g., VXLAN) provides pseudo
broadcast support, this becomes very inefficient, as it blindly
replicates multicast packets to all cluster/subnet nodes,
irrespective of whether they host actual target sockets or not.

The TIPC multicast algorithm is able to distinguish real destination
nodes from other nodes, and hence provides a smarter and more
efficient method for transferring multicast messages than
pseudo broadcast can do.

Because of this, we now make it possible for users to force
the broadcast link to permanently switch to using replicast,
irrespective of which capabilities the bearer provides,
or pretends to provide.
Conversely, we also make it possible to force the broadcast link
to always use true broadcast. While maybe less useful in
deployed systems, this may at least be useful for testing the
broadcast algorithm in small clusters.

We retain the current AUTOSELECT ability, i.e., to let the broadcast link
automatically select which algorithm to use, and to switch back and forth
between broadcast and replicast as the ratio between destination
node number and cluster size changes. This remains the default method.

Furthermore, we make it possible to configure the threshold ratio for
such switches. The default ratio is now set to 10%, down from 25% in the
earlier implementation.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 include/uapi/linux/tipc_netlink.h |   2 +
 net/tipc/bcast.c  | 104 --
 net/tipc/bcast.h  |   7 ++
 net/tipc/link.c   |   8 +++
 net/tipc/netlink.c|   4 +-
 5 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/tipc_netlink.h 
b/include/uapi/linux/tipc_netlink.h
index 0ebe02ef1a86..efb958fd167d 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -281,6 +281,8 @@ enum {
TIPC_NLA_PROP_TOL,  /* u32 */
TIPC_NLA_PROP_WIN,  /* u32 */
TIPC_NLA_PROP_MTU,  /* u32 */
+   TIPC_NLA_PROP_BROADCAST,/* u32 */
+   TIPC_NLA_PROP_BROADCAST_RATIO,  /* u32 */
 
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d8026543bf4c..12b59268bdd6 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link";
  * @dests: array keeping number of reachable destinations per bearer
  * @primary_bearer: a bearer having links to all broadcast destinations, if any
  * @bcast_support: indicates if primary bearer, if any, supports broadcast
+ * @force_bcast: forces broadcast for multicast traffic
  * @rcast_support: indicates if all peer nodes support replicast
+ * @force_rcast: forces replicast for multicast traffic
  * @rc_ratio: dest count as percentage of cluster size where send method 
changes
  * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
  */
@@ -64,7 +66,9 @@ struct tipc_bc_base {
int dests[MAX_BEARERS];
int primary_bearer;
bool bcast_support;
+   bool force_bcast;
bool rcast_support;
+   bool force_rcast;
int rc_ratio;
int bc_threshold;
 };
@@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, 
u32 limit)
return 0;
 }
 
+static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode)
+{
+   struct tipc_bc_base *bb = tipc_bc_base(net);
+
+   switch (bc_mode) {
+   case BCLINK_MODE_BCAST:
+   if (!bb->bcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = true;
+   bb->force_rcast = false;
+   break;
+   case BCLINK_MODE_RCAST:
+   if (!bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = false;
+   bb->force_rcast = true;
+   break;
+   case BCLINK_MODE_SEL:
+   if (!bb->bcast_support || !bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = false;
+   bb->force_rcast = false;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio)
+{
+   struct tipc_bc_base *bb = tipc_bc_base(net);
+
+   if (!bb->bcast_support || !bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   if (bc_ratio > 100 || bc_ratio <= 0)
+   return -EINVAL;
+
+   bb->rc_ratio = bc_ratio;
+   tipc_bcast_lock(net);
+   tipc_bcbase_calc_bc_threshold(net);
+  

[tipc-discussion] [net-next v3 3/3] tipc: smooth change between replicast and broadcast

2019-03-19 Thread Hoang Le
Currently, a multicast stream may start out using replicast, because
there are few destinations, and then it should ideally switch to
L2/broadcast IGMP/multicast when the number of destinations grows beyond
a certain limit. The opposite should happen when the number decreases
below the limit.

To eliminate the risk of message reordering caused by a method change,
a sending socket must stick to a previously selected method until it
enters an idle period of 5 seconds, i.e., until there is a 5-second pause
in the traffic from the sender socket.

If the sender never makes such a pause, the method will never change,
and transmission may become very inefficient as the cluster grows.

With this commit, we allow such a switch between replicast and
broadcast without any need for a traffic pause.

The solution is to send a dummy message containing only the header, with
the SYN bit set, via broadcast or replicast. The data message also has
the SYN bit set and is sent via replicast or broadcast (the inverse of
the method used for the dummy message).

Then, at the receiving side, any messages that follow the first SYN
message (data or dummy) are held in a deferred queue until the other
message of the pair (dummy or data) has arrived on the other link.

v2: reverse christmas tree declaration

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c  | 165 +-
 net/tipc/bcast.h  |   5 ++
 net/tipc/msg.h|  10 +++
 net/tipc/socket.c |   5 ++
 4 files changed, 184 insertions(+), 1 deletion(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 12b59268bdd6..5264a8ff6e01 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -220,9 +220,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, 
int dests,
}
/* Can current method be changed ? */
method->expires = jiffies + TIPC_METHOD_EXPIRE;
-   if (method->mandatory || time_before(jiffies, exp))
+   if (method->mandatory)
return;
 
+   if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) &&
+   time_before(jiffies, exp))
+   return;
+
+   /* Configuration as force 'broadcast' method */
+   if (bb->force_bcast) {
+   method->rcast = false;
+   return;
+   }
+   /* Configuration as force 'replicast' method */
+   if (bb->force_rcast) {
+   method->rcast = true;
+   return;
+   }
+   /* Configuration as 'autoselect' or default method */
/* Determine method to use now */
method->rcast = dests <= bb->bc_threshold;
 }
@@ -285,6 +300,63 @@ static int tipc_rcast_xmit(struct net *net, struct 
sk_buff_head *pkts,
return 0;
 }
 
+/* tipc_mcast_send_sync - deliver a dummy message with SYN bit
+ * @net: the applicable net namespace
+ * @skb: socket buffer to copy
+ * @method: send method to be used
+ * @dests: destination nodes for message.
+ * @cong_link_cnt: returns number of encountered congested destination links
+ * Returns 0 if success, otherwise errno
+ */
+static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
+   struct tipc_mc_method *method,
+   struct tipc_nlist *dests,
+   u16 *cong_link_cnt)
+{
+   struct tipc_msg *hdr, *_hdr;
+   struct sk_buff_head tmpq;
+   struct sk_buff *_skb;
+
+   /* Is a cluster supporting with new capabilities ? */
+   if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL))
+   return 0;
+
+   hdr = buf_msg(skb);
+   if (msg_user(hdr) == MSG_FRAGMENTER)
+   hdr = msg_get_wrapped(hdr);
+   if (msg_type(hdr) != TIPC_MCAST_MSG)
+   return 0;
+
+   /* Allocate dummy message */
+   _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL);
+   if (!skb)
+   return -ENOMEM;
+
+   /* Preparing for 'synching' header */
+   msg_set_syn(hdr, 1);
+
+   /* Copy skb's header into a dummy header */
+   skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE);
+   skb_orphan(_skb);
+
+   /* Reverse method for dummy message */
+   _hdr = buf_msg(_skb);
+   msg_set_size(_hdr, MCAST_H_SIZE);
+   msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
+
+   skb_queue_head_init();
+   __skb_queue_tail(, _skb);
+   if (method->rcast)
+   tipc_bcast_xmit(net, , cong_link_cnt);
+   else
+   tipc_rcast_xmit(net, , dests, cong_link_cnt);
+
+   /* This queue should normally be empty by now */
+   __skb_queue_purge();
+
+   return 0;
+}
+
 /* tipc_mcast_xmit - deliver message to indicated destination nodes
  *   and to identified node local sockets
  * @net: the applicable net namespace
@@ -300,6 +372,9 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head 
*pkts,
u16 *cong_link_cnt)
 {
struct sk_buff_head inputq, localq;
+   b

[tipc-discussion] [net-next v3 2/3] tipc: introduce new capability flag for cluster

2019-03-19 Thread Hoang Le
As a preparation for introducing smooth switching between the replicast
and broadcast methods for multicast messages, we have to introduce a new
capability flag TIPC_MCAST_RBCTL to handle this new feature.

During a cluster upgrade a node can come back with this new capability,
which must also be reflected in the cluster capabilities field.
The new feature is only applicable if all nodes in the cluster support
this new capability.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c |  2 ++
 net/tipc/core.h |  3 +++
 net/tipc/node.c | 18 ++
 net/tipc/node.h |  6 --
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5b38f5164281..27cccd101ef6 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -43,6 +43,7 @@
 #include "net.h"
 #include "socket.h"
 #include "bcast.h"
+#include "node.h"
 
 #include 
 
@@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->node_addr = 0;
tn->trial_addr = 0;
tn->addr_trial_end = 0;
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8020a6c360ff..7a68e1b6a066 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -122,6 +122,9 @@ struct tipc_net {
/* Topology subscription server */
struct tipc_topsrv *topsrv;
atomic_t subscription_count;
+
+   /* Cluster capabilities */
+   u16 capabilities;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2dc4919ab23c..2717893e9dbe 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -383,6 +383,11 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
tipc_link_update_caps(l, capabilities);
}
write_unlock_bh(>lock);
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -433,6 +438,11 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
break;
}
list_add_tail_rcu(>list, _node->list);
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
trace_tipc_node_create(n, true, " ");
 exit:
spin_unlock_bh(>node_list_lock);
@@ -589,6 +599,7 @@ static void  tipc_node_clear_links(struct tipc_node *node)
  */
 static bool tipc_node_cleanup(struct tipc_node *peer)
 {
+   struct tipc_node *temp_node;
struct tipc_net *tn = tipc_net(peer->net);
bool deleted = false;
 
@@ -604,6 +615,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
deleted = true;
}
tipc_node_write_unlock(peer);
+
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
+
spin_unlock_bh(>node_list_lock);
return deleted;
 }
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 4f59a30e989a..2404225c5d58 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -51,7 +51,8 @@ enum {
TIPC_BLOCK_FLOWCTL= (1 << 3),
TIPC_BCAST_RCAST  = (1 << 4),
TIPC_NODE_ID128   = (1 << 5),
-   TIPC_LINK_PROTO_SEQNO = (1 << 6)
+   TIPC_LINK_PROTO_SEQNO = (1 << 6),
+   TIPC_MCAST_RBCTL  = (1 << 7)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT   |  \
@@ -60,7 +61,8 @@ enum {
TIPC_BCAST_RCAST   |   \
TIPC_BLOCK_FLOWCTL |   \
TIPC_NODE_ID128|   \
-   TIPC_LINK_PROTO_SEQNO)
+   TIPC_LINK_PROTO_SEQNO  |   \
+   TIPC_MCAST_RBCTL)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next 2/2] tipc: add link broadcast get

2019-03-19 Thread Hoang Le
The command prints the method that multicast is actually
using in the system.
It also prints the 'ratio' value for the AUTOSELECT method.

A sample usage is shown below:
$tipc link get broadcast
BROADCAST

$tipc link get broadcast
AUTOSELECT ratio:30%

$tipc link get broadcast -j -p
[ {
"method": "AUTOSELECT"
},{
"ratio": 30
} ]

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 tipc/link.c | 85 -
 1 file changed, 84 insertions(+), 1 deletion(-)

diff --git a/tipc/link.c b/tipc/link.c
index e3b10bb7b3d4..e123c1863575 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -175,10 +175,92 @@ static void cmd_link_get_help(struct cmdl *cmdl)
"PROPERTIES\n"
" tolerance - Get link tolerance\n"
" priority  - Get link priority\n"
-   " window- Get link window\n",
+   " window- Get link window\n"
+   " broadcast - Get link broadcast\n",
cmdl->argv[0]);
 }
 
+static int cmd_link_get_bcast_cb(const struct nlmsghdr *nlh, void *data)
+{
+   int *prop = data;
+   int prop_ratio = TIPC_NLA_PROP_BROADCAST_RATIO;
+   struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+   struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+   struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1] = {};
+   struct nlattr *props[TIPC_NLA_PROP_MAX + 1] = {};
+   int bc_mode;
+
+   mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+   if (!info[TIPC_NLA_LINK])
+   return MNL_CB_ERROR;
+
+   mnl_attr_parse_nested(info[TIPC_NLA_LINK], parse_attrs, attrs);
+   if (!attrs[TIPC_NLA_LINK_PROP])
+   return MNL_CB_ERROR;
+
+   mnl_attr_parse_nested(attrs[TIPC_NLA_LINK_PROP], parse_attrs, props);
+   if (!props[*prop])
+   return MNL_CB_ERROR;
+
+   bc_mode = mnl_attr_get_u32(props[*prop]);
+
+   new_json_obj(json);
+   open_json_object(NULL);
+   switch (bc_mode) {
+   case 0x1:
+   print_string(PRINT_ANY, "method", "%s\n", "BROADCAST");
+   break;
+   case 0x2:
+   print_string(PRINT_ANY, "method", "%s\n", "REPLICAST");
+   break;
+   case 0x4:
+   print_string(PRINT_ANY, "method", "%s", "AUTOSELECT");
+   close_json_object();
+   open_json_object(NULL);
+   print_uint(PRINT_ANY, "ratio", " ratio:%u%\n",
+  mnl_attr_get_u32(props[prop_ratio]));
+   break;
+   default:
+   print_string(PRINT_ANY, NULL, "UNKNOWN\n", NULL);
+   break;
+   }
+   close_json_object();
+   delete_json_obj();
+   return MNL_CB_OK;
+}
+
+static void cmd_link_get_bcast_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s link get PPROPERTY\n\n"
+   "PROPERTIES\n"
+   " broadcast - Get link broadcast\n",
+   cmdl->argv[0]);
+}
+
+static int cmd_link_get_bcast(struct nlmsghdr *nlh, const struct cmd *cmd,
+struct cmdl *cmdl, void *data)
+{
+   int prop = TIPC_NLA_PROP_BROADCAST;
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlattr *attrs;
+
+   if (help_flag) {
+   (cmd->help)(cmdl);
+   return -EINVAL;
+   }
+
+   nlh = msg_init(buf, TIPC_NL_LINK_GET);
+   if (!nlh) {
+   fprintf(stderr, "error, message initialisation failed\n");
+   return -1;
+   }
+   attrs = mnl_attr_nest_start(nlh, TIPC_NLA_LINK);
+   /* Direct to broadcast-link setting */
+   mnl_attr_put_strz(nlh, TIPC_NLA_LINK_NAME, tipc_bclink_name);
+   mnl_attr_nest_end(nlh, attrs);
+   return msg_doit(nlh, cmd_link_get_bcast_cb, );
+}
+
 static int cmd_link_get(struct nlmsghdr *nlh, const struct cmd *cmd,
struct cmdl *cmdl, void *data)
 {
@@ -186,6 +268,7 @@ static int cmd_link_get(struct nlmsghdr *nlh, const struct 
cmd *cmd,
{ PRIORITY_STR, cmd_link_get_prop,  cmd_link_get_help },
{ TOLERANCE_STR,cmd_link_get_prop,  
cmd_link_get_help },
{ WINDOW_STR,   cmd_link_get_prop,  cmd_link_get_help },
+   { BROADCAST_STR, cmd_link_get_bcast, cmd_link_get_bcast_help },
{ NULL }
};
 
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [iproute2-next 1/2] tipc: add link broadcast set method and ratio

2019-03-19 Thread Hoang Le
The command added here makes it possible to forcibly configure the
broadcast link to use either broadcast or replicast, in addition to
the already existing auto selection algorithm.

A sample usage is shown below:
$tipc link set broadcast BROADCAST
$tipc link set broadcast AUTOSELECT ratio 25

$tipc link set broadcast -h
Usage: tipc link set broadcast PROPERTY

PROPERTIES
 BROADCAST - Forces all multicast traffic to be
 transmitted via broadcast only,
 irrespective of cluster size and number
 of destinations

 REPLICAST - Forces all multicast traffic to be
 transmitted via replicast only,
 irrespective of cluster size and number
 of destinations

 AUTOSELECT- Auto switching to broadcast or replicast
 depending on cluster size and destination
 node number

 ratio SIZE- Set the AUTOSELECT criteria, percentage of
 destination nodes vs cluster size

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 include/uapi/linux/tipc_netlink.h |  2 +
 tipc/link.c   | 96 ++-
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/tipc_netlink.h 
b/include/uapi/linux/tipc_netlink.h
index 0ebe02ef1a86..efb958fd167d 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -281,6 +281,8 @@ enum {
TIPC_NLA_PROP_TOL,  /* u32 */
TIPC_NLA_PROP_WIN,  /* u32 */
TIPC_NLA_PROP_MTU,  /* u32 */
+   TIPC_NLA_PROP_BROADCAST,/* u32 */
+   TIPC_NLA_PROP_BROADCAST_RATIO,  /* u32 */
 
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/tipc/link.c b/tipc/link.c
index 43e26da3fa6b..e3b10bb7b3d4 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -28,6 +28,9 @@
 #define PRIORITY_STR "priority"
 #define TOLERANCE_STR "tolerance"
 #define WINDOW_STR "window"
+#define BROADCAST_STR "broadcast"
+
+static const char tipc_bclink_name[] = "broadcast-link";
 
 static int link_list_cb(const struct nlmsghdr *nlh, void *data)
 {
@@ -521,7 +524,8 @@ static void cmd_link_set_help(struct cmdl *cmdl)
"PROPERTIES\n"
" tolerance TOLERANCE   - Set link tolerance\n"
" priority PRIORITY - Set link priority\n"
-   " window WINDOW - Set link window\n",
+   " window WINDOW - Set link window\n"
+   " broadcast BROADCAST   - Set link broadcast\n",
cmdl->argv[0]);
 }
 
@@ -585,6 +589,95 @@ static int cmd_link_set_prop(struct nlmsghdr *nlh, const 
struct cmd *cmd,
return msg_doit(nlh, link_get_cb, );
 }
 
+static void cmd_link_set_bcast_help(struct cmdl *cmdl)
+{
+   fprintf(stderr, "Usage: %s link set broadcast PROPERTY\n\n"
+   "PROPERTIES\n"
+   " BROADCAST - Forces all multicast traffic to be\n"
+   " transmitted via broadcast only,\n"
+   " irrespective of cluster size and number\n"
+   " of destinations\n\n"
+   " REPLICAST - Forces all multicast traffic to be\n"
+   " transmitted via replicast only,\n"
+   " irrespective of cluster size and number\n"
+   " of destinations\n\n"
+   " AUTOSELECT- Auto switching to broadcast or 
replicast\n"
+   " depending on cluster size and 
destination\n"
+   " node number\n\n"
+   " ratio SIZE- Set the AUTOSELECT criteria, percentage 
of\n"
+   " destination nodes vs cluster size\n\n",
+   cmdl->argv[0]);
+}
+
+static int cmd_link_set_bcast(struct nlmsghdr *nlh, const struct cmd *cmd,
+struct cmdl *cmdl, void *data)
+{
+   char buf[MNL_SOCKET_BUFFER_SIZE];
+   struct nlattr *props;
+   struct nlattr *attrs;
+   struct opt *opt;
+   struct opt opts[] = {
+   { "BROADCAST",  OPT_KEY, NULL },
+   { "REPLICAST",  OPT_KEY, NULL },
+   { "AUTOSELECT", OPT_KEY, NULL },
+   { "ratio",  OPT_KEYVAL, NULL },
+   { NULL }
+   };
+   int method = 0;
+
+   if (help_flag) {
+   (cmd->help)(cmdl);
+   return -EINVAL;
+   }
+
+   

[tipc-discussion] [net-next v3 1/3] tipc: support broadcast/replicast configurable for bc-link

2019-02-25 Thread Hoang Le
Currently, a multicast stream uses either broadcast or replicast as its
transmission method, based on the ratio between the number of actual
destination nodes and the cluster size.

However, when an L2 interface (e.g., VXLAN) provides pseudo
broadcast support, this becomes very inefficient, as it blindly
replicates multicast packets to all cluster/subnet nodes,
irrespective of whether they host actual target sockets or not.

The TIPC multicast algorithm is able to distinguish real destination
nodes from other nodes, and hence provides a smarter and more
efficient method for transferring multicast messages than
pseudo broadcast can do.

Because of this, we now make it possible for users to force
the broadcast link to permanently switch to using replicast,
irrespective of which capabilities the bearer provides,
or pretends to provide.
Conversely, we also make it possible to force the broadcast link
to always use true broadcast. While maybe less useful in
deployed systems, this may at least be useful for testing the
broadcast algorithm in small clusters.

We retain the current AUTOSELECT ability, i.e., to let the broadcast link
automatically select which algorithm to use, and to switch back and forth
between broadcast and replicast as the ratio between destination
node number and cluster size changes. This remains the default method.

Furthermore, we make it possible to configure the threshold ratio for
such switches. The default ratio is now set to 10%, down from 25% in the
earlier implementation.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 include/uapi/linux/tipc_netlink.h |   2 +
 net/tipc/bcast.c  | 104 --
 net/tipc/bcast.h  |   7 ++
 net/tipc/link.c   |   8 +++
 net/tipc/netlink.c|   4 +-
 5 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/tipc_netlink.h 
b/include/uapi/linux/tipc_netlink.h
index 0ebe02ef1a86..efb958fd167d 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -281,6 +281,8 @@ enum {
TIPC_NLA_PROP_TOL,  /* u32 */
TIPC_NLA_PROP_WIN,  /* u32 */
TIPC_NLA_PROP_MTU,  /* u32 */
+   TIPC_NLA_PROP_BROADCAST,/* u32 */
+   TIPC_NLA_PROP_BROADCAST_RATIO,  /* u32 */
 
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d8026543bf4c..12b59268bdd6 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link";
  * @dests: array keeping number of reachable destinations per bearer
  * @primary_bearer: a bearer having links to all broadcast destinations, if any
  * @bcast_support: indicates if primary bearer, if any, supports broadcast
+ * @force_bcast: forces broadcast for multicast traffic
  * @rcast_support: indicates if all peer nodes support replicast
+ * @force_rcast: forces replicast for multicast traffic
  * @rc_ratio: dest count as percentage of cluster size where send method 
changes
  * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
  */
@@ -64,7 +66,9 @@ struct tipc_bc_base {
int dests[MAX_BEARERS];
int primary_bearer;
bool bcast_support;
+   bool force_bcast;
bool rcast_support;
+   bool force_rcast;
int rc_ratio;
int bc_threshold;
 };
@@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, 
u32 limit)
return 0;
 }
 
+static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode)
+{
+   struct tipc_bc_base *bb = tipc_bc_base(net);
+
+   switch (bc_mode) {
+   case BCLINK_MODE_BCAST:
+   if (!bb->bcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = true;
+   bb->force_rcast = false;
+   break;
+   case BCLINK_MODE_RCAST:
+   if (!bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = false;
+   bb->force_rcast = true;
+   break;
+   case BCLINK_MODE_SEL:
+   if (!bb->bcast_support || !bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = false;
+   bb->force_rcast = false;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio)
+{
+   struct tipc_bc_base *bb = tipc_bc_base(net);
+
+   if (!bb->bcast_support || !bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   if (bc_ratio > 100 || bc_ratio <= 0)
+   return -EINVAL;
+
+   bb->rc_ratio = bc_ratio;
+   tipc_bcast_lock(net);
+   tipc_bcbase_calc_bc_threshold(net);
+  

[tipc-discussion] [net-next v3 3/3] tipc: smooth change between replicast and broadcast

2019-02-25 Thread Hoang Le
Currently, a multicast stream may start out using replicast, because
there are few destinations, and then it should ideally switch to
L2/broadcast IGMP/multicast when the number of destinations grows beyond
a certain limit. The opposite should happen when the number decreases
below the limit.

To eliminate the risk of message reordering caused by method change,
a sending socket must stick to a previously selected method until it
enters an idle period of 5 seconds. This means there must be a 5-second
pause in the traffic from the sender socket.

If the sender never makes such a pause, the method will never change,
and transmission may become very inefficient as the cluster grows.

With this commit, we allow such a switch between replicast and
broadcast without any need for a traffic pause.

The solution is to send a dummy message consisting of only the header,
with the SYN bit set, via one method (broadcast or replicast). The data
message also has the SYN bit set and is sent via the other method
(the inverse of the one used for the dummy).

Then, at the receiving side, any messages that follow the first SYN
message (data or dummy) are held in the deferred queue until the other
message of the pair (dummy or data) has arrived on the other link.

v2: reverse christmas tree declaration

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/bcast.c  | 165 +-
 net/tipc/bcast.h  |   5 ++
 net/tipc/msg.h|  10 +++
 net/tipc/socket.c |   5 ++
 4 files changed, 184 insertions(+), 1 deletion(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 12b59268bdd6..5264a8ff6e01 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -220,9 +220,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, 
int dests,
}
/* Can current method be changed ? */
method->expires = jiffies + TIPC_METHOD_EXPIRE;
-   if (method->mandatory || time_before(jiffies, exp))
+   if (method->mandatory)
return;
 
+   if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) &&
+   time_before(jiffies, exp))
+   return;
+
+   /* Configuration as force 'broadcast' method */
+   if (bb->force_bcast) {
+   method->rcast = false;
+   return;
+   }
+   /* Configuration as force 'replicast' method */
+   if (bb->force_rcast) {
+   method->rcast = true;
+   return;
+   }
+   /* Configuration as 'autoselect' or default method */
/* Determine method to use now */
method->rcast = dests <= bb->bc_threshold;
 }
@@ -285,6 +300,63 @@ static int tipc_rcast_xmit(struct net *net, struct 
sk_buff_head *pkts,
return 0;
 }
 
+/* tipc_mcast_send_sync - deliver a dummy message with SYN bit
+ * @net: the applicable net namespace
+ * @skb: socket buffer to copy
+ * @method: send method to be used
+ * @dests: destination nodes for message.
+ * @cong_link_cnt: returns number of encountered congested destination links
+ * Returns 0 if success, otherwise errno
+ */
+static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
+   struct tipc_mc_method *method,
+   struct tipc_nlist *dests,
+   u16 *cong_link_cnt)
+{
+   struct tipc_msg *hdr, *_hdr;
+   struct sk_buff_head tmpq;
+   struct sk_buff *_skb;
+
+   /* Is a cluster supporting with new capabilities ? */
+   if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL))
+   return 0;
+
+   hdr = buf_msg(skb);
+   if (msg_user(hdr) == MSG_FRAGMENTER)
+   hdr = msg_get_wrapped(hdr);
+   if (msg_type(hdr) != TIPC_MCAST_MSG)
+   return 0;
+
+   /* Allocate dummy message */
+   _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL);
+   if (!skb)
+   return -ENOMEM;
+
+   /* Preparing for 'synching' header */
+   msg_set_syn(hdr, 1);
+
+   /* Copy skb's header into a dummy header */
+   skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE);
+   skb_orphan(_skb);
+
+   /* Reverse method for dummy message */
+   _hdr = buf_msg(_skb);
+   msg_set_size(_hdr, MCAST_H_SIZE);
+   msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
+
+   skb_queue_head_init();
+   __skb_queue_tail(, _skb);
+   if (method->rcast)
+   tipc_bcast_xmit(net, , cong_link_cnt);
+   else
+   tipc_rcast_xmit(net, , dests, cong_link_cnt);
+
+   /* This queue should normally be empty by now */
+   __skb_queue_purge();
+
+   return 0;
+}
+
 /* tipc_mcast_xmit - deliver message to indicated destination nodes
  *   and to identified node local sockets
  * @net: the applicable net namespace
@@ -300,6 +372,9 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head 
*pkts,
u16 *cong_link_cnt)
 {
struct sk_buff_head inputq, localq;
+   b

[tipc-discussion] [net-next v3 0/3] smooth change between replicast and broadcast

2019-02-25 Thread Hoang Le
v3: update commit message

Hoang Le (3):
  tipc: support broadcast/replicast configurable for bc-link
  tipc: introduce new capability flag for cluster
  tipc: smooth change between replicast and broadcast

 include/uapi/linux/tipc_netlink.h |   2 +
 net/tipc/bcast.c  | 269 +-
 net/tipc/bcast.h  |  12 ++
 net/tipc/core.c   |   2 +
 net/tipc/core.h   |   3 +
 net/tipc/link.c   |   8 +
 net/tipc/msg.h|  10 ++
 net/tipc/netlink.c|   4 +-
 net/tipc/node.c   |  18 ++
 net/tipc/node.h   |   6 +-
 net/tipc/socket.c |   5 +
 11 files changed, 331 insertions(+), 8 deletions(-)

-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next v3 2/3] tipc: introduce new capability flag for cluster

2019-02-25 Thread Hoang Le
As a preparation for introducing smooth switching between the replicast
and broadcast methods for multicast messages, we have to introduce a new
capability flag TIPC_MCAST_RBCTL to handle this new feature.

During a cluster upgrade a node can come back with this new capability,
which must then also be reflected in the cluster capabilities field.
The new feature is only applicable if all nodes in the cluster support
this new capability.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c |  2 ++
 net/tipc/core.h |  3 +++
 net/tipc/node.c | 18 ++
 net/tipc/node.h |  6 --
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5b38f5164281..27cccd101ef6 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -43,6 +43,7 @@
 #include "net.h"
 #include "socket.h"
 #include "bcast.h"
+#include "node.h"
 
 #include 
 
@@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->node_addr = 0;
tn->trial_addr = 0;
tn->addr_trial_end = 0;
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8020a6c360ff..7a68e1b6a066 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -122,6 +122,9 @@ struct tipc_net {
/* Topology subscription server */
struct tipc_topsrv *topsrv;
atomic_t subscription_count;
+
+   /* Cluster capabilities */
+   u16 capabilities;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2dc4919ab23c..2717893e9dbe 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -383,6 +383,11 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
tipc_link_update_caps(l, capabilities);
}
write_unlock_bh(>lock);
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -433,6 +438,11 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
break;
}
list_add_tail_rcu(>list, _node->list);
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
trace_tipc_node_create(n, true, " ");
 exit:
spin_unlock_bh(>node_list_lock);
@@ -589,6 +599,7 @@ static void  tipc_node_clear_links(struct tipc_node *node)
  */
 static bool tipc_node_cleanup(struct tipc_node *peer)
 {
+   struct tipc_node *temp_node;
struct tipc_net *tn = tipc_net(peer->net);
bool deleted = false;
 
@@ -604,6 +615,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
deleted = true;
}
tipc_node_write_unlock(peer);
+
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
+
spin_unlock_bh(>node_list_lock);
return deleted;
 }
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 4f59a30e989a..2404225c5d58 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -51,7 +51,8 @@ enum {
TIPC_BLOCK_FLOWCTL= (1 << 3),
TIPC_BCAST_RCAST  = (1 << 4),
TIPC_NODE_ID128   = (1 << 5),
-   TIPC_LINK_PROTO_SEQNO = (1 << 6)
+   TIPC_LINK_PROTO_SEQNO = (1 << 6),
+   TIPC_MCAST_RBCTL  = (1 << 7)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT   |  \
@@ -60,7 +61,8 @@ enum {
TIPC_BCAST_RCAST   |   \
TIPC_BLOCK_FLOWCTL |   \
TIPC_NODE_ID128|   \
-   TIPC_LINK_PROTO_SEQNO)
+   TIPC_LINK_PROTO_SEQNO  |   \
+   TIPC_MCAST_RBCTL)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next v2 2/3] tipc: introduce new capability flag for cluster

2019-02-21 Thread Hoang Le
As a preparation for introducing smooth switching between the replicast
and broadcast methods for multicast messages, we have to introduce a new
capability flag TIPC_MCAST_RBCTL to handle this new feature, for
compatibility reasons.
During a cluster upgrade a node can come back with this new capability,
which must also be reflected in the cluster capabilities field; the new
feature is only applicable if the cluster supports this new capability.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 net/tipc/core.c |  2 ++
 net/tipc/core.h |  3 +++
 net/tipc/node.c | 18 ++
 net/tipc/node.h |  6 --
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5b38f5164281..27cccd101ef6 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -43,6 +43,7 @@
 #include "net.h"
 #include "socket.h"
 #include "bcast.h"
+#include "node.h"
 
 #include 
 
@@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->node_addr = 0;
tn->trial_addr = 0;
tn->addr_trial_end = 0;
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8020a6c360ff..7a68e1b6a066 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -122,6 +122,9 @@ struct tipc_net {
/* Topology subscription server */
struct tipc_topsrv *topsrv;
atomic_t subscription_count;
+
+   /* Cluster capabilities */
+   u16 capabilities;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2dc4919ab23c..2717893e9dbe 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -383,6 +383,11 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
tipc_link_update_caps(l, capabilities);
}
write_unlock_bh(>lock);
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -433,6 +438,11 @@ static struct tipc_node *tipc_node_create(struct net *net, 
u32 addr,
break;
}
list_add_tail_rcu(>list, _node->list);
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
trace_tipc_node_create(n, true, " ");
 exit:
spin_unlock_bh(>node_list_lock);
@@ -589,6 +599,7 @@ static void  tipc_node_clear_links(struct tipc_node *node)
  */
 static bool tipc_node_cleanup(struct tipc_node *peer)
 {
+   struct tipc_node *temp_node;
struct tipc_net *tn = tipc_net(peer->net);
bool deleted = false;
 
@@ -604,6 +615,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
deleted = true;
}
tipc_node_write_unlock(peer);
+
+   /* Calculate cluster capabilities */
+   tn->capabilities = TIPC_NODE_CAPABILITIES;
+   list_for_each_entry_rcu(temp_node, >node_list, list) {
+   tn->capabilities &= temp_node->capabilities;
+   }
+
spin_unlock_bh(>node_list_lock);
return deleted;
 }
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 4f59a30e989a..2404225c5d58 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -51,7 +51,8 @@ enum {
TIPC_BLOCK_FLOWCTL= (1 << 3),
TIPC_BCAST_RCAST  = (1 << 4),
TIPC_NODE_ID128   = (1 << 5),
-   TIPC_LINK_PROTO_SEQNO = (1 << 6)
+   TIPC_LINK_PROTO_SEQNO = (1 << 6),
+   TIPC_MCAST_RBCTL  = (1 << 7)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT   |  \
@@ -60,7 +61,8 @@ enum {
TIPC_BCAST_RCAST   |   \
TIPC_BLOCK_FLOWCTL |   \
TIPC_NODE_ID128|   \
-   TIPC_LINK_PROTO_SEQNO)
+   TIPC_LINK_PROTO_SEQNO  |   \
+   TIPC_MCAST_RBCTL)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
-- 
2.17.1



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [net-next v2 1/3] tipc: support broadcast/replicast configurable for bc-link

2019-02-21 Thread Hoang Le
Currently, a multicast stream uses either broadcast or replicast as its
transmission method, based on the ratio between the number of actual
destination nodes and the cluster size.

However, when an L2 interface (e.g., VXLAN) provides pseudo
broadcast support, this becomes very inefficient, as it blindly
replicates multicast packets to all cluster/subnet nodes,
irrespective of whether they host actual target sockets or not.

The TIPC multicast algorithm is able to distinguish real destination
nodes from other nodes, and hence provides a smarter and more
efficient method for transferring multicast messages than
pseudo broadcast can do.

Because of this, we now make it possible for users to force
the broadcast link to permanently switch to using replicast,
irrespective of which capabilities the bearer provides,
or pretends to provide.
Conversely, we also make it possible to force the broadcast link
to always use true broadcast. While maybe less useful in
deployed systems, this may at least be useful for testing the
broadcast algorithm in small clusters.

We retain the current AUTOSELECT ability, i.e., to let the broadcast link
automatically select which algorithm to use, and to switch back and forth
between broadcast and replicast as the ratio between destination
node number and cluster size changes. This remains the default method.

Furthermore, we make it possible to configure the threshold ratio for
such switches. The default ratio is now set to 10%, down from 25% in the
earlier implementation.

Acked-by: Jon Maloy 
Signed-off-by: Hoang Le 
---
 include/uapi/linux/tipc_netlink.h |   2 +
 net/tipc/bcast.c  | 104 --
 net/tipc/bcast.h  |   7 ++
 net/tipc/link.c   |   8 +++
 net/tipc/netlink.c|   4 +-
 5 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/tipc_netlink.h 
b/include/uapi/linux/tipc_netlink.h
index 0ebe02ef1a86..efb958fd167d 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -281,6 +281,8 @@ enum {
TIPC_NLA_PROP_TOL,  /* u32 */
TIPC_NLA_PROP_WIN,  /* u32 */
TIPC_NLA_PROP_MTU,  /* u32 */
+   TIPC_NLA_PROP_BROADCAST,/* u32 */
+   TIPC_NLA_PROP_BROADCAST_RATIO,  /* u32 */
 
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d8026543bf4c..12b59268bdd6 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link";
  * @dests: array keeping number of reachable destinations per bearer
  * @primary_bearer: a bearer having links to all broadcast destinations, if any
  * @bcast_support: indicates if primary bearer, if any, supports broadcast
+ * @force_bcast: forces broadcast for multicast traffic
  * @rcast_support: indicates if all peer nodes support replicast
+ * @force_rcast: forces replicast for multicast traffic
  * @rc_ratio: dest count as percentage of cluster size where send method 
changes
  * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
  */
@@ -64,7 +66,9 @@ struct tipc_bc_base {
int dests[MAX_BEARERS];
int primary_bearer;
bool bcast_support;
+   bool force_bcast;
bool rcast_support;
+   bool force_rcast;
int rc_ratio;
int bc_threshold;
 };
@@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, 
u32 limit)
return 0;
 }
 
+static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode)
+{
+   struct tipc_bc_base *bb = tipc_bc_base(net);
+
+   switch (bc_mode) {
+   case BCLINK_MODE_BCAST:
+   if (!bb->bcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = true;
+   bb->force_rcast = false;
+   break;
+   case BCLINK_MODE_RCAST:
+   if (!bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = false;
+   bb->force_rcast = true;
+   break;
+   case BCLINK_MODE_SEL:
+   if (!bb->bcast_support || !bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   bb->force_bcast = false;
+   bb->force_rcast = false;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio)
+{
+   struct tipc_bc_base *bb = tipc_bc_base(net);
+
+   if (!bb->bcast_support || !bb->rcast_support)
+   return -ENOPROTOOPT;
+
+   if (bc_ratio > 100 || bc_ratio <= 0)
+   return -EINVAL;
+
+   bb->rc_ratio = bc_ratio;
+   tipc_bcast_lock(net);
+   tipc_bcbase_calc_bc_threshold(net);
+  

  1   2   >