Re: [tipc-discussion] FW: [net 1/1] tipc: sanity check on received netlink buffer

2018-12-21 Thread Xin Long
On Fri, Dec 21, 2018 at 12:35 AM Jon Maloy  wrote:
>
> Hi Ying and Xin,
> Any viewpoints on this before I send it in?
> It should be noted that skb->tail in the worst case will be the same as 
> skb->end, which points to the first byte of the skb_shared_info area. 
> Luckily, and not only due to luck, I think, this byte happens to be named "
> __unused" in that structure. So the change should be safe, and my tests have 
> not revealed any problems. Whether this really solves the problem reported by 
> syzbot I don't know, since I am unable to reproduce it, but this is my take 
> on it.

I built a KMSAN env a couple of weeks ago, but it's gone now.
I can rebuild one and try to reproduce it if you still need.

>
> Regards
> ///jon
>
>
> -Original Message-
> From: Jon Maloy
> Sent: 18-Dec-18 15:29
> To: Jon Maloy ; Jon Maloy 
> Cc: Mohan Krishna Ghanta Krishnamurthy 
> ; 
> parthasarathy.bhuvara...@gmail.com; Tung Quang Nguyen 
> ; Hoang Huu Le ; 
> Canh Duc Luu ; Tuong Tong Lien 
> ; Gordan Mihaljevic 
> ; ying@windriver.com; 
> tipc-discussion@lists.sourceforge.net
> Subject: [net 1/1] tipc: sanity check on received netlink buffer
>
> When tipc receives a sk buffer in tipc_net_link_compat_rcv() it performs no 
> controls that the buffer has the required minimum size. Furthermore, the 
> buffer may contain a string, which we have no guarantee is zero-terminated.
>
> We now introduce a check that the buffer at least is large enough to contain 
> a generic and a TIPC specific netlink header, since those must be present in 
> all valid messages.
>
> We also set the buffer tail to point to a zero character. This ensures that 
> subsequent string operations on buffer data never can fail, even if the given 
> string is invalid.
>
> Reported-by: syzbot+d78b8a29241a195ae...@syzkaller.appspotmail.com
> Reported-by: syzbot+e820fdc8ce362f2de...@syzkaller.appspotmail.com
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/netlink_compat.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 
> 6376467..b37ed6e 100644
> --- a/net/tipc/netlink_compat.c
> +++ b/net/tipc/netlink_compat.c
> @@ -1188,6 +1188,13 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, 
> struct genl_info *info)
>
> memset(&msg, 0, sizeof(msg));
>
> +   if (skb_headlen(skb) < GENL_HDRLEN + TIPC_GENL_HDRLEN) {
> +   err = -EINVAL;
> +   msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR);
> +   goto send;
> +   }
1. This check may not be necessary, as genl_family_rcv_msg() could cover it, no?

hdrlen = GENL_HDRLEN + family->hdrsize;
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
return -EINVAL;


> +   *skb_tail_pointer(skb) = 0;
> +
It may be a little bit tricky, but yes, a very easy fix. I'm thinking:

2. https://www.spinics.net/lists/netdev/msg540733.html

This is a common issue in TIPC netlink when parsing name string. I was
looking at the processing for ifname in rtnl_setlink(), and it's using
nla_strlcpy(). So maybe it's better to do the same here?

--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -857,7 +857,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb,
struct genl_info *info)
 int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 {
int err;
-   char *bearer;
+   char bearer[TIPC_MAX_BEARER_NAME];
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
u32 domain = 0;
@@ -868,6 +868,7 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb,
struct genl_info *info)
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;

+   nla_strlcpy(bearer, info->attrs[TIPC_NLA_BEARER], TIPC_MAX_BEARER_NAME);
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
   info->attrs[TIPC_NLA_BEARER],
   tipc_nl_bearer_policy, info->extack);

3. https://www.spinics.net/lists/netdev/msg540734.html

Do a similar thing in tipc_nl_compat_link_set(), as below? It needs
something more, though, since tipc_link_config is more than a string.

@@ -723,19 +723,21 @@ static int tipc_nl_compat_link_set(struct
tipc_nl_compat_cmd_doit *cmd,
   struct sk_buff *skb,
   struct tipc_nl_compat_msg *msg)
 {
-   struct tipc_link_config *lc;
+   int len = TLV_GET_LEN(msg->req) - TLV_LENGTH(0);
+   struct tipc_link_config lc = {0};
struct tipc_bearer *bearer;
struct tipc_media *media;

-   lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+   memcpy(&lc, TLV_DATA(msg->req),
+  len >= sizeof(lc) ? sizeof(lc) - 1 : len);

-   media = tipc_media_find(lc->name);
+   media = tipc_media_find(lc.name);


> req_nlh = (struct nlmsghdr *)skb->data;
> msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
> msg.cmd = 

Re: [tipc-discussion] KMSAN: uninit-value in tipc_nl_compat_bearer_enable (2)

2019-03-30 Thread Xin Long
On Thu, Mar 28, 2019 at 1:55 AM syzbot
 wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:9536b452 kmsan: uaccess.h: fix variable name conflicts
> git tree:   kmsan
> console output: https://syzkaller.appspot.com/x/log.txt?x=15f5d58320
> kernel config:  https://syzkaller.appspot.com/x/.config?x=a5675814e8eae69e
> dashboard link: https://syzkaller.appspot.com/bug?extid=8b707430713eb46e1e45
> compiler:   clang version 8.0.0 (trunk 350509)
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=16438e1b20
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=16898cd720
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+8b707430713eb46e1...@syzkaller.appspotmail.com
>
> ==
> BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:961
> CPU: 0 PID: 10526 Comm: syz-executor961 Not tainted 5.0.0+ #13
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>   __dump_stack lib/dump_stack.c:77 [inline]
>   dump_stack+0x173/0x1d0 lib/dump_stack.c:113
>   kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600
>   __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313
>   memchr+0xce/0x110 lib/string.c:961
>   string_is_valid net/tipc/netlink_compat.c:176 [inline]
>   tipc_nl_compat_bearer_enable+0x2c4/0x910 net/tipc/netlink_compat.c:401
It's using the wrong length to check if b->name is valid when
b->name size < TIPC_MAX_BEARER_NAME. The right count
should start from tipc_bearer_config->name.

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4ad3586..895f368 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -397,7 +397,9 @@ static int tipc_nl_compat_bearer_enable(struct
tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;

-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   len -= offsetof(struct tipc_bearer_config, name);
+   len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(b->name, len))
return -EINVAL;

A similar thing should be done in:
tipc_nl_compat_media_set()
tipc_nl_compat_bearer_set()
tipc_nl_compat_link_set()

>   __tipc_nl_compat_doit net/tipc/netlink_compat.c:321 [inline]
>   tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:354
>   tipc_nl_compat_handle net/tipc/netlink_compat.c:1162 [inline]
>   tipc_nl_compat_recv+0x1ae7/0x2750 net/tipc/netlink_compat.c:1265
>   genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
>   genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
>   netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
>   genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
>   netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
>   netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
>   netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
>   sock_sendmsg_nosec net/socket.c:622 [inline]
>   sock_sendmsg net/socket.c:632 [inline]
>   ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2115
>   __sys_sendmsg net/socket.c:2153 [inline]
>   __do_sys_sendmsg net/socket.c:2162 [inline]
>   __se_sys_sendmsg+0x305/0x460 net/socket.c:2160
>   __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2160
>   do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
>   entry_SYSCALL_64_after_hwframe+0x63/0xe7
> RIP: 0033:0x440209
> Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7
> 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
> ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7ffc77bfd0a8 EFLAGS: 0246 ORIG_RAX: 002e
> RAX: ffda RBX: 004002c8 RCX: 00440209
> RDX:  RSI: 2140 RDI: 0003
> RBP: 006ca018 R08:  R09: 004002c8
> R10:  R11: 0246 R12: 00401a90
> R13: 00401b20 R14:  R15: 
>
> Uninit was created at:
>   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 [inline]
>   kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159
>   kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176
>   kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185
>   slab_post_alloc_hook mm/slab.h:445 [inline]
>   slab_alloc_node mm/slub.c:2773 [inline]
>   __kmalloc_node_track_caller+0xe9e/0xff0 mm/slub.c:4398
>   __kmalloc_reserve net/core/skbuff.c:140 [inline]
>   __alloc_skb+0x309/0xa20 net/core/skbuff.c:208
>   alloc_skb include/linux/skbuff.h:1012 [inline]
>   netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
>   netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892
>   sock_sendmsg_nosec net/socket.c:622 [inline]
>   sock_sendmsg net/socket.c:632 [inline]
>   ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2115
>   __sys_sendmsg 

Re: [tipc-discussion] KMSAN: uninit-value in tipc_nl_compat_name_table_dump (2)

2019-03-30 Thread Xin Long
On Fri, Mar 29, 2019 at 12:26 AM syzbot
 wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:a695dc5e kmsan: fixup mm/sl[au]b.[ch] after rebase
> git tree:   kmsan
> console output: https://syzkaller.appspot.com/x/log.txt?x=1683e04d20
> kernel config:  https://syzkaller.appspot.com/x/.config?x=a5675814e8eae69e
> dashboard link: https://syzkaller.appspot.com/bug?extid=3ce8520484b0d4e260a5
> compiler:   clang version 8.0.0 (trunk 350509)
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=15bdf95f20
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=11b5a4cf20
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+3ce8520484b0d4e26...@syzkaller.appspotmail.com
>
> sshd (11234) used greatest stack depth: 54160 bytes left
> ==
> BUG: KMSAN: uninit-value in __arch_swab32
> arch/x86/include/uapi/asm/swab.h:10 [inline]
> BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline]
> BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x54f/0xcd0
> net/tipc/netlink_compat.c:872
> CPU: 1 PID: 11248 Comm: syz-executor646 Not tainted 5.0.0+ #11
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>   __dump_stack lib/dump_stack.c:77 [inline]
>   dump_stack+0x173/0x1d0 lib/dump_stack.c:113
>   kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600
>   __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313
>   __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline]
>   __fswab32 include/uapi/linux/swab.h:59 [inline]
>   tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872
commit 974cb0e3e7c963ced06c4e32c5b2884173fa5e01
Author: Ying Xue 
Date:   Mon Jan 14 17:22:28 2019 +0800

tipc: fix uninit-value in tipc_nl_compat_name_table_dump

This patch tried to fix it in tipc_nl_compat_name_table_dump_header().
But it forgot to handle the err returned from cmd->header():

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4ad3586..25bc39b 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct
tipc_nl_compat_cmd_dump *cmd,
if (msg->rep_type)
tipc_tlv_init(msg->rep, msg->rep_type);

-   if (cmd->header)
-   (*cmd->header)(msg);
+   if (cmd->header) {
+   err = (*cmd->header)(msg);
+   if (err) {
+   kfree_skb(msg->rep);
+   msg->rep = NULL;
+   return err;
+   }
+   }

>   __tipc_nl_compat_dumpit+0x59e/0xda0 net/tipc/netlink_compat.c:215
>   tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:280
>   tipc_nl_compat_handle net/tipc/netlink_compat.c:1226 [inline]
>   tipc_nl_compat_recv+0x1b5f/0x2750 net/tipc/netlink_compat.c:1265
>   genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
>   genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
>   netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
>   genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
>   netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
>   netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
>   netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
>   sock_sendmsg_nosec net/socket.c:622 [inline]
>   sock_sendmsg net/socket.c:632 [inline]
>   ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2115
>   __sys_sendmsg net/socket.c:2153 [inline]
>   __do_sys_sendmsg net/socket.c:2162 [inline]
>   __se_sys_sendmsg+0x305/0x460 net/socket.c:2160
>   __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2160
>   do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
>   entry_SYSCALL_64_after_hwframe+0x63/0xe7
> RIP: 0033:0x444069
> Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7
> 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
> ff 0f 83 1b d8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7ffda17b3718 EFLAGS: 0246 ORIG_RAX: 002e
> RAX: ffda RBX: 004002e0 RCX: 00444069
> RDX:  RSI: 21c0 RDI: 0003
> RBP: 006ce018 R08:  R09: 004002e0
> R10: 1900 R11: 0246 R12: 00401d10
> R13: 00401da0 R14:  R15: 
>
> Uninit was created at:
>   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 [inline]
>   kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159
>   kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176
>   kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185
>   slab_post_alloc_hook mm/slab.h:445 [inline]
>   slab_alloc_node mm/slub.c:2773 [inline]
>   __kmalloc_node_track_caller+0xe9e/0xff0 mm/slub.c:4398
>   __kmalloc_reserve net/core/skbuff.c:140 [inline]
>   __alloc_skb+0x309/0xa20 net/core/skbuff.c:208
>   

Re: [tipc-discussion] KMSAN: uninit-value in tipc_nl_compat_link_set (3)

2019-03-30 Thread Xin Long
On Thu, Mar 28, 2019 at 1:54 AM syzbot
 wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:c530a275 kmsan: call vmap hooks from vmalloc and ioremap f..
> git tree:   kmsan
> console output: https://syzkaller.appspot.com/x/log.txt?x=13bd473320
> kernel config:  https://syzkaller.appspot.com/x/.config?x=a5675814e8eae69e
> dashboard link: https://syzkaller.appspot.com/bug?extid=de00a87b8644a582ae79
> compiler:   clang version 8.0.0 (trunk 350509)
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=12c534d720
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=10f9e73320
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+de00a87b8644a582a...@syzkaller.appspotmail.com
>
> ==
> BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:961
> CPU: 1 PID: 10538 Comm: syz-executor101 Not tainted 5.0.0+ #12
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>   __dump_stack lib/dump_stack.c:77 [inline]
>   dump_stack+0x173/0x1d0 lib/dump_stack.c:113
>   kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600
>   __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313
>   memchr+0xce/0x110 lib/string.c:961
>   string_is_valid net/tipc/netlink_compat.c:176 [inline]
>   tipc_nl_compat_link_set+0x121/0x1550 net/tipc/netlink_compat.c:770
@@ -766,8 +775,10 @@ static int tipc_nl_compat_link_set(struct
tipc_nl_compat_cmd_doit *cmd,

lc = (struct tipc_link_config *)TLV_DATA(msg->req);

-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
-   if (!string_is_valid(lc->name, len))
+   len = TLV_GET_DATA_LEN(msg->req) -
+ offsetof(struct tipc_link_config, name);
+   if (len <= 0 ||
+   !string_is_valid(lc->name, min_t(int, len, TIPC_MAX_BEARER_NAME)))
return -EINVAL;

>   __tipc_nl_compat_doit net/tipc/netlink_compat.c:321 [inline]
>   tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:354
>   tipc_nl_compat_handle net/tipc/netlink_compat.c:1162 [inline]
>   tipc_nl_compat_recv+0x1ae7/0x2750 net/tipc/netlink_compat.c:1265
>   genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
>   genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
>   netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
>   genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
>   netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
>   netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
>   netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
>   sock_sendmsg_nosec net/socket.c:622 [inline]
>   sock_sendmsg net/socket.c:632 [inline]
>   ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2115
>   __sys_sendmsg net/socket.c:2153 [inline]
>   __do_sys_sendmsg net/socket.c:2162 [inline]
>   __se_sys_sendmsg+0x305/0x460 net/socket.c:2160
>   __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2160
>   do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
>   entry_SYSCALL_64_after_hwframe+0x63/0xe7
> RIP: 0033:0x440259
> Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7
> 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
> ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7ffcb435a248 EFLAGS: 0246 ORIG_RAX: 002e
> RAX: ffda RBX: 004002c8 RCX: 00440259
> RDX:  RSI: 20c0 RDI: 0003
> RBP: 006ca018 R08:  R09: 004002c8
> R10:  R11: 0246 R12: 00401ae0
> R13: 00401b70 R14:  R15: 
>
> Uninit was created at:
>   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 [inline]
>   kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159
>   kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176
>   kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185
>   slab_post_alloc_hook mm/slab.h:445 [inline]
>   slab_alloc_node mm/slub.c:2773 [inline]
>   __kmalloc_node_track_caller+0xe9e/0xff0 mm/slub.c:4398
>   __kmalloc_reserve net/core/skbuff.c:140 [inline]
>   __alloc_skb+0x309/0xa20 net/core/skbuff.c:208
>   alloc_skb include/linux/skbuff.h:1012 [inline]
>   netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
>   netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892
>   sock_sendmsg_nosec net/socket.c:622 [inline]
>   sock_sendmsg net/socket.c:632 [inline]
>   ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2115
>   __sys_sendmsg net/socket.c:2153 [inline]
>   __do_sys_sendmsg net/socket.c:2162 [inline]
>   __se_sys_sendmsg+0x305/0x460 net/socket.c:2160
>   __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2160
>   do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
>   entry_SYSCALL_64_after_hwframe+0x63/0xe7
> ==
>
>
> ---
> This bug is generated by a bot. It 

[tipc-discussion] [PATCH net 0/3] tipc: a batch of uninit-value fixes for netlink_compat

2019-03-31 Thread Xin Long
These issues were all reported by syzbot, and have existed since the very beginning.
See the details on each patch.

Xin Long (3):
  tipc: check bearer name with right length in
tipc_nl_compat_bearer_enable
  tipc: check link name with right length in tipc_nl_compat_link_set
  tipc: handle the err returned from cmd header function

 net/tipc/netlink_compat.c | 24 
 1 file changed, 20 insertions(+), 4 deletions(-)

-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net 3/3] tipc: handle the err returned from cmd header function

2019-03-31 Thread Xin Long
Syzbot found a crash:

  BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x54f/0xcd0 
net/tipc/netlink_compat.c:872
  Call Trace:
tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872
__tipc_nl_compat_dumpit+0x59e/0xda0 net/tipc/netlink_compat.c:215
tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:280
tipc_nl_compat_handle net/tipc/netlink_compat.c:1226 [inline]
tipc_nl_compat_recv+0x1b5f/0x2750 net/tipc/netlink_compat.c:1265
genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg net/socket.c:632 [inline]

  Uninit was created at:
__alloc_skb+0x309/0xa20 net/core/skbuff.c:208
alloc_skb include/linux/skbuff.h:1012 [inline]
netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg net/socket.c:632 [inline]

It was supposed to be fixed on commit 974cb0e3e7c9 ("tipc: fix uninit-value
in tipc_nl_compat_name_table_dump") by checking TLV_GET_DATA_LEN(msg->req)
in cmd->header()/tipc_nl_compat_name_table_dump_header(), which is called
ahead of tipc_nl_compat_name_table_dump().

However, tipc_nl_compat_dumpit() doesn't handle the error returned from cmd
header function. It means even when the check added in that fix fails, it
won't stop calling tipc_nl_compat_name_table_dump(), and the issue will be
triggered again.

So this patch is to add the process for the err returned from cmd header
function in tipc_nl_compat_dumpit().

Reported-by: syzbot+3ce8520484b0d4e26...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
---
 net/tipc/netlink_compat.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 0bfd03d6..340a6e7 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct 
tipc_nl_compat_cmd_dump *cmd,
if (msg->rep_type)
tipc_tlv_init(msg->rep, msg->rep_type);
 
-   if (cmd->header)
-   (*cmd->header)(msg);
+   if (cmd->header) {
+   err = (*cmd->header)(msg);
+   if (err) {
+   kfree_skb(msg->rep);
+   msg->rep = NULL;
+   return err;
+   }
+   }
 
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net 1/3] tipc: check bearer name with right length in tipc_nl_compat_bearer_enable

2019-03-31 Thread Xin Long
Syzbot reported the following crash:

BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:961
  memchr+0xce/0x110 lib/string.c:961
  string_is_valid net/tipc/netlink_compat.c:176 [inline]
  tipc_nl_compat_bearer_enable+0x2c4/0x910 net/tipc/netlink_compat.c:401
  __tipc_nl_compat_doit net/tipc/netlink_compat.c:321 [inline]
  tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:354
  tipc_nl_compat_handle net/tipc/netlink_compat.c:1162 [inline]
  tipc_nl_compat_recv+0x1ae7/0x2750 net/tipc/netlink_compat.c:1265
  genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
  genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
  netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
  genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
  netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
  netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
  netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
  sock_sendmsg_nosec net/socket.c:622 [inline]
  sock_sendmsg net/socket.c:632 [inline]

Uninit was created at:
  __alloc_skb+0x309/0xa20 net/core/skbuff.c:208
  alloc_skb include/linux/skbuff.h:1012 [inline]
  netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
  netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892
  sock_sendmsg_nosec net/socket.c:622 [inline]
  sock_sendmsg net/socket.c:632 [inline]

It was triggered when the bearer name size < TIPC_MAX_BEARER_NAME: the
check would then use a wrong length, TLV_GET_DATA_LEN(msg->req), which
also includes the priority and disc_domain lengths.

This patch is to fix it by checking it with a right length:
'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_bearer_config, name)'.

Reported-by: syzbot+8b707430713eb46e1...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
---
 net/tipc/netlink_compat.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4ad3586..5f8e53c 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -397,7 +397,12 @@ static int tipc_nl_compat_bearer_enable(struct 
tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   len -= offsetof(struct tipc_bearer_config, name);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(b->name, len))
return -EINVAL;
 
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net 2/3] tipc: check link name with right length in tipc_nl_compat_link_set

2019-03-31 Thread Xin Long
A similar issue as fixed by Patch "tipc: check bearer name with right
length in tipc_nl_compat_bearer_enable" was also found by syzbot in
tipc_nl_compat_link_set().

The length to check with should be 'TLV_GET_DATA_LEN(msg->req) -
offsetof(struct tipc_link_config, name)'.

Reported-by: syzbot+de00a87b8644a582a...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
---
 net/tipc/netlink_compat.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 5f8e53c..0bfd03d6 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -771,7 +771,12 @@ static int tipc_nl_compat_link_set(struct 
tipc_nl_compat_cmd_doit *cmd,
 
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   len -= offsetof(struct tipc_link_config, name);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_LINK_NAME);
if (!string_is_valid(lc->name, len))
return -EINVAL;
 
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: change to check tipc_own_id to return in tipc_net_stop

2019-03-23 Thread Xin Long
When running a syz script, a panic occurred:

[  156.088228] BUG: KASAN: use-after-free in tipc_disc_timeout+0x9c9/0xb20 
[tipc]
[  156.094315] Call Trace:
[  156.094844]  
[  156.095306]  dump_stack+0x7c/0xc0
[  156.097346]  print_address_description+0x65/0x22e
[  156.100445]  kasan_report.cold.3+0x37/0x7a
[  156.102402]  tipc_disc_timeout+0x9c9/0xb20 [tipc]
[  156.106517]  call_timer_fn+0x19a/0x610
[  156.112749]  run_timer_softirq+0xb51/0x1090

It was caused by the netns freed without deleting the discoverer timer,
while later on the netns would be accessed in the timer handler.

The timer should have been deleted by tipc_net_stop() when cleaning up a
netns. However, tipc has been able to enable a bearer and start d->timer
without the local node_addr set since Commit 52dfae5c85a4 ("tipc: obtain
node identity from interface by default"), which caused the timer not to
be deleted in tipc_net_stop() then.

So fix it in tipc_net_stop() by changing to check local node_id instead
of local node_addr, as Jon suggested.

While at it, remove the calling of tipc_nametbl_withdraw() there, since
tipc_nametbl_stop() will take care of the nametbl's freeing after.

Fixes: 52dfae5c85a4 ("tipc: obtain node identity from interface by default")
Reported-by: syzbot+a25307ad099309f1c...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
---
 net/tipc/net.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/tipc/net.c b/net/tipc/net.c
index f076edb..7ce1e86 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -163,12 +163,9 @@ void tipc_sched_net_finalize(struct net *net, u32 addr)
 
 void tipc_net_stop(struct net *net)
 {
-   u32 self = tipc_own_addr(net);
-
-   if (!self)
+   if (!tipc_own_id(net))
return;
 
-   tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self);
rtnl_lock();
tipc_bearer_stop(net);
tipc_node_stop(net);
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] memory leak in tipc_buf_acquire

2019-06-09 Thread Xin Long
On Sat, May 25, 2019 at 5:18 AM syzbot
 wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:4dde821e Merge tag 'xfs-5.2-fixes-1' of git://git.kernel.o..
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=107db73aa0
> kernel config:  https://syzkaller.appspot.com/x/.config?x=61dd9e15a761691d
> dashboard link: https://syzkaller.appspot.com/bug?extid=78fbe679c8ca8d264a8d
> compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=162bd84ca0
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=160c605ca0
>
Looks like we need to purge each member's deferredq list in tipc_group_delete():
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 992be61..23823eb 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -218,6 +218,7 @@ void tipc_group_delete(struct net *net, struct
tipc_group *grp)

  rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
  tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+ __skb_queue_purge(&m->deferredq);
  list_del(&m->list);
  kfree(m);
  }

> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+78fbe679c8ca8d264...@syzkaller.appspotmail.com
>
> type=1400 audit(1558701681.775:36): avc:  denied  { map } for  pid=7128
> comm="syz-executor987" path="/root/syz-executor987656147" dev="sda1"
> ino=15900 scontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023
> tcontext=unconfined_u:object_r:user_home_t:s0 tclass=file permissive=1
> executing program
> executing program
> executing program
> executing program
> BUG: memory leak
> unreferenced object 0x88810df83c00 (size 512):
>comm "softirq", pid 0, jiffies 4294942354 (age 19.830s)
>hex dump (first 32 bytes):
>  38 1a 0d 0f 81 88 ff ff 38 1a 0d 0f 81 88 ff ff  8...8...
>  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
>backtrace:
>  [<9375ee42>] kmemleak_alloc_recursive
> include/linux/kmemleak.h:55 [inline]
>  [<9375ee42>] slab_post_alloc_hook mm/slab.h:439 [inline]
>  [<9375ee42>] slab_alloc_node mm/slab.c:3269 [inline]
>  [<9375ee42>] kmem_cache_alloc_node+0x153/0x2a0 mm/slab.c:3579
>  [<4c563922>] __alloc_skb+0x6e/0x210 net/core/skbuff.c:198
>  [] alloc_skb_fclone include/linux/skbuff.h:1107
> [inline]
>  [] tipc_buf_acquire+0x2f/0x80 net/tipc/msg.c:66
>  [] tipc_msg_create+0x37/0xe0 net/tipc/msg.c:98
>  [<8bb437b0>] tipc_group_create_event+0xb3/0x1b0
> net/tipc/group.c:679
>  [<947b1d0f>] tipc_group_proto_rcv+0x569/0x640
> net/tipc/group.c:781
>  [] tipc_sk_proto_rcv net/tipc/socket.c:1996 [inline]
>  [] tipc_sk_filter_rcv+0x9ac/0xf20
> net/tipc/socket.c:2163
>  [<0dab7a6c>] tipc_sk_enqueue net/tipc/socket.c:2255 [inline]
>  [<0dab7a6c>] tipc_sk_rcv+0x494/0x8a0 net/tipc/socket.c:2306
>  [<023a7ddd>] tipc_node_xmit+0x196/0x1f0 net/tipc/node.c:1442
>  [<337dd9eb>] tipc_node_xmit_skb net/tipc/node.c:1491 [inline]
>  [<337dd9eb>] tipc_node_distr_xmit+0x7d/0x120
> net/tipc/node.c:1506
>  [] tipc_group_delete+0xe6/0x130 net/tipc/group.c:224
>  [<0361ba2b>] tipc_sk_leave+0x57/0xb0 net/tipc/socket.c:2925
>  [<9df90505>] tipc_release+0x7b/0x5e0 net/tipc/socket.c:584
>  [<9f3189da>] __sock_release+0x4b/0xe0 net/socket.c:607
>  [] sock_close+0x1b/0x30 net/socket.c:1279
>  [<266a6215>] __fput+0xed/0x300 fs/file_table.c:280
>
> BUG: memory leak
> unreferenced object 0x888111895400 (size 1024):
>comm "softirq", pid 0, jiffies 4294942354 (age 19.830s)
>hex dump (first 32 bytes):
>  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
>  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
>backtrace:
>  [] kmemleak_alloc_recursive
> include/linux/kmemleak.h:55 [inline]
>  [] slab_post_alloc_hook mm/slab.h:439 [inline]
>  [] slab_alloc_node mm/slab.c:3269 [inline]
>  [] kmem_cache_alloc_node_trace+0x15b/0x2a0
> mm/slab.c:3597
>  [] __do_kmalloc_node mm/slab.c:3619 [inline]
>  [] __kmalloc_node_track_caller+0x38/0x50
> mm/slab.c:3634
>  [<39212451>] __kmalloc_reserve.isra.0+0x40/0xb0
> net/core/skbuff.c:142
>  [<307cb4cf>] __alloc_skb+0xa0/0x210 net/core/skbuff.c:210
>  [] alloc_skb_fclone include/linux/skbuff.h:1107
> [inline]
>  [] tipc_buf_acquire+0x2f/0x80 net/tipc/msg.c:66
>  [] tipc_msg_create+0x37/0xe0 net/tipc/msg.c:98
>  [<8bb437b0>] tipc_group_create_event+0xb3/0x1b0
> net/tipc/group.c:679
>  

[tipc-discussion] [PATCH net] tipc: add dst_cache support for udp media

2019-06-20 Thread Xin Long
As other udp/ip tunnels do, tipc udp media should also have a
lockless dst_cache supported on its tx path.

Here we add dst_cache into udp_replicast to support dst cache
for both rmcast and rcast, and rmcast uses ub->rcast and each
rcast uses its own node in ub->rcast.list.

Signed-off-by: Xin Long 
---
 net/tipc/udp_media.c | 72 ++--
 1 file changed, 47 insertions(+), 25 deletions(-)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 1405ccc..b8962df 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -76,6 +76,7 @@ struct udp_media_addr {
 /* struct udp_replicast - container for UDP remote addresses */
 struct udp_replicast {
struct udp_media_addr addr;
+   struct dst_cache dst_cache;
struct rcu_head rcu;
struct list_head list;
 };
@@ -158,22 +159,27 @@ static int tipc_udp_addr2msg(char *msg, struct 
tipc_media_addr *a)
 /* tipc_send_msg - enqueue a send request */
 static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
 struct udp_bearer *ub, struct udp_media_addr *src,
-struct udp_media_addr *dst)
+struct udp_media_addr *dst, struct dst_cache *cache)
 {
+   struct dst_entry *ndst = dst_cache_get(cache);
int ttl, err = 0;
-   struct rtable *rt;
 
if (dst->proto == htons(ETH_P_IP)) {
-   struct flowi4 fl = {
-   .daddr = dst->ipv4.s_addr,
-   .saddr = src->ipv4.s_addr,
-   .flowi4_mark = skb->mark,
-   .flowi4_proto = IPPROTO_UDP
-   };
-   rt = ip_route_output_key(net, );
-   if (IS_ERR(rt)) {
-   err = PTR_ERR(rt);
-   goto tx_error;
+   struct rtable *rt = (struct rtable *)ndst;
+
+   if (!rt) {
+   struct flowi4 fl = {
+   .daddr = dst->ipv4.s_addr,
+   .saddr = src->ipv4.s_addr,
+   .flowi4_mark = skb->mark,
+   .flowi4_proto = IPPROTO_UDP
+   };
+   rt = ip_route_output_key(net, );
+   if (IS_ERR(rt)) {
+   err = PTR_ERR(rt);
+   goto tx_error;
+   }
+   dst_cache_set_ip4(cache, >dst, fl.saddr);
}
 
ttl = ip4_dst_hoplimit(>dst);
@@ -182,17 +188,19 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff 
*skb,
dst->port, false, true);
 #if IS_ENABLED(CONFIG_IPV6)
} else {
-   struct dst_entry *ndst;
-   struct flowi6 fl6 = {
-   .flowi6_oif = ub->ifindex,
-   .daddr = dst->ipv6,
-   .saddr = src->ipv6,
-   .flowi6_proto = IPPROTO_UDP
-   };
-   err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, ,
-);
-   if (err)
-   goto tx_error;
+   if (!ndst) {
+   struct flowi6 fl6 = {
+   .flowi6_oif = ub->ifindex,
+   .daddr = dst->ipv6,
+   .saddr = src->ipv6,
+   .flowi6_proto = IPPROTO_UDP
+   };
+   err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk,
+, );
+   if (err)
+   goto tx_error;
+   dst_cache_set_ip6(cache, ndst, );
+   }
ttl = ip6_dst_hoplimit(ndst);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
   >ipv6, >ipv6, 0, ttl, 0,
@@ -230,7 +238,8 @@ static int tipc_udp_send_msg(struct net *net, struct 
sk_buff *skb,
}
 
if (addr->broadcast != TIPC_REPLICAST_SUPPORT)
-   return tipc_udp_xmit(net, skb, ub, src, dst);
+   return tipc_udp_xmit(net, skb, ub, src, dst,
+>rcast.dst_cache);
 
/* Replicast, send an skb to each configured IP address */
list_for_each_entry_rcu(rcast, >rcast.list, list) {
@@ -242,7 +251,8 @@ static int tipc_udp_send_msg(struct net *net, struct 
sk_buff *skb,
goto out;
}
 
-   err = tipc_udp_xmit(net, _skb, ub, src, >addr);
+   err = tipc_udp_xmit(net, _skb, ub, src, >addr,
+   >dst_cache);
if (err)
  

Re: [tipc-discussion] [PATCH net 0/3] net: fix quite a few dst_cache crashes reported by syzbot

2019-06-20 Thread Xin Long
On Mon, Jun 17, 2019 at 10:28 PM Jon Maloy  wrote:
>
> Hi Xin,
> As I remember the discussion around introduction of UDP media a few years 
> ago, the developer, Erik Hugne, only chose to register TIPC as a udp tunnel 
> user instead of regular udp user because it provides a more efficient way to 
> receive packet in kernel space.
> With UDP tunnel, we could receive packet directly in a callback, while TIPC 
> had to run in a work queue thread in order to read packets from the socket. 
> So, in reality we don't need any tunnel at all. Another upside is that it is 
> possible to hook in a GSO callback function from the tunnel user, something I 
> am uncertain if we can do as a regular UDP user.

Right, udp tunnel was invented for this kind of encapsulation.

To implement this gso callback, we need to require an ipproto number for TIPC,
and register the callback into inet_offloads by inet_add_offload().
And on tx path set:
skb->encapsulation = 1,
skb_shinfo(skb)->gso_type|= SKB_GSO_UDP_TUNNEL,
skb->inner_protocol_type = ENCAP_TYPE_IPPROTO.

Then it will be called by:
dev_queue_xmit() .. -> skb_mac_gso_segment() ... ->
udp4_ufo_fragment() -> skb_udp_tunnel_segment() ->
__skb_udp_tunnel_segment() -> tipc_gso_fragment()

btw, do we have an official ipproto number for TIPC already?

> Do you have any comments on this? Could it possibly be done differently?
>
> ///jon
>
>
> > -Original Message-
> > From: netdev-ow...@vger.kernel.org  On
> > Behalf Of Xin Long
> > Sent: 17-Jun-19 09:34
> > To: network dev 
> > Cc: da...@davemloft.net; Jon Maloy ; Ying Xue
> > ; tipc-discussion@lists.sourceforge.net; Marcelo
> > Ricardo Leitner ; Neil Horman
> > ; Su Yanjun ; David
> > Ahern ; syzkaller-b...@googlegroups.com; Dmitry
> > Vyukov ; Pravin B Shelar 
> > Subject: [PATCH net 0/3] net: fix quite a few dst_cache crashes reported by
> > syzbot
> >
> > There are two kinds of crashes reported many times by syzbot with no
> > reproducer. Call Traces are like:
> >
> >  BUG: KASAN: slab-out-of-bounds in rt_cache_valid+0x158/0x190
> >  net/ipv4/route.c:1556
> >rt_cache_valid+0x158/0x190 net/ipv4/route.c:1556
> >__mkroute_output net/ipv4/route.c:2332 [inline]
> >ip_route_output_key_hash_rcu+0x819/0x2d50 net/ipv4/route.c:2564
> >ip_route_output_key_hash+0x1ef/0x360 net/ipv4/route.c:2393
> >__ip_route_output_key include/net/route.h:125 [inline]
> >ip_route_output_flow+0x28/0xc0 net/ipv4/route.c:2651
> >ip_route_output_key include/net/route.h:135 [inline]
> >  ...
> >
> >or:
> >
> >  kasan: GPF could be caused by NULL-ptr deref or user memory access
> >  RIP: 0010:dst_dev_put+0x24/0x290 net/core/dst.c:168
> >
> >rt_fibinfo_free_cpus net/ipv4/fib_semantics.c:200 [inline]
> >free_fib_info_rcu+0x2e1/0x490 net/ipv4/fib_semantics.c:217
> >__rcu_reclaim kernel/rcu/rcu.h:240 [inline]
> >rcu_do_batch kernel/rcu/tree.c:2437 [inline]
> >invoke_rcu_callbacks kernel/rcu/tree.c:2716 [inline]
> >rcu_process_callbacks+0x100a/0x1ac0 kernel/rcu/tree.c:2697
> >  ...
> >
> > They were caused by the fib_nh_common percpu member
> > 'nhc_pcpu_rth_output'
> > overwritten by another percpu variable 'dev->tstats' access overflow in tipc
> > udp media xmit path when counting packets on a non tunnel device.
> >
> > The fix is to make udp tunnel work with no tunnel device by allowing not to
> > count packets on the tstats when the tunnel dev is NULL in Patches 1/3 and
> > 2/3, then pass a NULL tunnel dev in tipc_udp_tunnel() in Patch 3/3.
> >
> > Xin Long (3):
> >   ip_tunnel: allow not to count pkts on tstats by setting skb's dev to
> > NULL
> >   ip6_tunnel: allow not to count pkts on tstats by passing dev as NULL
> >   tipc: pass tunnel dev as NULL to udp_tunnel(6)_xmit_skb
> >
> >  include/net/ip6_tunnel.h  | 9 ++---  net/ipv4/ip_tunnel_core.c | 9
> > ++---
> >  net/tipc/udp_media.c  | 8 +++-
> >  3 files changed, 15 insertions(+), 11 deletions(-)
> >
> > --
> > 2.1.0
>


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: change to use register_pernet_device

2019-06-20 Thread Xin Long
This patch is to fix a dst refcnt leak, which can be reproduced by doing:

  # ip net a c; ip net a s; modprobe tipc
  # ip net e s ip l a n eth1 type veth peer n eth1 netns c
  # ip net e c ip l s lo up; ip net e c ip l s eth1 up
  # ip net e s ip l s lo up; ip net e s ip l s eth1 up
  # ip net e c ip a a 1.1.1.2/8 dev eth1
  # ip net e s ip a a 1.1.1.1/8 dev eth1
  # ip net e c tipc b e m udp n u1 localip 1.1.1.2
  # ip net e s tipc b e m udp n u1 localip 1.1.1.1
  # ip net d c; ip net d s; rmmod tipc

and it will get stuck and keep logging the error:

  unregister_netdevice: waiting for lo to become free. Usage count = 1

The cause is that a dst is held by the udp sock's sk_rx_dst set on udp rx
path with udp_early_demux == 1, and this dst (eventually holding lo dev)
can't be released as bearer's removal in tipc pernet .exit happens after
lo dev's removal, default_device pernet .exit.

 "There are two distinct types of pernet_operations recognized: subsys and
  device.  At creation all subsys init functions are called before device
  init functions, and at destruction all device exit functions are called
  before subsys exit function."

So by calling register_pernet_device instead to register tipc_net_ops, the
pernet .exit() will be invoked earlier than loopback dev's removal when a
netns is being destroyed, as fou/gue does.

Note that vxlan and geneve udp tunnels don't have this issue, as the udp
sock is released in their device ndo_stop().

This fix is also necessary for tipc dst_cache, which will hold dsts on the
tx path and which I will introduce in my next patch.

Reported-by: Li Shuang 
Signed-off-by: Xin Long 
---
 net/tipc/core.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index ed536c0..c837072 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -134,7 +134,7 @@ static int __init tipc_init(void)
if (err)
goto out_sysctl;
 
-   err = register_pernet_subsys(_net_ops);
+   err = register_pernet_device(_net_ops);
if (err)
goto out_pernet;
 
@@ -142,7 +142,7 @@ static int __init tipc_init(void)
if (err)
goto out_socket;
 
-   err = register_pernet_subsys(_topsrv_net_ops);
+   err = register_pernet_device(_topsrv_net_ops);
if (err)
goto out_pernet_topsrv;
 
@@ -153,11 +153,11 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
 out_bearer:
-   unregister_pernet_subsys(_topsrv_net_ops);
+   unregister_pernet_device(_topsrv_net_ops);
 out_pernet_topsrv:
tipc_socket_stop();
 out_socket:
-   unregister_pernet_subsys(_net_ops);
+   unregister_pernet_device(_net_ops);
 out_pernet:
tipc_unregister_sysctl();
 out_sysctl:
@@ -172,9 +172,9 @@ static int __init tipc_init(void)
 static void __exit tipc_exit(void)
 {
tipc_bearer_cleanup();
-   unregister_pernet_subsys(_topsrv_net_ops);
+   unregister_pernet_device(_topsrv_net_ops);
tipc_socket_stop();
-   unregister_pernet_subsys(_net_ops);
+   unregister_pernet_device(_net_ops);
tipc_netlink_stop();
tipc_netlink_compat_stop();
tipc_unregister_sysctl();
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] KMSAN: uninit-value in tipc_nl_compat_bearer_disable

2019-06-21 Thread Xin Long
On Wed, Jun 19, 2019 at 11:48 PM syzbot
 wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:f75e4cfe kmsan: use kmsan_handle_urb() in urb.c
> git tree:   kmsan
> console output: https://syzkaller.appspot.com/x/log.txt?x=13d0a6fea0
> kernel config:  https://syzkaller.appspot.com/x/.config?x=602468164ccdc30a
> dashboard link: https://syzkaller.appspot.com/bug?extid=30eaa8bf392f7fafffaf
> compiler:   clang version 9.0.0 (/home/glider/llvm/clang
> 06d00afa61eef8f7f501ebdb4e8612ea43ec2d78)
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=15b4a95aa0
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=162fc761a0
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+30eaa8bf392f7faff...@syzkaller.appspotmail.com
>
> IPv6: ADDRCONF(NETDEV_CHANGE): hsr0: link becomes ready
> 8021q: adding VLAN 0 to HW filter on device batadv0
> ==
> BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:981
> CPU: 0 PID: 12554 Comm: syz-executor731 Not tainted 5.1.0+ #1
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>   __dump_stack lib/dump_stack.c:77 [inline]
>   dump_stack+0x191/0x1f0 lib/dump_stack.c:113
>   kmsan_report+0x130/0x2a0 mm/kmsan/kmsan.c:622
>   __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:310
>   memchr+0xce/0x110 lib/string.c:981
>   string_is_valid net/tipc/netlink_compat.c:176 [inline]
>   tipc_nl_compat_bearer_disable+0x2a1/0x480 net/tipc/netlink_compat.c:449
TLV_GET_DATA_LEN(msg->req) may return a negative value, which is then
passed to memchr() as a size_t (a huge unsigned long), and that
triggers this issue.

@@ -446,7 +446,7 @@ static int tipc_nl_compat_bearer_disable(struct
tipc_nl_compat_cmd_doit *cmd,
  return -EMSGSIZE;

  len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
- if (!string_is_valid(name, len))
+ if (len <=0 || !string_is_valid(name, len))
  return -EINVAL;

The same fix is needed in some other places, and I will post a fix for them as well.

>   __tipc_nl_compat_doit net/tipc/netlink_compat.c:327 [inline]
>   tipc_nl_compat_doit+0x3ac/0xb00 net/tipc/netlink_compat.c:360
>   tipc_nl_compat_handle net/tipc/netlink_compat.c:1178 [inline]
>   tipc_nl_compat_recv+0x1b1b/0x27b0 net/tipc/netlink_compat.c:1281
>   genl_family_rcv_msg net/netlink/genetlink.c:602 [inline]
>   genl_rcv_msg+0x185a/0x1a40 net/netlink/genetlink.c:627
>   netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2486
>   genl_rcv+0x63/0x80 net/netlink/genetlink.c:638
>   netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline]
>   netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1337
>   netlink_sendmsg+0x127e/0x12f0 net/netlink/af_netlink.c:1926
>   sock_sendmsg_nosec net/socket.c:651 [inline]
>   sock_sendmsg net/socket.c:661 [inline]
>   ___sys_sendmsg+0xcc6/0x1200 net/socket.c:2260
>   __sys_sendmsg net/socket.c:2298 [inline]
>   __do_sys_sendmsg net/socket.c:2307 [inline]
>   __se_sys_sendmsg+0x305/0x460 net/socket.c:2305
>   __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2305
>   do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
>   entry_SYSCALL_64_after_hwframe+0x63/0xe7
> RIP: 0033:0x442639
> Code: 41 02 00 85 c0 b8 00 00 00 00 48 0f 44 c3 5b c3 90 48 89 f8 48 89 f7
> 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
> ff 0f 83 fb 10 fc ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:007efea8 EFLAGS: 0246 ORIG_RAX: 002e
> RAX: ffda RBX: 0003 RCX: 00442639
> RDX:  RSI: 2080 RDI: 0003
> RBP: 007eff00 R08: 0003 R09: 0003
> R10: bb1414ac R11: 0246 R12: 0003
> R13: 00403c50 R14:  R15: 
>
> Uninit was created at:
>   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:208 [inline]
>   kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:162
>   kmsan_kmalloc+0xa4/0x130 mm/kmsan/kmsan_hooks.c:175
>   kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:184
>   slab_post_alloc_hook mm/slab.h:442 [inline]
>   slab_alloc_node mm/slub.c:2771 [inline]
>   __kmalloc_node_track_caller+0xcba/0xf30 mm/slub.c:4399
>   __kmalloc_reserve net/core/skbuff.c:140 [inline]
>   __alloc_skb+0x306/0xa10 net/core/skbuff.c:208
>   alloc_skb include/linux/skbuff.h:1059 [inline]
>   netlink_alloc_large_skb net/netlink/af_netlink.c:1183 [inline]
>   netlink_sendmsg+0xb81/0x12f0 net/netlink/af_netlink.c:1901
>   sock_sendmsg_nosec net/socket.c:651 [inline]
>   sock_sendmsg net/socket.c:661 [inline]
>   ___sys_sendmsg+0xcc6/0x1200 net/socket.c:2260
>   __sys_sendmsg net/socket.c:2298 [inline]
>   __do_sys_sendmsg net/socket.c:2307 [inline]
>   __se_sys_sendmsg+0x305/0x460 net/socket.c:2305
>   __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2305
>   do_syscall_64+0xbc/0xf0 

Re: [tipc-discussion] [PATCH net 0/3] net: fix quite a few dst_cache crashes reported by syzbot

2019-06-21 Thread Xin Long
On Fri, Jun 21, 2019 at 3:48 AM Erik Hugne  wrote:
>
> Commenting on Jon's response first.
>
> Den tors 20 juni 2019 kl 13:26 skrev Xin Long :
> >
> > On Mon, Jun 17, 2019 at 10:28 PM Jon Maloy  wrote:
> > > Hi Xin,
> > > As I remember the discussion around introduction of UDP media a few years 
> > > ago, the developer, Erik Hugne, only chose to register TIPC as a udp 
> > > tunnel user instead of regular udp user because it provides a more 
> > > efficient way to receive packet in kernel space.
> > >With UDP tunnel, we could receive packet directly in a callback, while 
> > >TIPC had to run in a work queue thread in order to read packets from the 
> > >socket.
> The performance was largely dependent on TIPC message size: for large packets 
> there was no measurable difference, but the tunnel approach was considerably 
> faster for small packets than the kernel socket interface.
> I don't have the numbers, but I think I posted them on this list around 8 
> years ago.
>
> >[...]
> > To implement this gso callback, we need to require an ipproto number for 
> > TIPC,
> > and register the callback into inet_offloads by inet_add_offload().
> > And on tx path set:
> > skb->encapsulation = 1,
> > skb_shinfo(skb)->gso_type|= SKB_GSO_UDP_TUNNEL,
> > skb->inner_protocol_type = ENCAP_TYPE_IPPROTO.
> >
> > Then it will be called by:
> > dev_queue_xmit() .. -> skb_mac_gso_segment() ... ->
> > udp4_ufo_fragment() -> skb_udp_tunnel_segment() ->
> > __skb_udp_tunnel_segment() -> tipc_gso_fragment()
> >
> > btw, do we have an official ipproto number for TIPC already?
>
> Not afaik, but we do have an IANA-assigned UDP port for TIPC, 6118.
> https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml?search=tipc
>
> TIPC does:
> skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
> which will in turn set skb_inner_protocol_type to ENCAP_TYPE_ETHER.
> So how about implementing something similar to what's done for 
> ENCAP_TYPE_IPPROTO, but for ENCAP_TYPE_ETHER?
>
> In udp_offload.c, something along the lines of:
>
> ...
> skb_udp_tunnel_segment()
> 
>
> switch (skb->inner_protocol_type) {
> case ENCAP_TYPE_ETHER:
> protocol = skb->inner_protocol;
> ops = rcu_dereference(ether_offloads[protocol]);
> if (!ops || !ops->callbacks->gso_segment)
> goto out_unlock;
> gso_inner_segment = ops->callbacks.gso_segment;
> break;
>
> 
> And obviously define ether_offloads, and corresponding ether_add_protocol and 
> ether_add_offload functions.
>
Maybe there is no need for ether_offloads; dev_add_offload(&tipc_packet_offload)
is enough to make the callback be called by skb_mac_gso_segment() from
skb_udp_tunnel_segment(). I believe that's also what Jon does now.

It depends on which protocol layer we consider TIPC to belong to. If we have
no plan for TIPC to work over IP as a transport protocol in the future,
packet_offload is fine; otherwise, inet_offloads is also an option.


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: remove the unnecessary msg->req check from tipc_nl_compat_bearer_set

2019-06-24 Thread Xin Long
tipc_nl_compat_bearer_set() is only called by tipc_nl_compat_link_set()
which already does the msg->req check, so remove it from
tipc_nl_compat_bearer_set(), and do the same in tipc_nl_compat_media_set().

Signed-off-by: Xin Long 
---
 net/tipc/netlink_compat.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index cf15506..d86030e 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -691,7 +691,6 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb,
struct nlattr *prop;
struct nlattr *media;
struct tipc_link_config *lc;
-   int len;
 
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
@@ -699,10 +698,6 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb,
if (!media)
return -EMSGSIZE;
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME);
-   if (!string_is_valid(lc->name, len))
-   return -EINVAL;
-
if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name))
return -EMSGSIZE;
 
@@ -723,7 +718,6 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb,
struct nlattr *prop;
struct nlattr *bearer;
struct tipc_link_config *lc;
-   int len;
 
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
@@ -731,10 +725,6 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb,
if (!bearer)
return -EMSGSIZE;
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME);
-   if (!string_is_valid(lc->name, len))
-   return -EINVAL;
-
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name))
return -EMSGSIZE;
 
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net] tipc: remove the unnecessary msg->req check from tipc_nl_compat_bearer_set

2019-06-24 Thread Xin Long
On Mon, Jun 24, 2019 at 10:35 PM David Miller  wrote:
>
> From: Xin Long 
> Date: Mon, 24 Jun 2019 16:02:42 +0800
>
> > tipc_nl_compat_bearer_set() is only called by tipc_nl_compat_link_set()
> > which already does the msg->req check, so remove it from
> > tipc_nl_compat_bearer_set(), and do the same in tipc_nl_compat_media_set().
> >
> > Signed-off-by: Xin Long 
>
> Is this really appropriate as a fix for 'net'?  Seems more like net-next 
> material
> to me.
It's kind of a code fix, but sure, you can apply it to net-next; there is no conflict.
Do you need me to repost?


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net] tipc: check msg->req data len in tipc_nl_compat_bearer_disable

2019-06-24 Thread Xin Long
On Mon, Jun 24, 2019 at 4:33 PM Eric Dumazet  wrote:
>
>
>
> On 6/24/19 12:59 AM, Xin Long wrote:
> > This patch is to fix an uninit-value issue, reported by syzbot:
> >
> >   BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:981
> >   Call Trace:
> > __dump_stack lib/dump_stack.c:77 [inline]
> > dump_stack+0x191/0x1f0 lib/dump_stack.c:113
> > kmsan_report+0x130/0x2a0 mm/kmsan/kmsan.c:622
> > __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:310
> > memchr+0xce/0x110 lib/string.c:981
> > string_is_valid net/tipc/netlink_compat.c:176 [inline]
> > tipc_nl_compat_bearer_disable+0x2a1/0x480 net/tipc/netlink_compat.c:449
> > __tipc_nl_compat_doit net/tipc/netlink_compat.c:327 [inline]
> > tipc_nl_compat_doit+0x3ac/0xb00 net/tipc/netlink_compat.c:360
> > tipc_nl_compat_handle net/tipc/netlink_compat.c:1178 [inline]
> > tipc_nl_compat_recv+0x1b1b/0x27b0 net/tipc/netlink_compat.c:1281
> >
> > TLV_GET_DATA_LEN() may return a negative int value, which will be
> > used as size_t (becoming a big unsigned long) passed into memchr,
> > causing this issue.
> >
> > Similar to what it does in tipc_nl_compat_bearer_enable(), this
> > fix is to return -EINVAL when TLV_GET_DATA_LEN() is negative in
> > tipc_nl_compat_bearer_disable(), as well as in
> > tipc_nl_compat_link_stat_dump() and tipc_nl_compat_link_reset_stats().
> >
> > Reported-by: syzbot+30eaa8bf392f7faff...@syzkaller.appspotmail.com
> > Signed-off-by: Xin Long 
>
> Please add an appropriate Fixes: tag, thanks !
>
Fixes: 0762216c0ad2 ("tipc: fix uninit-value in tipc_nl_compat_bearer_enable")
Fixes: 8b66fee7f8ee ("tipc: fix uninit-value in
tipc_nl_compat_link_reset_stats")

Sorry, David, do I need to resend this one?


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCHv2 net] tipc: check msg->req data len in tipc_nl_compat_bearer_disable

2019-06-24 Thread Xin Long
This patch is to fix an uninit-value issue, reported by syzbot:

  BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:981
  Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x191/0x1f0 lib/dump_stack.c:113
kmsan_report+0x130/0x2a0 mm/kmsan/kmsan.c:622
__msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:310
memchr+0xce/0x110 lib/string.c:981
string_is_valid net/tipc/netlink_compat.c:176 [inline]
tipc_nl_compat_bearer_disable+0x2a1/0x480 net/tipc/netlink_compat.c:449
__tipc_nl_compat_doit net/tipc/netlink_compat.c:327 [inline]
tipc_nl_compat_doit+0x3ac/0xb00 net/tipc/netlink_compat.c:360
tipc_nl_compat_handle net/tipc/netlink_compat.c:1178 [inline]
tipc_nl_compat_recv+0x1b1b/0x27b0 net/tipc/netlink_compat.c:1281

TLV_GET_DATA_LEN() may return a negative int value, which will be
used as size_t (becoming a big unsigned long) passed into memchr,
causing this issue.

Similar to what it does in tipc_nl_compat_bearer_enable(), this
fix is to return -EINVAL when TLV_GET_DATA_LEN() is negative in
tipc_nl_compat_bearer_disable(), as well as in
tipc_nl_compat_link_stat_dump() and tipc_nl_compat_link_reset_stats().

v1->v2:
  - add the missing Fixes tags per Eric's request.

Fixes: 0762216c0ad2 ("tipc: fix uninit-value in tipc_nl_compat_bearer_enable")
Fixes: 8b66fee7f8ee ("tipc: fix uninit-value in 
tipc_nl_compat_link_reset_stats")
Reported-by: syzbot+30eaa8bf392f7faff...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
---
 net/tipc/netlink_compat.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index c6a04c0..cf15506 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct 
tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
 
@@ -539,7 +543,11 @@ static int tipc_nl_compat_link_stat_dump(struct 
tipc_nl_compat_msg *msg,
 
name = (char *)TLV_DATA(msg->req);
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
 
@@ -817,7 +825,11 @@ static int tipc_nl_compat_link_reset_stats(struct 
tipc_nl_compat_cmd_doit *cmd,
if (!link)
return -EMSGSIZE;
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
 
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net-next] tipc: use rcu dereference functions properly

2019-07-03 Thread Xin Long
On Wed, Jul 3, 2019 at 6:08 AM David Miller  wrote:
>
> From: Xin Long 
> Date: Tue,  2 Jul 2019 00:54:55 +0800
>
> > For the places that are protected by rcu_read_lock, we change from
> > rcu_dereference_rtnl to rcu_dereference, as there is no need to
> > check if rtnl lock is held.
> >
> > For the places that are protected by rtnl_lock, we change from
> > rcu_dereference_rtnl to rtnl_dereference/rcu_dereference_protected,
> > as no extra memory barriers are needed under rtnl_lock() which also
> > protects tn->bearer_list[] and dev->tipc_ptr/b->media_ptr updating.
> >
> > rcu_dereference_rtnl will be only used in the places where it could
> > be under rcu_read_lock or rtnl_lock.
> >
> > Signed-off-by: Xin Long 
>
> In the cases where RTNL is held, even if rcu_read_lock() is also taken,
> we should use rtnl_dereference() because that avoids the READ_ONCE().
Right, that's what I did in this patch.

But for the places where it's sometimes called under rtnl_lock() only and
sometimes called under rcu_read_lock() only, like tipc_udp_is_known_peer()
and tipc_udp_rcast_add(), I kept rcu_dereference_rtnl(). Does that make sense?


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next] tipc: use rcu dereference functions properly

2019-07-01 Thread Xin Long
For the places that are protected by rcu_read_lock, we change from
rcu_dereference_rtnl to rcu_dereference, as there is no need to
check if rtnl lock is held.

For the places that are protected by rtnl_lock, we change from
rcu_dereference_rtnl to rtnl_dereference/rcu_dereference_protected,
as no extra memory barriers are needed under rtnl_lock() which also
protects tn->bearer_list[] and dev->tipc_ptr/b->media_ptr updating.

rcu_dereference_rtnl will be only used in the places where it could
be under rcu_read_lock or rtnl_lock.

Signed-off-by: Xin Long 
---
 net/tipc/bearer.c| 14 +++---
 net/tipc/udp_media.c |  8 
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2bed658..a809c0e 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -62,7 +62,7 @@ static struct tipc_bearer *bearer_get(struct net *net, int 
bearer_id)
 {
struct tipc_net *tn = tipc_net(net);
 
-   return rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+   return rcu_dereference(tn->bearer_list[bearer_id]);
 }
 
 static void bearer_disable(struct net *net, struct tipc_bearer *b);
@@ -210,7 +210,7 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, 
u32 dest)
struct tipc_bearer *b;
 
rcu_read_lock();
-   b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+   b = rcu_dereference(tn->bearer_list[bearer_id]);
if (b)
tipc_disc_add_dest(b->disc);
rcu_read_unlock();
@@ -222,7 +222,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 
bearer_id, u32 dest)
struct tipc_bearer *b;
 
rcu_read_lock();
-   b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+   b = rcu_dereference(tn->bearer_list[bearer_id]);
if (b)
tipc_disc_remove_dest(b->disc);
rcu_read_unlock();
@@ -444,7 +444,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
struct net_device *dev;
int delta;
 
-   dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
+   dev = (struct net_device *)rcu_dereference(b->media_ptr);
if (!dev)
return 0;
 
@@ -481,7 +481,7 @@ int tipc_bearer_mtu(struct net *net, u32 bearer_id)
struct tipc_bearer *b;
 
rcu_read_lock();
-   b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]);
+   b = rcu_dereference(tipc_net(net)->bearer_list[bearer_id]);
if (b)
mtu = b->mtu;
rcu_read_unlock();
@@ -574,8 +574,8 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct 
net_device *dev,
struct tipc_bearer *b;
 
rcu_read_lock();
-   b = rcu_dereference_rtnl(dev->tipc_ptr) ?:
-   rcu_dereference_rtnl(orig_dev->tipc_ptr);
+   b = rcu_dereference(dev->tipc_ptr) ?:
+   rcu_dereference(orig_dev->tipc_ptr);
if (likely(b && test_bit(0, >up) &&
   (skb->pkt_type <= PACKET_MULTICAST))) {
skb_mark_not_on_list(skb);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index b8962df..62b85db 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -231,7 +231,7 @@ static int tipc_udp_send_msg(struct net *net, struct 
sk_buff *skb,
}
 
skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
-   ub = rcu_dereference_rtnl(b->media_ptr);
+   ub = rcu_dereference(b->media_ptr);
if (!ub) {
err = -ENODEV;
goto out;
@@ -490,7 +490,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct 
netlink_callback *cb)
}
}
 
-   ub = rcu_dereference_rtnl(b->media_ptr);
+   ub = rtnl_dereference(b->media_ptr);
if (!ub) {
rtnl_unlock();
return -EINVAL;
@@ -532,7 +532,7 @@ int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, 
struct tipc_bearer *b)
struct udp_bearer *ub;
struct nlattr *nest;
 
-   ub = rcu_dereference_rtnl(b->media_ptr);
+   ub = rtnl_dereference(b->media_ptr);
if (!ub)
return -ENODEV;
 
@@ -806,7 +806,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
 {
struct udp_bearer *ub;
 
-   ub = rcu_dereference_rtnl(b->media_ptr);
+   ub = rtnl_dereference(b->media_ptr);
if (!ub) {
pr_err("UDP bearer instance not found\n");
return;
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next] tipc: remove ub->ubsock checks

2019-07-01 Thread Xin Long
Both tipc_udp_enable and tipc_udp_disable are called under rtnl_lock, so
ub->ubsock can never be NULL in tipc_udp_disable and cleanup_bearer;
remove the checks.

Also remove the one in tipc_udp_enable by adding a "free" label.

Signed-off-by: Xin Long 
---
 net/tipc/udp_media.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 62b85db..287df687 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -759,7 +759,7 @@ static int tipc_udp_enable(struct net *net, struct 
tipc_bearer *b,
 
err = dst_cache_init(&ub->rcast.dst_cache, GFP_ATOMIC);
if (err)
-   goto err;
+   goto free;
 
/**
 * The bcast media address port is used for all peers and the ip
@@ -771,13 +771,14 @@ static int tipc_udp_enable(struct net *net, struct 
tipc_bearer *b,
else
err = tipc_udp_rcast_add(b, &remote);
if (err)
-   goto err;
+   goto free;
 
return 0;
-err:
+
+free:
dst_cache_destroy(&ub->rcast.dst_cache);
-   if (ub->ubsock)
-   udp_tunnel_sock_release(ub->ubsock);
+   udp_tunnel_sock_release(ub->ubsock);
+err:
kfree(ub);
return err;
 }
@@ -795,8 +796,7 @@ static void cleanup_bearer(struct work_struct *work)
}
 
dst_cache_destroy(&ub->rcast.dst_cache);
-   if (ub->ubsock)
-   udp_tunnel_sock_release(ub->ubsock);
+   udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
kfree(ub);
 }
@@ -811,8 +811,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
pr_err("UDP bearer instance not found\n");
return;
}
-   if (ub->ubsock)
-   sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
+   sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
RCU_INIT_POINTER(ub->bearer, NULL);
 
/* sock_release need to be done outside of rtnl lock */
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: add NULL pointer check before calling kfree_rcu

2019-09-03 Thread Xin Long
Unlike kfree(p), kfree_rcu(p, rcu) won't do NULL pointer check. When
tipc_nametbl_remove_publ returns NULL, the panic below happens:

   BUG: unable to handle kernel NULL pointer dereference at 0068
   RIP: 0010:__call_rcu+0x1d/0x290
   Call Trace:

tipc_publ_notify+0xa9/0x170 [tipc]
tipc_node_write_unlock+0x8d/0x100 [tipc]
tipc_node_link_down+0xae/0x1d0 [tipc]
tipc_node_check_dest+0x3ea/0x8f0 [tipc]
? tipc_disc_rcv+0x2c7/0x430 [tipc]
tipc_disc_rcv+0x2c7/0x430 [tipc]
? tipc_rcv+0x6bb/0xf20 [tipc]
tipc_rcv+0x6bb/0xf20 [tipc]
? ip_route_input_slow+0x9cf/0xb10
tipc_udp_recv+0x195/0x1e0 [tipc]
? tipc_udp_is_known_peer+0x80/0x80 [tipc]
udp_queue_rcv_skb+0x180/0x460
udp_unicast_rcv_skb.isra.56+0x75/0x90
__udp4_lib_rcv+0x4ce/0xb90
ip_local_deliver_finish+0x11c/0x210
ip_local_deliver+0x6b/0xe0
? ip_rcv_finish+0xa9/0x410
ip_rcv+0x273/0x362

Fixes: 97ede29e80ee ("tipc: convert name table read-write lock to RCU")
Reported-by: Li Shuang 
Signed-off-by: Xin Long 
---
 net/tipc/name_distr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 44abc8e..241ed22 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -223,7 +223,8 @@ static void tipc_publ_purge(struct net *net, struct 
publication *publ, u32 addr)
   publ->key);
}
 
-   kfree_rcu(p, rcu);
+   if (p)
+   kfree_rcu(p, rcu);
 }
 
 /**
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: purge deferredq list for each grp member in tipc_group_delete

2019-06-16 Thread Xin Long
Syzbot reported a memleak caused by grp members' deferredq list not
purged when the grp is deleted.

The issue occurs when more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt) in
tipc_group_filter_msg() and the skb will stay in deferredq.

So fix it by calling __skb_queue_purge for each member's deferredq
in tipc_group_delete() when a tipc sk leaves the grp.

Fixes: b87a5ea31c93 ("tipc: guarantee group unicast doesn't bypass group 
broadcast")
Reported-by: syzbot+78fbe679c8ca8d264...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
---
 net/tipc/group.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/group.c b/net/tipc/group.c
index 992be61..5f98d38 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -218,6 +218,7 @@ void tipc_group_delete(struct net *net, struct tipc_group 
*grp)
 
rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+   __skb_queue_purge(&m->deferredq);
list_del(&m->list);
kfree(m);
}
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net-next] tipc: use rcu dereference functions properly

2019-07-06 Thread Xin Long
On Wed, Jul 3, 2019 at 4:33 PM Xin Long  wrote:
>
> On Wed, Jul 3, 2019 at 6:08 AM David Miller  wrote:
> >
> > From: Xin Long 
> > Date: Tue,  2 Jul 2019 00:54:55 +0800
> >
> > > For these places are protected by rcu_read_lock, we change from
> > > rcu_dereference_rtnl to rcu_dereference, as there is no need to
> > > check if rtnl lock is held.
> > >
> > > For these places are protected by rtnl_lock, we change from
> > > rcu_dereference_rtnl to rtnl_dereference/rcu_dereference_protected,
> > > as no extra memory barriers are needed under rtnl_lock() which also
> > > protects tn->bearer_list[] and dev->tipc_ptr/b->media_ptr updating.
> > >
> > > rcu_dereference_rtnl will be only used in the places where it could
> > > be under rcu_read_lock or rtnl_lock.
> > >
> > > Signed-off-by: Xin Long 
> >
> > In the cases where RTNL is held, even if rcu_read_lock() is also taken,
> > we should use rtnl_dereference() because that avoids the READ_ONCE().
> Right, that's what I did in this patch.
>
> But for the places where it's sometimes called under rtnl_lock() only and
> sometimes called under rcu_read_lock() only, like tipc_udp_is_known_peer()
> and tipc_udp_rcast_add(), I kept rcu_dereference_rtnl(). makes sense?
Hi, David, I saw this patch in "Changes Requested".

I've checked all the places touched by this patch: no function that calls
rcu_dereference() or rcu_dereference_rtnl() is called ONLY under rtnl_lock()
protection. So I can't see a problem with it.

Do I need to resend?


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH v2 3/3] tipc: fix issue of calling smp_processor_id() in preemptible

2020-02-20 Thread Xin Long
On Wed, Feb 19, 2020 at 4:34 PM Dmitry Vyukov  wrote:
>
> On Wed, Feb 19, 2020 at 9:29 AM Dmitry Vyukov  wrote:
> >
> > On Mon, Aug 12, 2019 at 9:44 AM Ying Xue  wrote:
> > >
> > > syzbot found the following issue:
> > >
> > > [   81.119772][ T8612] BUG: using smp_processor_id() in preemptible 
> > > [] code: syz-executor834/8612
> > > [   81.136212][ T8612] caller is dst_cache_get+0x3d/0xb0
> > > [   81.141450][ T8612] CPU: 0 PID: 8612 Comm: syz-executor834 Not tainted 
> > > 5.2.0-rc6+ #48
> > > [   81.149435][ T8612] Hardware name: Google Google Compute Engine/Google 
> > > Compute Engine, BIOS Google 01/01/2011
> > > [   81.159480][ T8612] Call Trace:
> > > [   81.162789][ T8612]  dump_stack+0x172/0x1f0
> > > [   81.167123][ T8612]  debug_smp_processor_id+0x251/0x280
> > > [   81.172479][ T8612]  dst_cache_get+0x3d/0xb0
> > > [   81.176928][ T8612]  tipc_udp_xmit.isra.0+0xc4/0xb80
> > > [   81.182046][ T8612]  ? kasan_kmalloc+0x9/0x10
> > > [   81.186531][ T8612]  ? tipc_udp_addr2str+0x170/0x170
> > > [   81.191641][ T8612]  ? __copy_skb_header+0x2e8/0x560
> > > [   81.196750][ T8612]  ? __skb_checksum_complete+0x3f0/0x3f0
> > > [   81.202364][ T8612]  ? netdev_alloc_frag+0x1b0/0x1b0
> > > [   81.207452][ T8612]  ? skb_copy_header+0x21/0x2b0
> > > [   81.212282][ T8612]  ? __pskb_copy_fclone+0x516/0xc90
> > > [   81.217470][ T8612]  tipc_udp_send_msg+0x29a/0x4b0
In tipc_bearer_xmit_skb(), b->media->send_msg()/tipc_udp_send_msg()
is called under rcu_read_lock(), which already ensures it's a
non-preemptible context.

What I see here is an imbalanced rcu_read_(un)lock() call somewhere.

> > > [   81.222400][ T8612]  tipc_bearer_xmit_skb+0x16c/0x360
> > > [   81.227585][ T8612]  tipc_enable_bearer+0xabe/0xd20
> > > [   81.232606][ T8612]  ? __nla_validate_parse+0x2d0/0x1ee0
> > > [   81.238048][ T8612]  ? tipc_bearer_xmit_skb+0x360/0x360
> > > [   81.243401][ T8612]  ? nla_memcpy+0xb0/0xb0
> > > [   81.247710][ T8612]  ? nla_memcpy+0xb0/0xb0
> > > [   81.252020][ T8612]  ? __nla_parse+0x43/0x60
> > > [   81.256417][ T8612]  __tipc_nl_bearer_enable+0x2de/0x3a0
> > > [   81.261856][ T8612]  ? __tipc_nl_bearer_enable+0x2de/0x3a0
> > > [   81.267467][ T8612]  ? tipc_nl_bearer_disable+0x40/0x40
> > > [   81.272848][ T8612]  ? unwind_get_return_address+0x58/0xa0
> > > [   81.278501][ T8612]  ? lock_acquire+0x16f/0x3f0
> > > [   81.283190][ T8612]  tipc_nl_bearer_enable+0x23/0x40
> > > [   81.288300][ T8612]  genl_family_rcv_msg+0x74b/0xf90
> > > [   81.293404][ T8612]  ? genl_unregister_family+0x790/0x790
> > > [   81.298935][ T8612]  ? __lock_acquire+0x54f/0x5490
> > > [   81.303852][ T8612]  ? __netlink_lookup+0x3fa/0x7b0
> > > [   81.308865][ T8612]  genl_rcv_msg+0xca/0x16c
> > > [   81.313266][ T8612]  netlink_rcv_skb+0x177/0x450
> > > [   81.318043][ T8612]  ? genl_family_rcv_msg+0xf90/0xf90
> > > [   81.323311][ T8612]  ? netlink_ack+0xb50/0xb50
> > > [   81.327906][ T8612]  ? lock_acquire+0x16f/0x3f0
> > > [   81.332589][ T8612]  ? kasan_check_write+0x14/0x20
> > > [   81.337511][ T8612]  genl_rcv+0x29/0x40
> > > [   81.341485][ T8612]  netlink_unicast+0x531/0x710
> > > [   81.346268][ T8612]  ? netlink_attachskb+0x770/0x770
> > > [   81.351374][ T8612]  ? _copy_from_iter_full+0x25d/0x8c0
> > > [   81.356765][ T8612]  ? __sanitizer_cov_trace_cmp8+0x18/0x20
> > > [   81.362479][ T8612]  ? __check_object_size+0x3d/0x42f
> > > [   81.367667][ T8612]  netlink_sendmsg+0x8ae/0xd70
> > > [   81.372415][ T8612]  ? netlink_unicast+0x710/0x710
> > > [   81.377520][ T8612]  ? aa_sock_msg_perm.isra.0+0xba/0x170
> > > [   81.383051][ T8612]  ? apparmor_socket_sendmsg+0x2a/0x30
> > > [   81.388530][ T8612]  ? __sanitizer_cov_trace_const_cmp4+0x16/0x20
> > > [   81.394775][ T8612]  ? security_socket_sendmsg+0x8d/0xc0
> > > [   81.400240][ T8612]  ? netlink_unicast+0x710/0x710
> > > [   81.405161][ T8612]  sock_sendmsg+0xd7/0x130
> > > [   81.409561][ T8612]  ___sys_sendmsg+0x803/0x920
> > > [   81.414220][ T8612]  ? copy_msghdr_from_user+0x430/0x430
> > > [   81.419667][ T8612]  ? _raw_spin_unlock_irqrestore+0x6b/0xe0
> > > [   81.425461][ T8612]  ? debug_object_active_state+0x25d/0x380
> > > [   81.431255][ T8612]  ? __lock_acquire+0x54f/0x5490
> > > [   81.436174][ T8612]  ? kasan_check_read+0x11/0x20
> > > [   81.441208][ T8612]  ? _raw_spin_unlock_irqrestore+0xa4/0xe0
> > > [   81.447008][ T8612]  ? mark_held_locks+0xf0/0xf0
> > > [   81.451768][ T8612]  ? __call_rcu.constprop.0+0x28b/0x720
> > > [   81.457298][ T8612]  ? call_rcu+0xb/0x10
> > > [   81.461353][ T8612]  ? __sanitizer_cov_trace_const_cmp4+0x16/0x20
> > > [   81.467589][ T8612]  ? __fget_light+0x1a9/0x230
> > > [   81.472249][ T8612]  ? __fdget+0x1b/0x20
> > > [   81.476301][ T8612]  ? __sanitizer_cov_trace_const_cmp8+0x18/0x20
> > > [   81.482545][ T8612]  __sys_sendmsg+0x105/0x1d0
> > > [   81.487115][ T8612]  ? __ia32_sys_shutdown+0x80/0x80
> > > [   81.492208][ T8612]  ? blkcg_maybe_throttle_current+0x5e2/0xfb0
> > > [   81.498272][ T8612]  

Re: [tipc-discussion] [PATCH net] tipc: block BH before using dst_cache

2020-05-22 Thread Xin Long
On Fri, May 22, 2020 at 1:55 PM Eric Dumazet  wrote:
>
> Resend to the list in non HTML form
>
>
> On Thu, May 21, 2020 at 10:53 PM Eric Dumazet  wrote:
> >
> >
> >
> > On Thu, May 21, 2020 at 10:50 PM Xin Long  wrote:
> >>
> >> On Fri, May 22, 2020 at 2:30 AM Eric Dumazet  wrote:
> >> >
> >> > dst_cache_get() documents it must be used with BH disabled.
> >> Interesting, I thought under rcu_read_lock() is enough, which calls
> >> preempt_disable().
> >
> >
> > rcu_read_lock() does not disable BH, never.
> >
> > And rcu_read_lock() does not necessarily disable preemption.
Then I need to think again if it's really worth using dst_cache here.

Also add tipc-discussion and Jon to CC list.

Thanks.

> >
> >
> >>
> >> have you checked other places where dst_cache_get() are used?
> >
> >
> >
> > Yes, other paths are fine.
> >
> >>
> >>
> >> >
> >> > syzbot reported:
> >> >
> >> > BUG: using smp_processor_id() in preemptible [] code: /21697
> >> > caller is dst_cache_get+0x3a/0xb0 net/core/dst_cache.c:68
> >> > CPU: 0 PID: 21697 Comm:  Not tainted 5.7.0-rc6-syzkaller #0
> >> > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> >> > Google 01/01/2011
> >> > Call Trace:
> >> >  __dump_stack lib/dump_stack.c:77 [inline]
> >> >  dump_stack+0x188/0x20d lib/dump_stack.c:118
> >> >  check_preemption_disabled lib/smp_processor_id.c:47 [inline]
> >> >  debug_smp_processor_id.cold+0x88/0x9b lib/smp_processor_id.c:57
> >> >  dst_cache_get+0x3a/0xb0 net/core/dst_cache.c:68
> >> >  tipc_udp_xmit.isra.0+0xb9/0xad0 net/tipc/udp_media.c:164
> >> >  tipc_udp_send_msg+0x3e6/0x490 net/tipc/udp_media.c:244
> >> >  tipc_bearer_xmit_skb+0x1de/0x3f0 net/tipc/bearer.c:526
> >> >  tipc_enable_bearer+0xb2f/0xd60 net/tipc/bearer.c:331
> >> >  __tipc_nl_bearer_enable+0x2bf/0x390 net/tipc/bearer.c:995
> >> >  tipc_nl_bearer_enable+0x1e/0x30 net/tipc/bearer.c:1003
> >> >  genl_family_rcv_msg_doit net/netlink/genetlink.c:673 [inline]
> >> >  genl_family_rcv_msg net/netlink/genetlink.c:718 [inline]
> >> >  genl_rcv_msg+0x627/0xdf0 net/netlink/genetlink.c:735
> >> >  netlink_rcv_skb+0x15a/0x410 net/netlink/af_netlink.c:2469
> >> >  genl_rcv+0x24/0x40 net/netlink/genetlink.c:746
> >> >  netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
> >> >  netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
> >> >  netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
> >> >  sock_sendmsg_nosec net/socket.c:652 [inline]
> >> >  sock_sendmsg+0xcf/0x120 net/socket.c:672
> >> >  sys_sendmsg+0x6bf/0x7e0 net/socket.c:2362
> >> >  ___sys_sendmsg+0x100/0x170 net/socket.c:2416
> >> >  __sys_sendmsg+0xec/0x1b0 net/socket.c:2449
> >> >  do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
> >> >  entry_SYSCALL_64_after_hwframe+0x49/0xb3
> >> > RIP: 0033:0x45ca29
> >> >
> >> > Fixes: e9c1a793210f ("tipc: add dst_cache support for udp media")
> >> > Cc: Xin Long 
> >> > Cc: Jon Maloy 
> >> > Signed-off-by: Eric Dumazet 
> >> > Reported-by: syzbot 
> >> > ---
> >> >  net/tipc/udp_media.c | 6 +-
> >> >  1 file changed, 5 insertions(+), 1 deletion(-)
> >> >
> >> > diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
> >> > index 
> >> > d6620ad535461a4d04ed5ba90569ce8b7df9f994..28a283f26a8dff24d613e6ed57e5e69d894dae66
> >> >  100644
> >> > --- a/net/tipc/udp_media.c
> >> > +++ b/net/tipc/udp_media.c
> >> > @@ -161,9 +161,11 @@ static int tipc_udp_xmit(struct net *net, struct 
> >> > sk_buff *skb,
> >> >  struct udp_bearer *ub, struct udp_media_addr 
> >> > *src,
> >> >  struct udp_media_addr *dst, struct dst_cache 
> >> > *cache)
> >> >  {
> >> > -   struct dst_entry *ndst = dst_cache_get(cache);
> >> > +   struct dst_entry *ndst;
> >> > int ttl, err = 0;
> >> >
> >> > +   local_bh_disable();
> >> > +   ndst = dst_cache_get(cache);
> >> > if (dst->proto == htons(ETH_P_IP)) {
> >> > struct rtable *rt = (struct rtable *)ndst;
> >> >
> >> > @@ -210,9 +212,11 @@ static int tipc_udp_xmit(struct net *net, struct 
> >> > sk_buff *skb,
> >> >src->port, dst->port, false);
> >> >  #endif
> >> > }
> >> > +   local_bh_enable();
> >> > return err;
> >> >
> >> >  tx_error:
> >> > +   local_bh_enable();
> >> > kfree_skb(skb);
> >> > return err;
> >> >  }
> >> > --
> >> > 2.27.0.rc0.183.gde8f92d652-goog
> >> >


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: use skb_unshare() instead in tipc_buf_append()

2020-09-13 Thread Xin Long
In tipc_buf_append() it may change skb's frag_list, and it causes
problems when this skb is cloned. skb_unclone() doesn't really
make this skb's frag_list available to change.

Shuang Li has reported a use-after-free issue because of this
when creating quite a few macvlan dev over the same dev, where
the broadcast packets will be cloned and go up to the stack:

 [ ] BUG: KASAN: use-after-free in pskb_expand_head+0x86d/0xea0
 [ ] Call Trace:
 [ ]  dump_stack+0x7c/0xb0
 [ ]  print_address_description.constprop.7+0x1a/0x220
 [ ]  kasan_report.cold.10+0x37/0x7c
 [ ]  check_memory_region+0x183/0x1e0
 [ ]  pskb_expand_head+0x86d/0xea0
 [ ]  process_backlog+0x1df/0x660
 [ ]  net_rx_action+0x3b4/0xc90
 [ ]
 [ ] Allocated by task 1786:
 [ ]  kmem_cache_alloc+0xbf/0x220
 [ ]  skb_clone+0x10a/0x300
 [ ]  macvlan_broadcast+0x2f6/0x590 [macvlan]
 [ ]  macvlan_process_broadcast+0x37c/0x516 [macvlan]
 [ ]  process_one_work+0x66a/0x1060
 [ ]  worker_thread+0x87/0xb10
 [ ]
 [ ] Freed by task 3253:
 [ ]  kmem_cache_free+0x82/0x2a0
 [ ]  skb_release_data+0x2c3/0x6e0
 [ ]  kfree_skb+0x78/0x1d0
 [ ]  tipc_recvmsg+0x3be/0xa40 [tipc]

So fix it by using skb_unshare() instead, which would create a new
skb for the cloned frag and it'll be safe to change its frag_list.
The similar things were also done in sctp_make_reassembled_event(),
which is using skb_copy().

Reported-by: Shuang Li 
Fixes: 37e22164a8a3 ("tipc: rename and move message reassembly function")
Signed-off-by: Xin Long 
---
 net/tipc/msg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 848fae6..52e93ba 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct 
sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
-   if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
+   frag = skb_unshare(frag, GFP_ATOMIC);
+   if (unlikely(!frag))
goto err;
head = *headbuf = frag;
*buf = NULL;
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net] tipc: not enable tipc when ipv6 works as a module

2020-08-18 Thread Xin Long
On Tue, Aug 18, 2020 at 6:20 AM Cong Wang  wrote:
>
> On Mon, Aug 17, 2020 at 2:39 PM David Miller  wrote:
> >
> > From: Cong Wang 
> > Date: Mon, 17 Aug 2020 13:59:46 -0700
> >
> > > Is this a new Kconfig feature? ipv6_stub was introduced for
> > > VXLAN, at that time I don't remember we have such kind of
> > > Kconfig rules, otherwise it would not be needed.
> >
> > The ipv6_stub exists in order to allow the troublesome
> > "ipv6=m && feature_using_ipv6=y" combination.
For certain code, instead of IS_ENABLED(), use IS_REACHABLE().

>
> Hmm, so "IPV6=m && TIPC=y" is not a concern here as you pick
> this patch over adding a ipv6_stub?
>
This is more a question for TIPC users.

Hi, Jon and Ying,

Have you met any users having "IPV6=m && TIPC=y" in their kernels?


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] WARNING: suspicious RCU usage in tipc_l2_send_msg

2020-08-19 Thread Xin Long
On Sat, Jun 27, 2020 at 1:25 AM syzbot
 wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:b835a71e usbnet: smsc95xx: Fix use-after-free after removal
> git tree:   net
> console output: https://syzkaller.appspot.com/x/log.txt?x=1095a51d10
> kernel config:  https://syzkaller.appspot.com/x/.config?x=dcc6334acae363d4
> dashboard link: https://syzkaller.appspot.com/bug?extid=47bbc6b678d317cccbe0
> compiler:   gcc (GCC) 10.1.0-syz 20200507
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+47bbc6b678d317ccc...@syzkaller.appspotmail.com
>
> =
> WARNING: suspicious RCU usage
> 5.8.0-rc1-syzkaller #0 Not tainted
> -
> net/tipc/bearer.c:466 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
>
> rcu_scheduler_active = 2, debug_locks = 1
> 2 locks held by kworker/0:16/19143:
>  #0: 8880a6901d38 ((wq_completion)cryptd){+.+.}-{0:0}, at: 
> arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
>  #0: 8880a6901d38 ((wq_completion)cryptd){+.+.}-{0:0}, at: atomic64_set 
> include/asm-generic/atomic-instrumented.h:856 [inline]
>  #0: 8880a6901d38 ((wq_completion)cryptd){+.+.}-{0:0}, at: 
> atomic_long_set include/asm-generic/atomic-long.h:41 [inline]
>  #0: 8880a6901d38 ((wq_completion)cryptd){+.+.}-{0:0}, at: set_work_data 
> kernel/workqueue.c:616 [inline]
>  #0: 8880a6901d38 ((wq_completion)cryptd){+.+.}-{0:0}, at: 
> set_work_pool_and_clear_pending kernel/workqueue.c:643 [inline]
>  #0: 8880a6901d38 ((wq_completion)cryptd){+.+.}-{0:0}, at: 
> process_one_work+0x82b/0x1670 kernel/workqueue.c:2240
>  #1: c90006f9fda8 ((work_completion)(_queue->work)){+.+.}-{0:0}, at: 
> process_one_work+0x85f/0x1670 kernel/workqueue.c:2244
>
> stack backtrace:
> CPU: 0 PID: 19143 Comm: kworker/0:16 Not tainted 5.8.0-rc1-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> Google 01/01/2011
> Workqueue: cryptd cryptd_queue_worker
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x18f/0x20d lib/dump_stack.c:118
>  tipc_l2_send_msg+0x354/0x420 net/tipc/bearer.c:466
>  tipc_aead_encrypt_done+0x204/0x3a0 net/tipc/crypto.c:761
>  cryptd_aead_crypt+0xe8/0x1d0 crypto/cryptd.c:739
>  cryptd_queue_worker+0x118/0x1b0 crypto/cryptd.c:181
>  process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
>  worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
>  kthread+0x3b5/0x4a0 kernel/kthread.c:291
>  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293
>
Like in bearer.c, rcu_read_lock() is needed before calling
b->media->send_msg() in tipc_aead_encrypt_done():

@@ -757,10 +757,12 @@ static void tipc_aead_encrypt_done(struct
crypto_async_request *base, int err)
switch (err) {
case 0:
this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]);
+   rcu_read_lock();
if (likely(test_bit(0, &b->up)))
b->media->send_msg(net, skb, b, &tx_ctx->dst);
else
kfree_skb(skb);
+   rcu_read_unlock();
break;
>
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkal...@googlegroups.com.
>
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#status for how to communicate with syzbot.


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: call rcu_read_lock() in tipc_aead_encrypt_done()

2020-08-20 Thread Xin Long
b->media->send_msg() requires rcu_read_lock(), as we can see
elsewhere in tipc,  tipc_bearer_xmit, tipc_bearer_xmit_skb
and tipc_bearer_bc_xmit().

Syzbot has reported this issue as:

  net/tipc/bearer.c:466 suspicious rcu_dereference_check() usage!
  Workqueue: cryptd cryptd_queue_worker
  Call Trace:
   tipc_l2_send_msg+0x354/0x420 net/tipc/bearer.c:466
   tipc_aead_encrypt_done+0x204/0x3a0 net/tipc/crypto.c:761
   cryptd_aead_crypt+0xe8/0x1d0 crypto/cryptd.c:739
   cryptd_queue_worker+0x118/0x1b0 crypto/cryptd.c:181
   process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
   worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
   kthread+0x3b5/0x4a0 kernel/kthread.c:291
   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293

So fix it by calling rcu_read_lock() in tipc_aead_encrypt_done()
for b->media->send_msg().

Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication")
Reported-by: syzbot+47bbc6b678d317ccc...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
---
 net/tipc/crypto.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 001bcb0..c38baba 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -757,10 +757,12 @@ static void tipc_aead_encrypt_done(struct 
crypto_async_request *base, int err)
switch (err) {
case 0:
this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]);
+   rcu_read_lock();
if (likely(test_bit(0, &b->up)))
b->media->send_msg(net, skb, b, &tx_ctx->dst);
else
kfree_skb(skb);
+   rcu_read_unlock();
break;
case -EINPROGRESS:
return;
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] ipv6: some fixes for ipv6_dev_find()

2020-08-17 Thread Xin Long
This patch is to do 3 things for ipv6_dev_find():

  As David A. noticed,

  - rt6_lookup() is not really needed. Different from __ip_dev_find(),
ipv6_dev_find() doesn't have a compatibility problem, so remove it.

  As Hideaki suggested,

  - "valid" (non-tentative) check for the address is also needed.
ipv6_chk_addr() calls ipv6_chk_addr_and_flags(), which will
traverse the address hash list, but it's heavy to be called
inside ipv6_dev_find(). This patch is to reuse the code of
ipv6_chk_addr_and_flags() for ipv6_dev_find().

  - dev parameter is passed into ipv6_dev_find(), as link-local
addresses from user space have sin6_scope_id set and the dev
lookup needs it.

Fixes: 81f6cb31222d ("ipv6: add ipv6_dev_find()")
Suggested-by: YOSHIFUJI Hideaki 
Reported-by: David Ahern 
Signed-off-by: Xin Long 
---
 include/net/addrconf.h |  3 ++-
 net/ipv6/addrconf.c| 60 +++---
 net/tipc/udp_media.c   |  8 +++
 3 files changed, 28 insertions(+), 43 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index ba3f6c15..18f783d 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -97,7 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
 
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
 
-struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr);
+struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr,
+struct net_device *dev);
 
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
 const struct in6_addr *addr,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8e761b8..01146b6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1893,12 +1893,13 @@ EXPORT_SYMBOL(ipv6_chk_addr);
  *   2. does the address exist on the specific device
  *  (skip_dev_check = false)
  */
-int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-   const struct net_device *dev, bool skip_dev_check,
-   int strict, u32 banned_flags)
+static struct net_device *
+__ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
 {
unsigned int hash = inet6_addr_hash(net, addr);
-   const struct net_device *l3mdev;
+   struct net_device *l3mdev, *ndev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
 
@@ -1909,10 +1910,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const 
struct in6_addr *addr,
dev = NULL;
 
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
-   if (!net_eq(dev_net(ifp->idev->dev), net))
+   ndev = ifp->idev->dev;
+   if (!net_eq(dev_net(ndev), net))
continue;
 
-   if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+   if (l3mdev_master_dev_rcu(ndev) != l3mdev)
continue;
 
/* Decouple optimistic from tentative for evaluation here.
@@ -1923,15 +1925,23 @@ int ipv6_chk_addr_and_flags(struct net *net, const 
struct in6_addr *addr,
: ifp->flags;
if (ipv6_addr_equal(&ifp->addr, addr) &&
!(ifp_flags&banned_flags) &&
-   (!dev || ifp->idev->dev == dev ||
+   (!dev || ndev == dev ||
 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
rcu_read_unlock();
-   return 1;
+   return ndev;
}
}
 
rcu_read_unlock();
-   return 0;
+   return NULL;
+}
+
+int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
+   const struct net_device *dev, bool skip_dev_check,
+   int strict, u32 banned_flags)
+{
+   return __ipv6_chk_addr_and_flags(net, addr, dev, skip_dev_check,
+strict, banned_flags) ? 1 : 0;
 }
 EXPORT_SYMBOL(ipv6_chk_addr_and_flags);
 
@@ -1990,35 +2000,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
  *
  * The caller should be protected by RCU, or RTNL.
  */
-struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr)
+struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr,
+struct net_device *dev)
 {
-   unsigned int hash = inet6_addr_hash(net, addr);
-   struct inet6_ifaddr *ifp, *result = NULL;
-   struct net_device *dev = NULL;
-
-   rcu_read_lock();
-   hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
-   if (net_eq(dev_net(ifp->idev->dev), net) &&
-

Re: [tipc-discussion] [PATCH net] tipc: not enable tipc when ipv6 works as a module

2020-08-17 Thread Xin Long
On Mon, Aug 17, 2020 at 2:29 AM Cong Wang  wrote:
>
> On Sun, Aug 16, 2020 at 4:54 AM Xin Long  wrote:
> >
> > When using ipv6_dev_find() in one module, it requires ipv6 not to
> > work as a module. Otherwise, this error occurs in build:
> >
> >   undefined reference to `ipv6_dev_find'.
> >
> > So fix it by adding "depends on IPV6 || IPV6=n" to tipc/Kconfig,
> > as it does in sctp/Kconfig.
>
> Or put it into struct ipv6_stub?
Hi Cong,

That could be one way. We may do it when this new function becomes more common.
By now, I think it's okay to make TIPC depend on IPV6 || IPV6=n.

Thanks.


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] KASAN: use-after-free Read in tipc_mcast_xmit (2)

2020-10-03 Thread Xin Long
On Fri, Oct 2, 2020 at 11:38 PM syzbot
 wrote:
>
> Hello,
>
> syzbot found the following issue on:
>
> HEAD commit:a59cf619 Merge branch 'Fix-bugs-in-Octeontx2-netdev-driver'
> git tree:   bpf
> console output: https://syzkaller.appspot.com/x/log.txt?x=163c246790
> kernel config:  https://syzkaller.appspot.com/x/.config?x=99a7c78965c75e07
> dashboard link: https://syzkaller.appspot.com/bug?extid=e96a7ba46281824cc46a
> compiler:   gcc (GCC) 10.1.0-syz 20200507
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=15ada44d90
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1400746790
>
> The issue was bisected to:
>
> commit ff48b6222e65ebdba5a403ef1deba6214e749193
> Author: Xin Long 
> Date:   Sun Sep 13 11:37:31 2020 +
>
> tipc: use skb_unshare() instead in tipc_buf_append()
>
> bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=125402b390
> final oops: https://syzkaller.appspot.com/x/report.txt?x=115402b390
> console output: https://syzkaller.appspot.com/x/log.txt?x=165402b390
>
> IMPORTANT: if you fix the issue, please add the following tag to the commit:
> Reported-by: syzbot+e96a7ba46281824cc...@syzkaller.appspotmail.com
> Fixes: ff48b6222e65 ("tipc: use skb_unshare() instead in tipc_buf_append()")
>
> R10:  R11: 0246 R12: 004028a0
> R13: 00402930 R14:  R15: 
> tipc: Failed do clone local mcast rcv buffer
> ==
> BUG: KASAN: use-after-free in __skb_unlink include/linux/skbuff.h:2063 
> [inline]
> BUG: KASAN: use-after-free in __skb_dequeue include/linux/skbuff.h:2082 
> [inline]
> BUG: KASAN: use-after-free in __skb_queue_purge include/linux/skbuff.h:2793 
> [inline]
> BUG: KASAN: use-after-free in tipc_mcast_xmit+0xfaa/0x1170 
> net/tipc/bcast.c:422
> Read of size 8 at addr 8880a73e2040 by task syz-executor657/6887
>
> CPU: 1 PID: 6887 Comm: syz-executor657 Not tainted 5.9.0-rc6-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x198/0x1fd lib/dump_stack.c:118
>  print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383
>  __kasan_report mm/kasan/report.c:513 [inline]
>  kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
>  __skb_unlink include/linux/skbuff.h:2063 [inline]
>  __skb_dequeue include/linux/skbuff.h:2082 [inline]
>  __skb_queue_purge include/linux/skbuff.h:2793 [inline]
>  tipc_mcast_xmit+0xfaa/0x1170 net/tipc/bcast.c:422
>  tipc_sendmcast+0xaaf/0xef0 net/tipc/socket.c:865
>  __tipc_sendmsg+0xee3/0x18a0 net/tipc/socket.c:1454
>  tipc_sendmsg+0x4c/0x70 net/tipc/socket.c:1387
>  sock_sendmsg_nosec net/socket.c:651 [inline]
>  sock_sendmsg+0xcf/0x120 net/socket.c:671
>  sys_sendmsg+0x6e8/0x810 net/socket.c:2353
>  ___sys_sendmsg+0xf3/0x170 net/socket.c:2407
>  __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440
>  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> RIP: 0033:0x4419d9
> Code: e8 cc ac 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 
> 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 
> 83 3b 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7ffe0cace4c8 EFLAGS: 0246 ORIG_RAX: 002e
> RAX: ffda RBX:  RCX: 004419d9
> RDX:  RSI: 2280 RDI: 0004
> RBP: f0ee R08: 0001 R09: 00402930
> R10:  R11: 0246 R12: 004028a0
> R13: 00402930 R14:  R15: 
>
> Allocated by task 6887:
>  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
>  kasan_set_track mm/kasan/common.c:56 [inline]
>  __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461
>  slab_post_alloc_hook mm/slab.h:518 [inline]
>  slab_alloc_node mm/slab.c:3254 [inline]
>  kmem_cache_alloc_node+0x136/0x430 mm/slab.c:3574
>  __alloc_skb+0x71/0x550 net/core/skbuff.c:198
>  alloc_skb_fclone include/linux/skbuff.h:1144 [inline]
>  tipc_buf_acquire+0x28/0xf0 net/tipc/msg.c:76
>  tipc_msg_build+0x6b8/0x10c0 net/tipc/msg.c:428
>  tipc_sendmcast+0x855/0xef0 net/tipc/socket.c:859
>  __tipc_sendmsg+0xee3/0x18a0 net/tipc/socket.c:1454
>  tipc_sendmsg+0x4c/0x70 net/tipc/socket.c:1387
>  sock_sendmsg_nosec net/socket.c:651 [inline]
>  sock_sendmsg+0xcf/0x120 net/socket.c:671
>  sys_sendmsg+0x6e8/0x810 net/socket.c:2353
>  ___sys_sendmsg+0xf3/0x170 net/socket.c:2407
>  __sys_sendmsg+

Re: [tipc-discussion] [net v3 1/1] tipc: fix memory leak caused by tipc_buf_append()

2020-10-27 Thread Xin Long
On Tue, Oct 27, 2020 at 11:25 AM Tung Nguyen
 wrote:
>
> Commit ed42989eab57 ("tipc: fix the skb_unshare() in tipc_buf_append()")
> replaced skb_unshare() with skb_copy() to not reduce the data reference
> counter of the original skb intentionally. This is not the correct
> way to handle the cloned skb because it causes memory leak in 2
> following cases:
>  1/ Sending multicast messages via broadcast link
>   The original skb list is cloned to the local skb list for local
>   destination. After that, the data reference counter of each skb
>   in the original list has the value of 2. This causes each skb not
>   to be freed after receiving ACK:
>   tipc_link_advance_transmq()
>   {
>...
>/* release skb */
>__skb_unlink(skb, &l->transmq);
>kfree_skb(skb); <-- memory exists after being freed
>   }
>
>  2/ Sending multicast messages via replicast link
>   Similar to the above case, each skb cannot be freed after purging
>   the skb list:
>   tipc_mcast_xmit()
>   {
>...
>__skb_queue_purge(pkts); <-- memory exists after being freed
>   }
>
> This commit fixes this issue by using skb_unshare() instead. Besides,
> to avoid use-after-free error reported by KASAN, the pointer to the
> fragment is set to NULL before calling skb_unshare() to make sure that
> the original skb is not freed after freeing the fragment 2 times in
> case skb_unshare() returns NULL.
>
> Fixes: ed42989eab57 ("tipc: fix the skb_unshare() in tipc_buf_append()")
> Acked-by: Jon Maloy 
> Reported-by: Thang Hoang Ngo 
> Signed-off-by: Tung Nguyen 
Reviewed-by: Xin Long 

> ---
>  net/tipc/msg.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/net/tipc/msg.c b/net/tipc/msg.c
> index 2a78aa701572..32c79c59052b 100644
> --- a/net/tipc/msg.c
> +++ b/net/tipc/msg.c
> @@ -150,12 +150,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct 
> sk_buff **buf)
> if (fragid == FIRST_FRAGMENT) {
> if (unlikely(head))
> goto err;
> -   if (skb_cloned(frag))
> -   frag = skb_copy(frag, GFP_ATOMIC);
> +   *buf = NULL;
> +   frag = skb_unshare(frag, GFP_ATOMIC);
> if (unlikely(!frag))
> goto err;
> head = *headbuf = frag;
> -   *buf = NULL;
> TIPC_SKB_CB(head)->tail = NULL;
> if (skb_is_nonlinear(head)) {
> skb_walk_frags(head, tail) {
> --
> 2.17.1
>
>
>
> ___
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net v1 1/1] tipc: fix memory leak caused by tipc_buf_append()

2020-10-26 Thread Xin Long
On Mon, Oct 26, 2020 at 5:30 PM Tung Quang Nguyen
 wrote:
>
> Hi Xin,
>
> Yes, I know that it should not be a problem if skb_free(NULL) is called.
> But I relied on your analysis for syzbot report:
> "
> in tipc_msg_reassemble():
>
> if (tipc_buf_append(&head, &frag))
> break;
> if (!head)
> goto error; <--- [1]
> }
> __skb_queue_tail(rcvq, frag);
> return true;
> error:
> pr_warn("Failed do clone local mcast rcv buffer\n");
> kfree_skb(head); <---[2]
> return false;
>
> when head is NULL at [1], it goes [2] and could cause a crash.
> from the log, we can see "Failed do clone local mcast rcv buffer" as well.
> "
>
> I will check again your new analysis and create the correct patch.
Sorry, I realized it was a false one after double-checking.

>
> Thanks.
> Tung Nguyen
>
> -Original Message-
> From: Xin Long 
> Sent: Monday, October 26, 2020 4:10 PM
> To: Tung Quang Nguyen 
> Cc: tipc-discussion@lists.sourceforge.net; Jon Maloy ; 
> ma...@donjonn.com; Ying Xue ; Cong Wang 
> 
> Subject: Re: [tipc-discussion] [net v1 1/1] tipc: fix memory leak caused by 
> tipc_buf_append()
>
> On Fri, Oct 23, 2020 at 4:20 PM Tung Nguyen
>  wrote:
> >
> > Commit ed42989eab57 ("fix the skb_unshare() in tipc_buf_append()")
> > replaced skb_unshare() with skb_copy() to not reduce the data reference
> > counter of the original skb intentionally. This is not the correct
> > way to handle the cloned skb because it causes memory leak in 2
> > following cases:
> >  1/ Sending multicast messages via broadcast link
> >   The original skb list is cloned to the local skb list for local
> >   destination. After that, the data reference counter of each skb
> >   in the original list has the value of 2. This causes each skb not
> >   to be freed after receiving ACK:
> >   tipc_link_advance_transmq()
> >   {
> >...
> >/* release skb */
> >__skb_unlink(skb, &l->transmq);
> >kfree_skb(skb); <-- memory exists after being freed
> >   }
> >
> >  2/ Sending multicast messages via replicast link
> >   Similar to the above case, each skb cannot be freed after purging
> >   the skb list:
> >   tipc_mcast_xmit()
> >   {
> >...
> >__skb_queue_purge(pkts); <-- memory exists after being freed
> >   }
> >
> > This commit fixes this issue by using skb_unshare() instead. Besides,
> > to avoid use-after-free error reported by KASAN, kfree_skb(head) in
> > tipc_buf_append() is called only if the pointer to the appending skb
> > is not NULL.
> >
> > v2: improve condition for freeing the appending skb to cover all error
> > cases.
> >
> > Fixes: ed42989eab57 ("fix the skb_unshare() in tipc_buf_append()")
> > Reported-by: Thang Hoang Ngo 
> > Signed-off-by: Tung Nguyen 
> > ---
> >  net/tipc/msg.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/net/tipc/msg.c b/net/tipc/msg.c
> > index 2a78aa701572..46c36c5093de 100644
> > --- a/net/tipc/msg.c
> > +++ b/net/tipc/msg.c
> > @@ -150,8 +150,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct 
> > sk_buff **buf)
> > if (fragid == FIRST_FRAGMENT) {
> > if (unlikely(head))
> > goto err;
> > -   if (skb_cloned(frag))
> > -   frag = skb_copy(frag, GFP_ATOMIC);
> > +   frag = skb_unshare(frag, GFP_ATOMIC);
> > if (unlikely(!frag))
> > goto err;
> > head = *headbuf = frag;
> > @@ -797,7 +796,8 @@ bool tipc_msg_reassemble(struct sk_buff_head *list, 
> > struct sk_buff_head *rcvq)
> > return true;
> >  error:
> > pr_warn("Failed do clone local mcast rcv buffer\n");
> > -   kfree_skb(head);
> > +   if (head)
> > +   kfree_skb(head);
> Hi Tung,
>
> kfree_skb(NULL) won't cause any use-after-free issue, as kfree_skb(skb)
> will return when skb is NULL.
>
> The root cause of use-after-free is as Cong fixed in
> commit ed42989eab57 ("fix the skb_unshare() in tipc_buf_append()"):
>
> When skb_unshare() returns NULL, the 'frag' is freed, and on the err
> path the 'buf'(==the 'frag') get freed again, then the original skb
> is freed.
>
> But that commit indeed caused the memleak on the success path, and
> the right fix should 

Re: [tipc-discussion] [net v1 1/1] tipc: fix memory leak caused by tipc_buf_append()

2020-10-26 Thread Xin Long
On Fri, Oct 23, 2020 at 4:20 PM Tung Nguyen
 wrote:
>
> Commit ed42989eab57 ("fix the skb_unshare() in tipc_buf_append()")
> replaced skb_unshare() with skb_copy() to not reduce the data reference
> counter of the original skb intentionally. This is not the correct
> way to handle the cloned skb because it causes memory leak in 2
> following cases:
>  1/ Sending multicast messages via broadcast link
>   The original skb list is cloned to the local skb list for local
>   destination. After that, the data reference counter of each skb
>   in the original list has the value of 2. This causes each skb not
>   to be freed after receiving ACK:
>   tipc_link_advance_transmq()
>   {
>...
>/* release skb */
>__skb_unlink(skb, &l->transmq);
>kfree_skb(skb); <-- memory exists after being freed
>   }
>
>  2/ Sending multicast messages via replicast link
>   Similar to the above case, each skb cannot be freed after purging
>   the skb list:
>   tipc_mcast_xmit()
>   {
>...
>__skb_queue_purge(pkts); <-- memory exists after being freed
>   }
>
> This commit fixes this issue by using skb_unshare() instead. Besides,
> to avoid use-after-free error reported by KASAN, kfree_skb(head) in
> tipc_buf_append() is called only if the pointer to the appending skb
> is not NULL.
>
> v2: improve condition for freeing the appending skb to cover all error
> cases.
>
> Fixes: ed42989eab57 ("fix the skb_unshare() in tipc_buf_append()")
> Reported-by: Thang Hoang Ngo 
> Signed-off-by: Tung Nguyen 
> ---
>  net/tipc/msg.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/net/tipc/msg.c b/net/tipc/msg.c
> index 2a78aa701572..46c36c5093de 100644
> --- a/net/tipc/msg.c
> +++ b/net/tipc/msg.c
> @@ -150,8 +150,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct 
> sk_buff **buf)
> if (fragid == FIRST_FRAGMENT) {
> if (unlikely(head))
> goto err;
> -   if (skb_cloned(frag))
> -   frag = skb_copy(frag, GFP_ATOMIC);
> +   frag = skb_unshare(frag, GFP_ATOMIC);
> if (unlikely(!frag))
> goto err;
> head = *headbuf = frag;
> @@ -797,7 +796,8 @@ bool tipc_msg_reassemble(struct sk_buff_head *list, 
> struct sk_buff_head *rcvq)
> return true;
>  error:
> pr_warn("Failed do clone local mcast rcv buffer\n");
> -   kfree_skb(head);
> +   if (head)
> +   kfree_skb(head);
Hi Tung,

kfree_skb(NULL) won't cause any use-after-free issue, as kfree_skb(skb)
will return when skb is NULL.

The root cause of use-after-free is as Cong fixed in
commit ed42989eab57 ("fix the skb_unshare() in tipc_buf_append()"):

When skb_unshare() returns NULL, the 'frag' is freed, and on the err
path the 'buf'(==the 'frag') get freed again, then the original skb
is freed.

But that commit indeed caused the memleak on the success path, and
the right fix should be:

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 2a78aa7..73068fb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -155,6 +155,7 @@ int tipc_buf_append(struct sk_buff **headbuf,
struct sk_buff **buf)
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
+   kfree_skb(*buf)
*buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
if (skb_is_nonlinear(head)) {

or:

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 2a78aa7..32c79c5 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -150,12 +150,11 @@ int tipc_buf_append(struct sk_buff **headbuf,
struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
-   if (skb_cloned(frag))
-   frag = skb_copy(frag, GFP_ATOMIC);
+   *buf = NULL;
+   frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
-   *buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
if (skb_is_nonlinear(head)) {
skb_walk_frags(head, tail) {

Thanks.

> return false;
>  }
>
> --
> 2.17.1
>
>
>
> ___
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net 0/2] net: fix a mcast issue for tipc udp media

2020-08-03 Thread Xin Long
Patch 1 is to add a function to get the dev by source address,
which will be used by Patch 2.

Xin Long (2):
  ipv6: add ipv6_dev_find()
  tipc: set ub->ifindex for local ipv6 address

 include/net/addrconf.h |  2 ++
 net/ipv6/addrconf.c| 39 +++
 net/tipc/udp_media.c   |  8 
 3 files changed, 49 insertions(+)

-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net 2/2] tipc: set ub->ifindex for local ipv6 address

2020-08-03 Thread Xin Long
Without ub->ifindex set for ipv6 address in tipc_udp_enable(),
ipv6_sock_mc_join() may make the wrong dev join the multicast
address in enable_mcast(). This causes that tipc links would
never be created.

So fix it by getting the right netdev and setting ub->ifindex,
as it does for ipv4 address.

Reported-by: Shuang Li 
Signed-off-by: Xin Long 
---
 net/tipc/udp_media.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 28a283f..9dec596 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -738,6 +738,13 @@ static int tipc_udp_enable(struct net *net, struct 
tipc_bearer *b,
b->mtu = b->media->mtu;
 #if IS_ENABLED(CONFIG_IPV6)
} else if (local.proto == htons(ETH_P_IPV6)) {
+   struct net_device *dev;
+
+   dev = ipv6_dev_find(net, &local.ipv6);
+   if (!dev) {
+   err = -ENODEV;
+   goto err;
+   }
udp_conf.family = AF_INET6;
udp_conf.use_udp6_tx_checksums = true;
udp_conf.use_udp6_rx_checksums = true;
@@ -745,6 +752,7 @@ static int tipc_udp_enable(struct net *net, struct 
tipc_bearer *b,
udp_conf.local_ip6 = in6addr_any;
else
udp_conf.local_ip6 = local.ipv6;
+   ub->ifindex = dev->ifindex;
b->mtu = 1280;
 #endif
} else {
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net 1/2] ipv6: add ipv6_dev_find()

2020-08-03 Thread Xin Long
This is to add an ip_dev_find like function for ipv6, used to find
the dev by saddr.

It will be used by TIPC protocol. So also export it.

Signed-off-by: Xin Long 
---
 include/net/addrconf.h |  2 ++
 net/ipv6/addrconf.c| 39 +++
 2 files changed, 41 insertions(+)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 8418b7d..ba3f6c15 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -97,6 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
 
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
 
+struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr);
+
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
 const struct in6_addr *addr,
 struct net_device *dev, int strict);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 840bfdb..857d6f9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1983,6 +1983,45 @@ int ipv6_chk_prefix(const struct in6_addr *addr, struct 
net_device *dev)
 }
 EXPORT_SYMBOL(ipv6_chk_prefix);
 
+/**
+ * ipv6_dev_find - find the first device with a given source address.
+ * @net: the net namespace
+ * @addr: the source address
+ *
+ * The caller should be protected by RCU, or RTNL.
+ */
+struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr)
+{
+   unsigned int hash = inet6_addr_hash(net, addr);
+   struct inet6_ifaddr *ifp, *result = NULL;
+   struct net_device *dev = NULL;
+
+   rcu_read_lock();
+   hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
+   if (net_eq(dev_net(ifp->idev->dev), net) &&
+   ipv6_addr_equal(&ifp->addr, addr)) {
+   result = ifp;
+   break;
+   }
+   }
+
+   if (!result) {
+   struct rt6_info *rt;
+
+   rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
+   if (rt) {
+   dev = rt->dst.dev;
+   ip6_rt_put(rt);
+   }
+   } else {
+   dev = result->idev->dev;
+   }
+   rcu_read_unlock();
+
+   return dev;
+}
+EXPORT_SYMBOL(ipv6_dev_find);
+
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr 
*addr,
 struct net_device *dev, int strict)
 {
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net 1/2] ipv6: add ipv6_dev_find()

2020-08-07 Thread Xin Long
On Thu, Aug 6, 2020 at 10:03 PM David Ahern  wrote:
>
> On 8/6/20 2:55 AM, Xin Long wrote:
> > On Thu, Aug 6, 2020 at 10:50 AM Hideaki Yoshifuji
> >  wrote:
> >>
> >> Hi,
> >>
> >> 2020幓8月4ę—„(火) 0:35 Xin Long :
> >>>
> >>> This is to add an ip_dev_find like function for ipv6, used to find
> >>> the dev by saddr.
> >>>
> >>> It will be used by TIPC protocol. So also export it.
> >>>
> >>> Signed-off-by: Xin Long 
> >>> ---
> >>>  include/net/addrconf.h |  2 ++
> >>>  net/ipv6/addrconf.c| 39 +++
> >>>  2 files changed, 41 insertions(+)
> >>>
> >>> diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> >>> index 8418b7d..ba3f6c15 100644
> >>> --- a/include/net/addrconf.h
> >>> +++ b/include/net/addrconf.h
> >>> @@ -97,6 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
> >>>
> >>>  int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
> >>>
> >>> +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr 
> >>> *addr);
> >>> +
> >>
> >> How do we handle link-local addresses?
> > This is what "if (!result)" branch meant to do:
> >
> > +   if (!result) {
> > +   struct rt6_info *rt;
> > +
> > +   rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
> > +   if (rt) {
> > +   dev = rt->dst.dev;
> > +   ip6_rt_put(rt);
> > +   }
> > +   } else {
> > +   dev = result->idev->dev;
> > +   }
> >
>
> the stated purpose of this function is to find the netdevice to which an
> address is attached. A route lookup should not be needed. Walking the
> address hash list finds the address and hence the netdev or it does not.
Hi, David,
Sorry. it does. I misunderstood the code in __ip_dev_find().
I will delete the rt6_lookup() part from ipv6_dev_find().

Also for the compatibility, tipc part should change to:
@@ -741,10 +741,8 @@ static int tipc_udp_enable(struct net *net,
struct tipc_bearer *b,
struct net_device *dev;

   dev = ipv6_dev_find(net, &local.ipv6);
   if (dev)
  ub->ifindex = dev->ifindex;

as when dev is not found from the hash list, it should fall back to
the old tipc code.

Ying, what do you think?


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net 1/2] ipv6: add ipv6_dev_find()

2020-08-09 Thread Xin Long
On Fri, Aug 7, 2020 at 5:26 PM Hideaki Yoshifuji
 wrote:
>
> Hi,
>
> 2020幓8月6ę—„(木) 23:03 David Ahern :
> >
> > On 8/6/20 2:55 AM, Xin Long wrote:
> > > On Thu, Aug 6, 2020 at 10:50 AM Hideaki Yoshifuji
> > >  wrote:
> > >>
> > >> Hi,
> > >>
> > >> 2020幓8月4ę—„(火) 0:35 Xin Long :
> > >>>
> > >>> This is to add an ip_dev_find like function for ipv6, used to find
> > >>> the dev by saddr.
> > >>>
> > >>> It will be used by TIPC protocol. So also export it.
> > >>>
> > >>> Signed-off-by: Xin Long 
> > >>> ---
> > >>>  include/net/addrconf.h |  2 ++
> > >>>  net/ipv6/addrconf.c| 39 +++
> > >>>  2 files changed, 41 insertions(+)
> > >>>
> > >>> diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> > >>> index 8418b7d..ba3f6c15 100644
> > >>> --- a/include/net/addrconf.h
> > >>> +++ b/include/net/addrconf.h
> > >>> @@ -97,6 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr 
> > >>> *addr,
> > >>>
> > >>>  int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device 
> > >>> *dev);
> > >>>
> > >>> +struct net_device *ipv6_dev_find(struct net *net, const struct 
> > >>> in6_addr *addr);
> > >>> +
> > >>
> > >> How do we handle link-local addresses?
> > > This is what "if (!result)" branch meant to do:
> > >
> > > +   if (!result) {
> > > +   struct rt6_info *rt;
> > > +
> > > +   rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
> > > +   if (rt) {
> > > +   dev = rt->dst.dev;
> > > +   ip6_rt_put(rt);
> > > +   }
> > > +   } else {
> > > +   dev = result->idev->dev;
> > > +   }
> > >
> >
> > the stated purpose of this function is to find the netdevice to which an
> > address is attached. A route lookup should not be needed. Walking the
> > address hash list finds the address and hence the netdev or it does not.
> >
> >
>
> User supplied scope id which should be set for link-local addresses
> in TIPC_NLA_UDP_LOCAL attribute must be honored when we
> check the address.
Hi, Hideaki san,

Sorry for not understanding your comment earlier.

The bad thing is tipc in iproute2 doesn't seem able to set scope_id.
I saw many places in kernel doing this check:

 if (__ipv6_addr_needs_scope_id(atype) &&
 !ip6->sin6_scope_id) { return -EINVAL; }

Can I ask why scope id is needed for link-local addresses?
and is that for link-local addresses only?

>
> ipv6_chk_addr() can check if the address and supplied ifindex is a valid
> local address.  Or introduce an extra ifindex argument to ipv6_dev_find().
Yeah, but if scope id means ifindex for  link-local addresses, ipv6_dev_find()
would be more like a function to validate the address with right scope id.

Thanks for your reviewing.


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net 1/2] ipv6: add ipv6_dev_find()

2020-08-13 Thread Xin Long
On Tue, Aug 11, 2020 at 10:26 AM Hideaki Yoshifuji
 wrote:
>
> Hi,
>
> 2020幓8月9ę—„(ę—„) 19:52 Xin Long :
> >
> > On Fri, Aug 7, 2020 at 5:26 PM Hideaki Yoshifuji
> >  wrote:
> > >
> > > Hi,
> > >
> > > 2020幓8月6ę—„(木) 23:03 David Ahern :
> > > >
> > > > On 8/6/20 2:55 AM, Xin Long wrote:
> > > > > On Thu, Aug 6, 2020 at 10:50 AM Hideaki Yoshifuji
> > > > >  wrote:
> > > > >>
> > > > >> Hi,
> > > > >>
> > > > >> 2020幓8月4ę—„(火) 0:35 Xin Long :
> > > > >>>
> > > > >>> This is to add an ip_dev_find like function for ipv6, used to find
> > > > >>> the dev by saddr.
> > > > >>>
> > > > >>> It will be used by TIPC protocol. So also export it.
> > > > >>>
> > > > >>> Signed-off-by: Xin Long 
> > > > >>> ---
> > > > >>>  include/net/addrconf.h |  2 ++
> > > > >>>  net/ipv6/addrconf.c| 39 +++
> > > > >>>  2 files changed, 41 insertions(+)
> > > > >>>
> > > > >>> diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> > > > >>> index 8418b7d..ba3f6c15 100644
> > > > >>> --- a/include/net/addrconf.h
> > > > >>> +++ b/include/net/addrconf.h
> > > > >>> @@ -97,6 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr 
> > > > >>> *addr,
> > > > >>>
> > > > >>>  int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device 
> > > > >>> *dev);
> > > > >>>
> > > > >>> +struct net_device *ipv6_dev_find(struct net *net, const struct 
> > > > >>> in6_addr *addr);
> > > > >>> +
> > > > >>
> > > > >> How do we handle link-local addresses?
> > > > > This is what "if (!result)" branch meant to do:
> > > > >
> > > > > +   if (!result) {
> > > > > +   struct rt6_info *rt;
> > > > > +
> > > > > +   rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
> > > > > +   if (rt) {
> > > > > +   dev = rt->dst.dev;
> > > > > +   ip6_rt_put(rt);
> > > > > +   }
> > > > > +   } else {
> > > > > +   dev = result->idev->dev;
> > > > > +   }
> > > > >
> > > >
> > > > the stated purpose of this function is to find the netdevice to which an
> > > > address is attached. A route lookup should not be needed. Walking the
> > > > address hash list finds the address and hence the netdev or it does not.
> > > >
> > > >
> > >
> > > User supplied scope id which should be set for link-local addresses
> > > in TIPC_NLA_UDP_LOCAL attribute must be honored when we
> > > check the address.
> > Hi, Hideaki san,
> >
> > Sorry for not understanding your comment earlier.
> >
> > The bad thing is tipc in iproute2 doesn't seem able to set scope_id.
>
> I looked into the iproute2 code quickly and I think it should; it uses
> getaddrinfo(3) and it will fill if you say "fe80::1%eth0" or something
> like that OR, fix the bug.
right, thanks.

>
> > I saw many places in kernel doing this check:
> >
> >  if (__ipv6_addr_needs_scope_id(atype) &&
> >  !ip6->sin6_scope_id) { return -EINVAL; }
> >
> > Can I ask why scope id is needed for link-local addresses?
> > and is that for link-local addresses only?
>
> Because we distinguish link-local scope addresses on different interfaces.
> On the other hand, we do not distinguish global scope addresses on
> different interfaces.
okay.

>
> >
> > >
> > > ipv6_chk_addr() can check if the address and supplied ifindex is a valid
> > > local address.  Or introduce an extra ifindex argument to ipv6_dev_find().
> > Yeah, but if scope id means ifindex for  link-local addresses, 
> > ipv6_dev_find()
> > would be more like a function to validate the address with right scope id.
> >
>
> I think we should find a net_device with a specific "valid" (non-tentative)
> address

Re: [tipc-discussion] [PATCH net 1/2] ipv6: add ipv6_dev_find()

2020-08-06 Thread Xin Long
On Thu, Aug 6, 2020 at 10:50 AM Hideaki Yoshifuji
 wrote:
>
> Hi,
>
> 2020幓8月4ę—„(火) 0:35 Xin Long :
> >
> > This is to add an ip_dev_find like function for ipv6, used to find
> > the dev by saddr.
> >
> > It will be used by TIPC protocol. So also export it.
> >
> > Signed-off-by: Xin Long 
> > ---
> >  include/net/addrconf.h |  2 ++
> >  net/ipv6/addrconf.c| 39 +++
> >  2 files changed, 41 insertions(+)
> >
> > diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> > index 8418b7d..ba3f6c15 100644
> > --- a/include/net/addrconf.h
> > +++ b/include/net/addrconf.h
> > @@ -97,6 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
> >
> >  int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
> >
> > +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr 
> > *addr);
> > +
>
> How do we handle link-local addresses?
This is what "if (!result)" branch meant to do:

+   if (!result) {
+   struct rt6_info *rt;
+
+   rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
+   if (rt) {
+   dev = rt->dst.dev;
+   ip6_rt_put(rt);
+   }
+   } else {
+   dev = result->idev->dev;
+   }

Thanks.

>
> --yoshfuji
>
> >  struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
> >  const struct in6_addr *addr,
> >  struct net_device *dev, int strict);
> > diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> > index 840bfdb..857d6f9 100644
> > --- a/net/ipv6/addrconf.c
> > +++ b/net/ipv6/addrconf.c
> > @@ -1983,6 +1983,45 @@ int ipv6_chk_prefix(const struct in6_addr *addr, 
> > struct net_device *dev)
> >  }
> >  EXPORT_SYMBOL(ipv6_chk_prefix);
> >
> > +/**
> > + * ipv6_dev_find - find the first device with a given source address.
> > + * @net: the net namespace
> > + * @addr: the source address
> > + *
> > + * The caller should be protected by RCU, or RTNL.
> > + */
> > +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr 
> > *addr)
> > +{
> > +   unsigned int hash = inet6_addr_hash(net, addr);
> > +   struct inet6_ifaddr *ifp, *result = NULL;
> > +   struct net_device *dev = NULL;
> > +
> > +   rcu_read_lock();
> > +   hlist_for_each_entry_rcu(ifp, _addr_lst[hash], addr_lst) {
> > +   if (net_eq(dev_net(ifp->idev->dev), net) &&
> > +   ipv6_addr_equal(&ifp->addr, addr)) {
> > +   result = ifp;
> > +   break;
> > +   }
> > +   }
> > +
> > +   if (!result) {
> > +   struct rt6_info *rt;
> > +
> > +   rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
> > +   if (rt) {
> > +   dev = rt->dst.dev;
> > +   ip6_rt_put(rt);
> > +   }
> > +   } else {
> > +   dev = result->idev->dev;
> > +   }
> > +   rcu_read_unlock();
> > +
> > +   return dev;
> > +}
> > +EXPORT_SYMBOL(ipv6_dev_find);
> > +
> >  struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct 
> > in6_addr *addr,
> >  struct net_device *dev, int strict)
> >  {
> > --
> > 2.1.0
> >


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net-next] tipc: fix NULL pointer dereference in streaming

2020-07-21 Thread Xin Long
On Wed, Jun 3, 2020 at 1:06 PM Tuong Lien  wrote:
>
> syzbot found the following crash:
>
> general protection fault, probably for non-canonical address 
> 0xdc19:  [#1] PREEMPT SMP KASAN
> KASAN: null-ptr-deref in range [0x00c8-0x00cf]
> CPU: 1 PID: 7060 Comm: syz-executor394 Not tainted 5.7.0-rc6-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> Google 01/01/2011
> RIP: 0010:__tipc_sendstream+0xbde/0x11f0 net/tipc/socket.c:1591
> Code: 00 00 00 00 48 39 5c 24 28 48 0f 44 d8 e8 fa 3e db f9 48 b8 00 00 00 00 
> 00 fc ff df 48 8d bb c8 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 e2 
> 04 00 00 48 8b 9b c8 00 00 00 48 b8 00 00 00
> RSP: 0018:c90003ef7818 EFLAGS: 00010202
> RAX: dc00 RBX:  RCX: 8797fd9d
> RDX: 0019 RSI: 8797fde6 RDI: 00c8
> RBP: 888099848040 R08: 88809a5f6440 R09: fbfff1860b4c
> R10: 8c305a5f R11: fbfff1860b4b R12: 88809984857e
> R13:  R14: 888086aa4000 R15: 
> FS:  009b4880() GS:8880ae70() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 2140 CR3: a7fdf000 CR4: 001406e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  tipc_sendstream+0x4c/0x70 net/tipc/socket.c:1533
>  sock_sendmsg_nosec net/socket.c:652 [inline]
>  sock_sendmsg+0xcf/0x120 net/socket.c:672
>  sys_sendmsg+0x32f/0x810 net/socket.c:2352
>  ___sys_sendmsg+0x100/0x170 net/socket.c:2406
>  __sys_sendmmsg+0x195/0x480 net/socket.c:2496
>  __do_sys_sendmmsg net/socket.c:2525 [inline]
>  __se_sys_sendmmsg net/socket.c:2522 [inline]
>  __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2522
>  do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
>  entry_SYSCALL_64_after_hwframe+0x49/0xb3
> RIP: 0033:0x440199
> ...
>
> This bug was bisected to commit 0a3e060f340d ("tipc: add test for Nagle
> algorithm effectiveness"). However, it is not the case, the trouble was
> from the base in the case of zero data length message sending, we would
> unexpectedly make an empty 'txq' queue after the 'tipc_msg_append()' in
> Nagle mode.
>
> A similar crash can be generated even without the bisected patch but at
> the link layer when it accesses the empty queue.
>
> We solve the issues by building at least one buffer to go with socket's
> header and an optional data section that may be empty like what we had
> with the 'tipc_msg_build()'.
>
> Note: the previous commit 4c21daae3dbc ("tipc: Fix NULL pointer
> dereference in __tipc_sendstream()") is obsoleted by this one since the
> 'txq' will be never empty and the check of 'skb != NULL' is unnecessary
> but it is safe anyway.
Hi, Tuong

If commit 4c21daae3dbc is obsoleted by this one, can you please
send a patch to revert it?

Thanks.

>
> Reported-by: syzbot+8eac6d030e7807c21...@syzkaller.appspotmail.com
> Fixes: c0bceb97db9e ("tipc: add smart nagle feature")
> Acked-by: Jon Maloy 
> Signed-off-by: Tuong Lien 
> ---
>  net/tipc/msg.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/net/tipc/msg.c b/net/tipc/msg.c
> index c0afcd627c5e..046e4cb3acea 100644
> --- a/net/tipc/msg.c
> +++ b/net/tipc/msg.c
> @@ -221,7 +221,7 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr 
> *m, int dlen,
> accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
> total = accounted;
>
> -   while (rem) {
> +   do {
> if (!skb || skb->len >= mss) {
> skb = tipc_buf_acquire(mss, GFP_KERNEL);
> if (unlikely(!skb))
> @@ -245,7 +245,7 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr 
> *m, int dlen,
> skb_put(skb, cpy);
> rem -= cpy;
> total += msg_blocks(hdr) - curr;
> -   }
> +   } while (rem);
> return total - accounted;
>  }
>
> --
> 2.13.7
>
>
>
> ___
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net-next] tipc: fix NULL pointer dereference in streaming

2020-07-21 Thread Xin Long
On Tue, Jul 21, 2020 at 7:26 PM Tuong Tong Lien
 wrote:
>
>
>
> > -----Original Message-----
> > From: Xin Long 
> > Sent: Tuesday, July 21, 2020 6:23 PM
> > To: Tuong Tong Lien 
> > Cc: davem ; jma...@redhat.com; ma...@donjonn.com; Ying 
> > Xue ; network dev
> > ; tipc-discussion@lists.sourceforge.net
> > Subject: Re: [tipc-discussion] [net-next] tipc: fix NULL pointer 
> > dereference in streaming
> >
> > On Wed, Jun 3, 2020 at 1:06 PM Tuong Lien  
> > wrote:
> > >
> > > syzbot found the following crash:
> > >
> > > general protection fault, probably for non-canonical address 
> > > 0xdc19:  [#1] PREEMPT SMP KASAN
> > > KASAN: null-ptr-deref in range [0x00c8-0x00cf]
> > > CPU: 1 PID: 7060 Comm: syz-executor394 Not tainted 5.7.0-rc6-syzkaller #0
> > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> > > Google 01/01/2011
> > > RIP: 0010:__tipc_sendstream+0xbde/0x11f0 net/tipc/socket.c:1591
> > > Code: 00 00 00 00 48 39 5c 24 28 48 0f 44 d8 e8 fa 3e db f9 48 b8 00 00 
> > > 00 00 00 fc ff df 48 8d bb c8 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c
> > 02 00 0f 85 e2 04 00 00 48 8b 9b c8 00 00 00 48 b8 00 00 00
> > > RSP: 0018:c90003ef7818 EFLAGS: 00010202
> > > RAX: dc00 RBX:  RCX: 8797fd9d
> > > RDX: 0019 RSI: 8797fde6 RDI: 00c8
> > > RBP: 888099848040 R08: 88809a5f6440 R09: fbfff1860b4c
> > > R10: 8c305a5f R11: fbfff1860b4b R12: 88809984857e
> > > R13:  R14: 888086aa4000 R15: 
> > > FS:  009b4880() GS:8880ae70() 
> > > knlGS:
> > > CS:  0010 DS:  ES:  CR0: 80050033
> > > CR2: 2140 CR3: a7fdf000 CR4: 001406e0
> > > DR0:  DR1:  DR2: 
> > > DR3:  DR6: fffe0ff0 DR7: 0400
> > > Call Trace:
> > >  tipc_sendstream+0x4c/0x70 net/tipc/socket.c:1533
> > >  sock_sendmsg_nosec net/socket.c:652 [inline]
> > >  sock_sendmsg+0xcf/0x120 net/socket.c:672
> > >  sys_sendmsg+0x32f/0x810 net/socket.c:2352
> > >  ___sys_sendmsg+0x100/0x170 net/socket.c:2406
> > >  __sys_sendmmsg+0x195/0x480 net/socket.c:2496
> > >  __do_sys_sendmmsg net/socket.c:2525 [inline]
> > >  __se_sys_sendmmsg net/socket.c:2522 [inline]
> > >  __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2522
> > >  do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
> > >  entry_SYSCALL_64_after_hwframe+0x49/0xb3
> > > RIP: 0033:0x440199
> > > ...
> > >
> > > This bug was bisected to commit 0a3e060f340d ("tipc: add test for Nagle
> > > algorithm effectiveness"). However, it is not the case, the trouble was
> > > from the base in the case of zero data length message sending, we would
> > > unexpectedly make an empty 'txq' queue after the 'tipc_msg_append()' in
> > > Nagle mode.
> > >
> > > A similar crash can be generated even without the bisected patch but at
> > > the link layer when it accesses the empty queue.
> > >
> > > We solve the issues by building at least one buffer to go with socket's
> > > header and an optional data section that may be empty like what we had
> > > with the 'tipc_msg_build()'.
> > >
> > > Note: the previous commit 4c21daae3dbc ("tipc: Fix NULL pointer
> > > dereference in __tipc_sendstream()") is obsoleted by this one since the
> > > 'txq' will be never empty and the check of 'skb != NULL' is unnecessary
> > > but it is safe anyway.
> > Hi, Tuong
> >
> > If commit 4c21daae3dbc is obsoleted by this one, can you please
> > send a patch to revert it?
> >
> > Thanks.
> Hi Xin,
>
> That patch includes a sanity check which is always true and safe, so I don't 
> think
> we need to revert it. Do you agree?
surely it's safe.

People may be confused when reading the code:
if (skb) {
msg_set_ack_required(buf_msg(skb));
tsk->expect_ack = true;
} else {
tsk->expect_ack = false;  <- [1]
}

For example: why does expect_ack need to be set to false in [1]?

>
> BR/Tuong
> >
> > &g

[tipc-discussion] [PATCH net] tipc: not enable tipc when ipv6 works as a module

2020-08-16 Thread Xin Long
When using ipv6_dev_find() in one module, it requires ipv6 not to
work as a module. Otherwise, this error occurs in build:

  undefined reference to `ipv6_dev_find'.

So fix it by adding "depends on IPV6 || IPV6=n" to tipc/Kconfig,
as it does in sctp/Kconfig.

Fixes: 5a6f6f579178 ("tipc: set ub->ifindex for local ipv6 address")
Reported-by: kernel test robot 
Acked-by: Randy Dunlap 
Signed-off-by: Xin Long 
---
 net/tipc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 9dd7802..be1c400 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -6,6 +6,7 @@
 menuconfig TIPC
tristate "The TIPC Protocol"
depends on INET
+   depends on IPV6 || IPV6=n
help
  The Transparent Inter Process Communication (TIPC) protocol is
  specially designed for intra cluster communication. This protocol
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] Documentation: add more details in tipc.rst

2021-06-11 Thread Xin Long
kernel-doc for TIPC is too simple, we need to add more information for it.

This patch is to extend the abstract, and add the Features and Links items.

Signed-off-by: Xin Long 
---
 Documentation/networking/tipc.rst | 121 +-
 1 file changed, 118 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/tipc.rst 
b/Documentation/networking/tipc.rst
index 76775f24cdc8..ab63d298cca2 100644
--- a/Documentation/networking/tipc.rst
+++ b/Documentation/networking/tipc.rst
@@ -4,10 +4,125 @@
 Linux Kernel TIPC
 =
 
-TIPC (Transparent Inter Process Communication) is a protocol that is
-specially designed for intra-cluster communication.
+Introduction
+
 
-For more information about TIPC, see http://tipc.sourceforge.net.
+TIPC (Transparent Inter Process Communication) is a protocol that is specially
+designed for intra-cluster communication. It can be configured to transmit
+messages either on UDP or directly across Ethernet. Message delivery is
+sequence guaranteed, loss free and flow controlled. Latency times are shorter
+than with any other known protocol, while maximal throughput is comparable to
+that of TCP.
+
+TIPC Features
+-
+
+- Cluster wide IPC service
+
+  Have you ever wished you had the convenience of Unix Domain Sockets even when
+  transmitting data between cluster nodes? Where you yourself determine the
+  addresses you want to bind to and use? Where you don't have to perform DNS
+  lookups and worry about IP addresses? Where you don't have to start timers
+  to monitor the continuous existence of peer sockets? And yet without the
+  downsides of that socket type, such as the risk of lingering inodes?
+
+  Welcome to the Transparent Inter Process Communication service, TIPC in 
short,
+  which gives you all of this, and a lot more.
+
+- Service Addressing
+
+  A fundamental concept in TIPC is that of Service Addressing which makes it
+  possible for a programmer to choose his own address, bind it to a server
+  socket and let client programs use only that address for sending messages.
+
+- Service Tracking
+
+  A client wanting to wait for the availability of a server, uses the Service
+  Tracking mechanism to subscribe for binding and unbinding/close events for
+  sockets with the associated service address.
+
+  The service tracking mechanism can also be used for Cluster Topology 
Tracking,
+  i.e., subscribing for availability/non-availability of cluster nodes.
+
+  Likewise, the service tracking mechanism can be used for Cluster Connectivity
+  Tracking, i.e., subscribing for up/down events for individual links between
+  cluster nodes.
+
+- Transmission Modes
+
+  Using a service address, a client can send datagram messages to a server 
socket.
+
+  Using the same address type, it can establish a connection towards an 
accepting
+  server socket.
+
+  It can also use a service address to create and join a Communication Group,
+  which is the TIPC manifestation of a brokerless message bus.
+
+  Multicast with very good performance and scalability is available both in
+  datagram mode and in communication group mode.
+
+- Inter Node Links
+
+  Communication between any two nodes in a cluster is maintained by one or two
+  Inter Node Links, which both guarantee data traffic integrity and monitor
+  the peer node's availability.
+
+- Cluster Scalability
+
+  By applying the Overlapping Ring Monitoring algorithm on the inter node links
+  it is possible to scale TIPC clusters up to 1000 nodes with a maintained
+  neighbor failure discovery time of 1-2 seconds. For smaller clusters this
+  time can be made much shorter.
+
+- Neighbor Discovery
+
+  Neighbor Node Discovery in the cluster is done by Ethernet broadcast or UDP
+  multicast, when any of those services are available. If not, configured peer
+  IP addresses can be used.
+
+- Configuration
+
+  When running TIPC in single node mode no configuration whatsoever is needed.
+  When running in cluster mode TIPC must as a minimum be given a node address
+  (before Linux 4.17) and told which interface to attach to. The "tipc"
+  configuration tool makes it possible to add and maintain many more
+  configuration parameters.
+
+- Performance
+
+  TIPC message transfer latency times are better than in any other known 
protocol.
+  Maximal byte throughput for inter-node connections is still somewhat lower 
than
+  for TCP, while they are superior for intra-node and inter-container 
throughput
+  on the same host.
+
+- Language Support
+
+  The TIPC user API has support for C, Python, Perl, Ruby, D and Go.
+
+More Information
+
+
+- How to set up TIPC:
+
+  http://tipc.io/getting_started.html
+
+- How to program with TIPC:
+
+  http://tipc.io/programming.html
+
+- How to contribute to TIPC:
+
+- http://tipc.io/contacts.html
+
+- More details about TIPC specification:
+
+  http://tipc.io/protocol.html
+
+
+Implementation
+==
+
+TIPC is i

[tipc-discussion] [PATCH net] tipc: skb_linearize the head skb when reassembling msgs

2021-05-07 Thread Xin Long
It's not a good idea to append the frag skb to a skb's frag_list if
the frag_list already has skbs from elsewhere, such as this skb was
created by pskb_copy() where the frag_list was cloned (all the skbs
in it were skb_get'ed) and shared by multiple skbs.

However, the new appended frag skb should have been only seen by the
current skb. Otherwise, it will cause use after free crashes as this
appended frag skb is seen by multiple skbs but it only got skb_get
called once.

The same thing happens with a skb updated by pskb_may_pull() with a
skb_cloned skb. Li Shuang has reported quite a few crashes caused
by this when doing testing over macvlan devices:

  [] kernel BUG at net/core/skbuff.c:1970!
  [] Call Trace:
  []  skb_clone+0x4d/0xb0
  []  macvlan_broadcast+0xd8/0x160 [macvlan]
  []  macvlan_process_broadcast+0x148/0x150 [macvlan]
  []  process_one_work+0x1a7/0x360
  []  worker_thread+0x30/0x390

  [] kernel BUG at mm/usercopy.c:102!
  [] Call Trace:
  []  __check_heap_object+0xd3/0x100
  []  __check_object_size+0xff/0x16b
  []  simple_copy_to_iter+0x1c/0x30
  []  __skb_datagram_iter+0x7d/0x310
  []  __skb_datagram_iter+0x2a5/0x310
  []  skb_copy_datagram_iter+0x3b/0x90
  []  tipc_recvmsg+0x14a/0x3a0 [tipc]
  []  sys_recvmsg+0x91/0x150
  []  ___sys_recvmsg+0x7b/0xc0

  [] kernel BUG at mm/slub.c:305!
  [] Call Trace:
  []  
  []  kmem_cache_free+0x3ff/0x400
  []  __netif_receive_skb_core+0x12c/0xc40
  []  ? kmem_cache_alloc+0x12e/0x270
  []  netif_receive_skb_internal+0x3d/0xb0
  []  ? get_rx_page_info+0x8e/0xa0 [be2net]
  []  be_poll+0x6ef/0xd00 [be2net]
  []  ? irq_exit+0x4f/0x100
  []  net_rx_action+0x149/0x3b0

  ...

This patch is to fix it by linearizing the head skb if it has frag_list
set in tipc_buf_append(). Note that we choose to do this before calling
skb_unshare(), as __skb_linearize() will avoid skb_copy(). Also, we cannot
simply drop the frag_list as the earlier code did.

Fixes: 45c8b7b175ce ("tipc: allow non-linear first fragment buffer")
Reported-by: Li Shuang 
Signed-off-by: Xin Long 
---
 net/tipc/msg.c | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 3f0a253..ce6ab54 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -149,18 +149,13 @@ int tipc_buf_append(struct sk_buff **headbuf, struct 
sk_buff **buf)
if (unlikely(head))
goto err;
*buf = NULL;
+   if (skb_has_frag_list(frag) && __skb_linearize(frag))
+   goto err;
frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
TIPC_SKB_CB(head)->tail = NULL;
-   if (skb_is_nonlinear(head)) {
-   skb_walk_frags(head, tail) {
-   TIPC_SKB_CB(head)->tail = tail;
-   }
-   } else {
-   skb_frag_list_init(head);
-   }
return 0;
}
 
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"

2021-05-15 Thread Xin Long
On Fri, May 14, 2021 at 7:18 PM Jon Maloy  wrote:
>
>
>
> On 5/14/21 6:10 PM, patchwork-bot+netdev...@kernel.org wrote:
> > Hello:
> >
> > This patch was applied to netdev/net.git (refs/heads/master):
> >
> > On Fri, 14 May 2021 08:23:03 +0700 you wrote:
> >> This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046.
> >> Above fix is not correct and caused memory leak issue.
>
> I just convinced Xin (and myself) that the crash (double free) he was
> observing, and which he wanted to fix with the "tipc: fix a race in
> tipc_sk_mcast_rcv" patch was due to this bug.
> Now, realizing that this is causing a memory leak and not a double free
> I suspect there might still be an issue.
> Does anybody have a theory?
Hi Jon, I think the double free issue was due to the one I fixed in the patch
I posted:

[PATCH net] tipc: skb_linearize the head skb when reassembling msgs

see the changelog.
>
> ///jon
>
> >>
> >> Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
> >> Acked-by: Jon Maloy 
> >> Acked-by: Tung Nguyen 
> >> Signed-off-by: Hoang Le 
> >>
> >> [...]
> > Here is the summary with links:
> >- [net] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"
> >  https://git.kernel.org/netdev/net/c/75016891357a
> >
> > You are awesome, thank you!
> > --
> > Deet-doot-dot, I am a bot.
> > https://korg.docs.kernel.org/patchwork/pwbot.html
> >
> >
>


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: simplify the finalize work queue

2021-05-17 Thread Xin Long
This patch is to use "struct work_struct" for the finalize work queue
instead of "struct tipc_net_work", as it can get the "net" and "addr"
from tipc_net's other members and there is no need to add extra net
and addr in tipc_net by defining "struct tipc_net_work".

Note that it's safe to get net from tn->bcl as bcl is always released
after the finalize work queue is done.

Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/core.c |  4 ++--
 net/tipc/core.h |  8 +---
 net/tipc/discover.c |  4 ++--
 net/tipc/link.c |  5 +
 net/tipc/link.h |  1 +
 net/tipc/net.c  | 15 +++
 6 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 72f3ac7..3f4542e 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -60,7 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->trial_addr = 0;
tn->addr_trial_end = 0;
tn->capabilities = TIPC_NODE_CAPABILITIES;
-   INIT_WORK(&tn->final_work.work, tipc_net_finalize_work);
+   INIT_WORK(&tn->work, tipc_net_finalize_work);
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -110,7 +110,7 @@ static void __net_exit tipc_exit_net(struct net *net)
 
tipc_detach_loopback(net);
/* Make sure the tipc_net_finalize_work() finished */
-   cancel_work_sync(&tn->final_work.work);
+   cancel_work_sync(&tn->work);
tipc_net_stop(net);
 
tipc_bcast_stop(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5741ae4..0a3f7a7 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -91,12 +91,6 @@ extern unsigned int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
 extern int sysctl_tipc_named_timeout __read_mostly;
 
-struct tipc_net_work {
-   struct work_struct work;
-   struct net *net;
-   u32 addr;
-};
-
 struct tipc_net {
u8  node_id[NODE_ID_LEN];
u32 node_addr;
@@ -148,7 +142,7 @@ struct tipc_net {
struct tipc_crypto *crypto_tx;
 #endif
/* Work item for net finalize */
-   struct tipc_net_work final_work;
+   struct work_struct work;
/* The numbers of work queues in schedule */
atomic_t wq_count;
 };
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 5380f60..da69e1a 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -168,7 +168,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer 
*d,
 
/* Apply trial address if we just left trial period */
if (!trial && !self) {
-   tipc_sched_net_finalize(net, tn->trial_addr);
+   schedule_work(&tn->work);
msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -308,7 +308,7 @@ static void tipc_disc_timeout(struct timer_list *t)
if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
spin_unlock_bh(&d->lock);
-   tipc_sched_net_finalize(net, tn->trial_addr);
+   schedule_work(&tn->work);
return;
}
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1151092..c44b4bf 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -372,6 +372,11 @@ char tipc_link_plane(struct tipc_link *l)
return l->net_plane;
 }
 
+struct net *tipc_link_net(struct tipc_link *l)
+{
+   return l->net;
+}
+
 void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
 {
l->peer_caps = capabilities;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index fc07232..a16f401 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -156,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l,   struct 
tipc_msg *hdr,
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
  struct sk_buff_head *xmitq);
 bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a130195..0e95572 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,6 +41,7 @@
 #include "socket.h"
 #include "node.h"
 #include "bcast.h"
+#include "link.h"
 #include "netlink.h"
 #include "monitor.h"
 
@@ -142,19 +143,9 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 
 void tipc_net_finalize_work(struct work_struct *work)
 {
-   struct tipc_net_work *fwork;
+   struct tipc_net *tn = container_of(work, struct tipc_net, work);
 
-   fwork = container_of(work, struct tipc_net_work, work);
-   tipc_net_finalize(fwork->net, fwork->addr);
-}
-
-void tipc_sched_net_finalize(str

Re: [tipc-discussion] [PATCH net] tipc: fix a race in tipc_sk_mcast_rcv

2021-05-14 Thread Xin Long
On Thu, May 13, 2021 at 5:15 PM Jon Maloy  wrote:
>
>
>
> On 4/28/21 3:30 PM, Xin Long wrote:
> > After commit cb1b728096f5 ("tipc: eliminate race condition at multicast
> > reception"), when processing the multicast reception, the packets are
> > firstly moved from be->inputq1 to be->arrvq in tipc_node_broadcast(),
> > then process be->arrvq in tipc_sk_mcast_rcv().
> >
> > In tipc_sk_mcast_rcv(), it gets the 1st skb by skb_peek(), then process
> > this skb without any lock. It means meanwhile another thread could also
> > call tipc_sk_mcast_rcv() and process be->arrvq and pick up the same skb,
> > then free it. A double free issue will be caused as Li Shuang reported:
> >
> >[] kernel BUG at mm/slub.c:305!
> >[]  kfree+0x3a7/0x3d0
> >[]  kfree_skb+0x32/0xa0
> >[]  skb_release_data+0xb4/0x170
> >[]  kfree_skb+0x32/0xa0
> >[]  skb_release_data+0xb4/0x170
> >[]  kfree_skb+0x32/0xa0
> >[]  tipc_sk_mcast_rcv+0x1fa/0x380 [tipc]
> >[]  tipc_rcv+0x411/0x1120 [tipc]
> >[]  tipc_udp_recv+0xc6/0x1e0 [tipc]
> >[]  udp_queue_rcv_one_skb+0x1a9/0x500
> >[]  udp_unicast_rcv_skb.isra.66+0x75/0x90
> >[]  __udp4_lib_rcv+0x537/0xc40
> >[]  ip_protocol_deliver_rcu+0xdf/0x1d0
> >[]  ip_local_deliver_finish+0x4a/0x50
> >[]  ip_local_deliver+0x6b/0xe0
> >[]  ip_rcv+0x27b/0x36a
> >[]  __netif_receive_skb_core+0xb47/0xc40
> >[]  process_backlog+0xae/0x160
> >
> > Commit 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
> > tried to fix this double free by not releasing the skbs in be->arrvq,
> > which would definitely cause the skbs' leak.
> >
> > The problem is we shouldn't process the skbs in be->arrvq without any
> > lock to protect the code from peeking to dequeuing them. The fix here
> > is to use a temp skb list instead of be->arrvq to make it "per thread
> > safe". While at it, remove the no-longer-used be->arrvq.
> >
> > Fixes: cb1b728096f5 ("tipc: eliminate race condition at multicast 
> > reception")
> > Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
> > Reported-by: Li Shuang 
> > Signed-off-by: Xin Long 
> > ---
> >   net/tipc/node.c   |  9 -
> >   net/tipc/socket.c | 16 +++-
> >   2 files changed, 7 insertions(+), 18 deletions(-)
> >
> > diff --git a/net/tipc/node.c b/net/tipc/node.c
> > index e0ee832..0c636fb 100644
> > --- a/net/tipc/node.c
> > +++ b/net/tipc/node.c
> > @@ -72,7 +72,6 @@ struct tipc_link_entry {
> >   struct tipc_bclink_entry {
> >   struct tipc_link *link;
> >   struct sk_buff_head inputq1;
> > - struct sk_buff_head arrvq;
> >   struct sk_buff_head inputq2;
> >   struct sk_buff_head namedq;
> >   u16 named_rcv_nxt;
> > @@ -552,7 +551,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 
> > addr, u8 *peer_id,
> > >   INIT_LIST_HEAD(&n->conn_sks);
> > >   skb_queue_head_init(&n->bc_entry.namedq);
> > >   skb_queue_head_init(&n->bc_entry.inputq1);
> > > - __skb_queue_head_init(&n->bc_entry.arrvq);
> > >   skb_queue_head_init(&n->bc_entry.inputq2);
> > >   for (i = 0; i < MAX_BEARERS; i++)
> > >   spin_lock_init(&n->links[i].lock);
> > @@ -1803,14 +1801,15 @@ void tipc_node_broadcast(struct net *net, struct 
> > sk_buff *skb, int rc_dests)
> >   static void tipc_node_mcast_rcv(struct tipc_node *n)
> >   {
> > >   struct tipc_bclink_entry *be = &n->bc_entry;
> > > + struct sk_buff_head tmpq;
> > >
> > > - /* 'arrvq' is under inputq2's lock protection */
> > > + __skb_queue_head_init(&tmpq);
> > >   spin_lock_bh(&be->inputq2.lock);
> > >   spin_lock_bh(&be->inputq1.lock);
> > > - skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
> > > + skb_queue_splice_tail_init(&be->inputq1, &tmpq);
> > >   spin_unlock_bh(&be->inputq1.lock);
> > >   spin_unlock_bh(&be->inputq2.lock);
> > > - tipc_sk_mcast_rcv(n->net, &be->arrvq, &be->inputq2);
> > > + tipc_sk_mcast_rcv(n->net, &tmpq, &be->inputq2);
> >   }
> >
> >   static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg 
> > *hdr,
> > diff --git a/net/tipc/socket.c b/net/tipc/socket.c
> > index 022999e..2870798 100644
> > --- a/net/tipc/socket.c
> > +++ b/net/tipc/socket.c
> > @@ -1210,8 +1210,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct 
> > sk_buff_head *arrvq,

[tipc-discussion] [PATCH net] tipc: wait and exit until all work queues are done

2021-05-14 Thread Xin Long
On some hosts, a crash could be triggered simply by repeating these
commands several times:

  # modprobe tipc
  # tipc bearer enable media udp name UDP1 localip 127.0.0.1
  # rmmod tipc

  [] BUG: unable to handle kernel paging request at c096bb00
  [] Workqueue: events 0xc096bb00
  [] Call Trace:
  []  ? process_one_work+0x1a7/0x360
  []  ? worker_thread+0x30/0x390
  []  ? create_worker+0x1a0/0x1a0
  []  ? kthread+0x116/0x130
  []  ? kthread_flush_work_fn+0x10/0x10
  []  ? ret_from_fork+0x35/0x40

When removing the TIPC module, the UDP tunnel sock will be delayed to
release in a work queue as sock_release() can't be done in rtnl_lock().
If the work queue is scheduled to run after the TIPC module is removed,
the kernel will crash, as the code of the work queue function
cleanup_bearer() no longer exists when trying to invoke it.

To fix it, this patch introduces a member wq_count in tipc_net to track
the number of work queues in schedule, and waits in tipc_exit_net()
until all work queues are done before exiting.

Reported-by: Shuang Li 
Signed-off-by: Xin Long 
---
 net/tipc/core.c  | 2 ++
 net/tipc/core.h  | 2 ++
 net/tipc/udp_media.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f03..72f3ac7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
 #ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_stop(&tipc_net(net)->crypto_tx);
 #endif
+   while (atomic_read(&tn->wq_count))
+   cond_resched();
 }
 
 static void __net_exit tipc_pernet_pre_exit(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b2..5741ae4 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -149,6 +149,8 @@ struct tipc_net {
 #endif
/* Work item for net finalize */
struct tipc_net_work final_work;
+   /* The numbers of work queues in schedule */
+   atomic_t wq_count;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2c..c2bb818 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
 
+   atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
RCU_INIT_POINTER(ub->bearer, NULL);
 
/* sock_release need to be done outside of rtnl lock */
+   atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
INIT_WORK(&ub->work, cleanup_bearer);
schedule_work(&ub->work);
 }
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: simplify the finalize work queue

2021-05-14 Thread Xin Long
This patch is to use "struct work_struct" for the finalize work queue
instead of "struct tipc_net_work", as it can get the "net" and "addr"
from tipc_net's other members and there is no need to add extra net
and addr in tipc_net by defining "struct tipc_net_work".

Note that it's safe to get net from tn->bcl as bcl is always released
after the finalize work queue is done.

Signed-off-by: Xin Long 
---
 net/tipc/core.c |  4 ++--
 net/tipc/core.h |  8 +---
 net/tipc/discover.c |  4 ++--
 net/tipc/link.c |  5 +
 net/tipc/link.h |  1 +
 net/tipc/net.c  | 15 +++
 6 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 72f3ac7..3f4542e 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -60,7 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->trial_addr = 0;
tn->addr_trial_end = 0;
tn->capabilities = TIPC_NODE_CAPABILITIES;
-   INIT_WORK(&tn->final_work.work, tipc_net_finalize_work);
+   INIT_WORK(&tn->work, tipc_net_finalize_work);
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -110,7 +110,7 @@ static void __net_exit tipc_exit_net(struct net *net)
 
tipc_detach_loopback(net);
/* Make sure the tipc_net_finalize_work() finished */
-   cancel_work_sync(>final_work.work);
+   cancel_work_sync(&tn->work);
tipc_net_stop(net);
 
tipc_bcast_stop(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5741ae4..0a3f7a7 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -91,12 +91,6 @@ extern unsigned int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
 extern int sysctl_tipc_named_timeout __read_mostly;
 
-struct tipc_net_work {
-   struct work_struct work;
-   struct net *net;
-   u32 addr;
-};
-
 struct tipc_net {
u8  node_id[NODE_ID_LEN];
u32 node_addr;
@@ -148,7 +142,7 @@ struct tipc_net {
struct tipc_crypto *crypto_tx;
 #endif
/* Work item for net finalize */
-   struct tipc_net_work final_work;
+   struct work_struct work;
/* The numbers of work queues in schedule */
atomic_t wq_count;
 };
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 5380f60..da69e1a 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -168,7 +168,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer 
*d,
 
/* Apply trial address if we just left trial period */
if (!trial && !self) {
-   tipc_sched_net_finalize(net, tn->trial_addr);
+   schedule_work(&tn->work);
msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -308,7 +308,7 @@ static void tipc_disc_timeout(struct timer_list *t)
if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
mod_timer(>timer, jiffies + TIPC_DISC_INIT);
spin_unlock_bh(>lock);
-   tipc_sched_net_finalize(net, tn->trial_addr);
+   schedule_work(&tn->work);
return;
}
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1151092..c44b4bf 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -372,6 +372,11 @@ char tipc_link_plane(struct tipc_link *l)
return l->net_plane;
 }
 
+struct net *tipc_link_net(struct tipc_link *l)
+{
+   return l->net;
+}
+
 void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
 {
l->peer_caps = capabilities;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index fc07232..a16f401 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -156,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l,   struct 
tipc_msg *hdr,
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
  struct sk_buff_head *xmitq);
 bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a130195..0e95572 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,6 +41,7 @@
 #include "socket.h"
 #include "node.h"
 #include "bcast.h"
+#include "link.h"
 #include "netlink.h"
 #include "monitor.h"
 
@@ -142,19 +143,9 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 
 void tipc_net_finalize_work(struct work_struct *work)
 {
-   struct tipc_net_work *fwork;
+   struct tipc_net *tn = container_of(work, struct tipc_net, work);
 
-   fwork = container_of(work, struct tipc_net_work, work);
-   tipc_net_finalize(fwork->net, fwork->addr);
-}
-
-void tipc_sched_net_finalize(struct net *net, u32 addr

[tipc-discussion] [PATCH net] tipc: wait and exit until all work queues are done

2021-05-16 Thread Xin Long
On some host, a crash could be triggered simply by repeating these
commands several times:

  # modprobe tipc
  # tipc bearer enable media udp name UDP1 localip 127.0.0.1
  # rmmod tipc

  [] BUG: unable to handle kernel paging request at c096bb00
  [] Workqueue: events 0xc096bb00
  [] Call Trace:
  []  ? process_one_work+0x1a7/0x360
  []  ? worker_thread+0x30/0x390
  []  ? create_worker+0x1a0/0x1a0
  []  ? kthread+0x116/0x130
  []  ? kthread_flush_work_fn+0x10/0x10
  []  ? ret_from_fork+0x35/0x40

When removing the TIPC module, the release of the UDP tunnel sock is
deferred to a work queue, as sock_release() can't be done under rtnl_lock().
If the work is scheduled to run after the TIPC module is removed, the
kernel will crash, as the code of the work function cleanup_bearer() no
longer exists when the kernel tries to invoke it.

To fix it, this patch introduces a member wq_count in tipc_net to track
the number of scheduled work items, and makes tipc_exit_net() wait
until all of them are done before exiting.

Fixes: d0f91938bede ("tipc: add ip/udp media type")
Reported-by: Shuang Li 
Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/core.c  | 2 ++
 net/tipc/core.h  | 2 ++
 net/tipc/udp_media.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f03..72f3ac7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
 #ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_stop(_net(net)->crypto_tx);
 #endif
+   while (atomic_read(&tn->wq_count))
+   cond_resched();
 }
 
 static void __net_exit tipc_pernet_pre_exit(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b2..5741ae4 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -149,6 +149,8 @@ struct tipc_net {
 #endif
/* Work item for net finalize */
struct tipc_net_work final_work;
+   /* The numbers of work queues in schedule */
+   atomic_t wq_count;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2c..c2bb818 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
 
+   atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
dst_cache_destroy(>rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
RCU_INIT_POINTER(ub->bearer, NULL);
 
/* sock_release need to be done outside of rtnl lock */
+   atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
INIT_WORK(>work, cleanup_bearer);
schedule_work(>work);
 }
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] DGRAM/STREAM Crossover on Debian?

2021-06-02 Thread Xin Long
On Wed, May 26, 2021 at 11:38 AM Duzan, Gary D via tipc-discussion
 wrote:
>
>I'm in the process of enhancing a TIPC DGRAM-based RPC-ish service to 
> include TIPC STREAM transport for larger messages. To simplify configuration, 
> I have the server process(es) bind() the same type/range for both DGRAM and 
> STREAM sockets (poll()ing to see which have incoming requests), then choose 
> which to use on the client. This seems to work on most of my Linux systems 
> (RHEL-8, Ubuntu 20.04/21.04, Fedora 34, Debian 11), but on my Debian 10 
> system (4.19.181-1 kernel) I am seeing messages from a DGRAM client appearing 
> on an accept()ed STREAM socket on the server. I have confirmed that the 
> client is not sending anything on a STREAM socket, and the message received 
> by the server is formatted as a DGRAM message (without the message framing 
> header).

When you start two sockets on the server, DGRAM and STREAM, in the
client's nametable there will be 2 sockets with different portids:
# tipc nametable show
Type   Lower  Upper  ScopePort   Node
1  17 17 cluster  4063960415
1  17 17 cluster  1106254118

When the client calls sendmsg()/connect() to send msg to the server,
it will choose one of them by the rule of "local, closest-first or
round-robin".
The client doesn't know if the peer is a DGRAM socket or STREAM
socket. In your case, it should go round-robin.

Without this commit:

commit 25b9221b959483f17c2964d0922869e16caa86b5
Author: Jon Maloy 
Date:   Fri Sep 28 20:23:21 2018 +0200

tipc: add SYN bit to connection setup messages

The SYN msg for STREAM is no different from the DATA msg for DGRAM.
That's what you're seeing in kernel-4.19.

>
>Debian isn't a target platform for production, so I don't need a specific 
> fix, but it is still surprising and a bit disturbing. Was this a known 
> problem with the 4.19 kernel? Are there particular reasons why using this 
> pattern is a bad idea?
I think it may not work as expected if you create 2 different types of
TIPC sockets binding to the same address.
At least on the latest kernel, once the DGRAM client chooses the
STREAM socket, the DATA msg will be dropped.

Thanks.

>
>Thanks.
>
> Gary Duzan
> FIS - GT.M Core
>
> The information contained in this message is proprietary and/or confidential. 
> If you are not the intended recipient, please: (i) delete the message and all 
> copies; (ii) do not disclose, distribute or use the message in any manner; 
> and (iii) notify the sender immediately. In addition, please be aware that 
> any message addressed to our domain is subject to archiving and review by 
> persons other than the intended recipient. Thank you.
>
> ___
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCHv2 net-next] tipc: keep the skb in rcv queue until the whole data is read

2021-06-30 Thread Xin Long
Currently, when userspace reads a datagram with a buffer that is
smaller than this datagram, the data will be truncated and only
part of it can be received by users. It doesn't seem right that
users don't know the datagram size and have to use a huge buffer
to read it to avoid the truncation.

This patch fixes it by keeping the skb in rcv queue until the
whole data is read by users. Only the last msg of the datagram
will be marked with MSG_EOR, just as TCP/SCTP does.

Note that this will work as above only when MSG_EOR is set in the
flags parameter of recvmsg(), so that it won't break any old user
applications.

v1->v2:
  - Enable this only when MSG_EOR is passed in the flags of
    recvmsg(), to fix the compatibility issue that Erin noticed.

Signed-off-by: Xin Long 
---
 net/tipc/socket.c | 36 +++-
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34a97ea36cc8..9b0b311c7ec1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+   struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(>sk_receive_queue);
+   skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1922,18 +1924,33 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
 
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
-   copy = min_t(int, dlen, buflen);
-   if (unlikely(copy != dlen))
-   m->msg_flags |= MSG_TRUNC;
-   rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+   int offset = skb_cb->bytes_read;
+
+   copy = min_t(int, dlen - offset, buflen);
+   rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+   if (unlikely(rc))
+   goto exit;
+   if (unlikely(offset + copy < dlen)) {
+   if (flags & MSG_EOR) {
+   if (!(flags & MSG_PEEK))
+   skb_cb->bytes_read = offset + copy;
+   } else {
+   m->msg_flags |= MSG_TRUNC;
+   skb_cb->bytes_read = 0;
+   }
+   } else {
+   if (flags & MSG_EOR)
+   m->msg_flags |= MSG_EOR;
+   skb_cb->bytes_read = 0;
+   }
} else {
copy = 0;
rc = 0;
-   if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+   if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+   goto exit;
+   }
}
-   if (unlikely(rc))
-   goto exit;
 
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1956,9 +1973,10 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
tipc_node_distr_xmit(sock_net(sk), );
}
 
-   tsk_advance_rx_queue(sk);
+   if (!skb_cb->bytes_read)
+   tsk_advance_rx_queue(sk);
 
-   if (likely(!connected))
+   if (likely(!connected) || skb_cb->bytes_read)
goto exit;
 
/* Send connection flow control advertisement when applicable */
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next] Documentation: add more details in tipc.rst

2021-06-30 Thread Xin Long
The kernel documentation for TIPC is too simple; we need to add more
information to it.

This patch is to extend the abstract, and add the Features and Links items.

Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 Documentation/networking/tipc.rst | 121 +-
 1 file changed, 118 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/tipc.rst 
b/Documentation/networking/tipc.rst
index 76775f24cdc8..ab63d298cca2 100644
--- a/Documentation/networking/tipc.rst
+++ b/Documentation/networking/tipc.rst
@@ -4,10 +4,125 @@
 Linux Kernel TIPC
 =
 
-TIPC (Transparent Inter Process Communication) is a protocol that is
-specially designed for intra-cluster communication.
+Introduction
+
 
-For more information about TIPC, see http://tipc.sourceforge.net.
+TIPC (Transparent Inter Process Communication) is a protocol that is specially
+designed for intra-cluster communication. It can be configured to transmit
+messages either on UDP or directly across Ethernet. Message delivery is
+sequence guaranteed, loss free and flow controlled. Latency times are shorter
+than with any other known protocol, while maximal throughput is comparable to
+that of TCP.
+
+TIPC Features
+-
+
+- Cluster wide IPC service
+
+  Have you ever wished you had the convenience of Unix Domain Sockets even when
+  transmitting data between cluster nodes? Where you yourself determine the
+  addresses you want to bind to and use? Where you don't have to perform DNS
+  lookups and worry about IP addresses? Where you don't have to start timers
+  to monitor the continuous existence of peer sockets? And yet without the
+  downsides of that socket type, such as the risk of lingering inodes?
+
+  Welcome to the Transparent Inter Process Communication service, TIPC in 
short,
+  which gives you all of this, and a lot more.
+
+- Service Addressing
+
+  A fundamental concept in TIPC is that of Service Addressing which makes it
+  possible for a programmer to choose his own address, bind it to a server
+  socket and let client programs use only that address for sending messages.
+
+- Service Tracking
+
+  A client wanting to wait for the availability of a server, uses the Service
+  Tracking mechanism to subscribe for binding and unbinding/close events for
+  sockets with the associated service address.
+
+  The service tracking mechanism can also be used for Cluster Topology 
Tracking,
+  i.e., subscribing for availability/non-availability of cluster nodes.
+
+  Likewise, the service tracking mechanism can be used for Cluster Connectivity
+  Tracking, i.e., subscribing for up/down events for individual links between
+  cluster nodes.
+
+- Transmission Modes
+
+  Using a service address, a client can send datagram messages to a server 
socket.
+
+  Using the same address type, it can establish a connection towards an 
accepting
+  server socket.
+
+  It can also use a service address to create and join a Communication Group,
+  which is the TIPC manifestation of a brokerless message bus.
+
+  Multicast with very good performance and scalability is available both in
+  datagram mode and in communication group mode.
+
+- Inter Node Links
+
+  Communication between any two nodes in a cluster is maintained by one or two
+  Inter Node Links, which both guarantee data traffic integrity and monitor
+  the peer node's availability.
+
+- Cluster Scalability
+
+  By applying the Overlapping Ring Monitoring algorithm on the inter node links
+  it is possible to scale TIPC clusters up to 1000 nodes with a maintained
+  neighbor failure discovery time of 1-2 seconds. For smaller clusters this
+  time can be made much shorter.
+
+- Neighbor Discovery
+
+  Neighbor Node Discovery in the cluster is done by Ethernet broadcast or UDP
+  multicast, when any of those services are available. If not, configured peer
+  IP addresses can be used.
+
+- Configuration
+
+  When running TIPC in single node mode no configuration whatsoever is needed.
+  When running in cluster mode TIPC must as a minimum be given a node address
+  (before Linux 4.17) and told which interface to attach to. The "tipc"
+  configuration tool makes it possible to add and maintain many more
+  configuration parameters.
+
+- Performance
+
+  TIPC message transfer latency times are better than in any other known 
protocol.
+  Maximal byte throughput for inter-node connections is still somewhat lower 
than
+  for TCP, while they are superior for intra-node and inter-container 
throughput
+  on the same host.
+
+- Language Support
+
+  The TIPC user API has support for C, Python, Perl, Ruby, D and Go.
+
+More Information
+
+
+- How to set up TIPC:
+
+  http://tipc.io/getting_started.html
+
+- How to program with TIPC:
+
+  http://tipc.io/programming.html
+
+- How to contribute to TIPC:
+
+  http://tipc.io/contacts.html
+
+- More details about TIPC specification:
+
+  http://tipc.io/protocol.html
+
+
+Impl

Re: [tipc-discussion] [PATCH net-next] tipc: keep the skb in rcv queue until the whole data is read

2021-06-29 Thread Xin Long
On Tue, Jun 29, 2021 at 3:57 PM Jon Maloy  wrote:
>
>
> On 28/06/2021 15:16, Xin Long wrote:
> > On Mon, Jun 28, 2021 at 3:03 PM Xin Long  wrote:
> >> On Sun, Jun 27, 2021 at 3:44 PM Erin Shepherd  wrote:
> >>> Xin Long  writes:
> >>>
> >>>> Currently, when userspace reads a datagram with a buffer that is
> >>>> smaller than this datagram, the data will be truncated and only
> >>>> part of it can be received by users. It doesn't seem right that
> >>>> users don't know the datagram size and have to use a huge buffer
> >>>> to read it to avoid the truncation.
> >>>>
> >>>> This patch to fix it by keeping the skb in rcv queue until the
> >>>> whole data is read by users. Only the last msg of the datagram
> >>>> will be marked with MSG_EOR, just as TCP/SCTP does.
> Makes sense to me.
> >>> I agree that the current behavior is suboptimal, but:
> >>>
> >>>   * Isn't this the same behavior that other datagram socket types
> >>> exhibit? It seems like this would make TIPC behave inconsistently
> >>> compared to other transports
> >> Yes, SCTP.
> >> Do you see any reliable datagram transports not doing this?
> >>
> >>>   * Isn't this a compatibility break with existing software? Particularly
> >>> existing software will not expect to receive trailers of overlong
> >>> datagrams
> >> I talked to Jon about this, he seems okay with this.
> >>
> >>> It feels like this behavior should be activated either with a
> >>> setsockopt(2) call or a new MSG_* flag passed to recv
> >> Anyway, It may not be worth a new sockopt.
> >> I'm thinking to pass MSG_EOR into sendmsg:
> >>sendmsg(MSG_EOR).
> > sorry, I meant recvmsg();
>
> Still not sure I understand what you are suggesting here. Do you mean
> that if we add MSG_EOR as a flag  to recvmsg() that means we *don't*
> want the remainder of the message, i.e., it is ok to truncate it?
>
> Or do you mean the opposite?
Yes, Jon, I mean the opposite.

When MSG_EOR is set, we will go with what this patch does, but
clear MSG_EOR if this is not the last part of the data, and
keep MSG_EOR if this is the last part of the data.

When MSG_EOR is not set, the msg will be truncated as before.

>
> In the first case, we don't solve any compatibility issue, if that is
> the purpose. The programmer still has to add code to get the current
> behavior.
>
> In the latter case we would be on the 100% safe side, although I have a
> real hard time to see that this could be a real issue. Why would anybody
> deliberately design an application for having messages truncated.
>
> ///jon
>
>
> >> to indicate we don't want the truncating msg.
> >>
> >> When the msg flag returns with no MSG_EOR, it means there's more data to 
> >> read.
> >>
> >> Thanks.
> >>> - Erin
> >>>
> >>>> Signed-off-by: Xin Long 
> >>>> ---
> >>>>   net/tipc/socket.c | 30 +-
> >>>>   1 file changed, 21 insertions(+), 9 deletions(-)
> >>>>
> >>>> diff --git a/net/tipc/socket.c b/net/tipc/socket.c
> >>>> index 34a97ea36cc8..504e59838b8b 100644
> >>>> --- a/net/tipc/socket.c
> >>>> +++ b/net/tipc/socket.c
> >>>> @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, 
> >>>> struct msghdr *m,
> >>>>bool connected = !tipc_sk_type_connectionless(sk);
> >>>>struct tipc_sock *tsk = tipc_sk(sk);
> >>>>int rc, err, hlen, dlen, copy;
> >>>> + struct tipc_skb_cb *skb_cb;
> >>>>struct sk_buff_head xmitq;
> >>>>struct tipc_msg *hdr;
> >>>>struct sk_buff *skb;
> >>>> @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, 
> >>>> struct msghdr *m,
> >>>>if (unlikely(rc))
> >>>>goto exit;
> >>>>skb = skb_peek(>sk_receive_queue);
> >>>> + skb_cb = TIPC_SKB_CB(skb);
> >>>>hdr = buf_msg(skb);
> >>>>dlen = msg_data_sz(hdr);
> >>>>hlen = msg_hdr_sz(hdr);
> >>>> @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, 
> >>>> struct msghdr *m,
> >&

Re: [tipc-discussion] [PATCH net-next] tipc: keep the skb in rcv queue until the whole data is read

2021-06-30 Thread Xin Long
On Wed, Jun 30, 2021 at 10:33 AM Jon Maloy  wrote:
>
>
> On 29/06/2021 17:41, Xin Long wrote:
> > On Tue, Jun 29, 2021 at 3:57 PM Jon Maloy  wrote:
> >>
> [...]
> > Yes, Jon, I mean the opposite.
> >
> > when MSG_EOR is set, we will go with what this patch does,
> > but to delete MSG_EOR if this is not the last part of the data,
> > and keep MSG_EOR if this is the last part of the data.
> >
> > when MSG_EOR is not set, the msg will be truncated as before.
>
> Yes, that would be a safe behavior. Is SCTP doing this?
No, SCTP doesn't need to, as it has never truncated messages.
That's why it caused no compatibility issue.

>
> ///jon
>
> >
> >> In the first case, we don't solve any compatibility issue, if that is
> >> the purpose. The programmer still has to add code to get the current
> >> behavior.
> >>
> >> In the latter case we would be on the 100% safe side, although I have a
> >> real hard time to see that this could be a real issue. Why would anybody
> >> deliberately design an application for having messages truncated.
> >>
> >> ///jon
> >>
> >>
> >>>> to indicate we don't want the truncating msg.
> >>>>
> >>>> When the msg flag returns with no MSG_EOR, it means there's more data to 
> >>>> read.
> >>>>
> >>>> Thanks.
> >>>>> - Erin
> >>>>>
> >>>>>> Signed-off-by: Xin Long 
> >>>>>> ---
> >>>>>>net/tipc/socket.c | 30 +-
> >>>>>>1 file changed, 21 insertions(+), 9 deletions(-)
> >>>>>>
> >>>>>> diff --git a/net/tipc/socket.c b/net/tipc/socket.c
> >>>>>> index 34a97ea36cc8..504e59838b8b 100644
> >>>>>> --- a/net/tipc/socket.c
> >>>>>> +++ b/net/tipc/socket.c
> >>>>>> @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, 
> >>>>>> struct msghdr *m,
> >>>>>> bool connected = !tipc_sk_type_connectionless(sk);
> >>>>>> struct tipc_sock *tsk = tipc_sk(sk);
> >>>>>> int rc, err, hlen, dlen, copy;
> >>>>>> + struct tipc_skb_cb *skb_cb;
> >>>>>> struct sk_buff_head xmitq;
> >>>>>> struct tipc_msg *hdr;
> >>>>>> struct sk_buff *skb;
> >>>>>> @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, 
> >>>>>> struct msghdr *m,
> >>>>>> if (unlikely(rc))
> >>>>>> goto exit;
> >>>>>> skb = skb_peek(>sk_receive_queue);
> >>>>>> + skb_cb = TIPC_SKB_CB(skb);
> >>>>>> hdr = buf_msg(skb);
> >>>>>> dlen = msg_data_sz(hdr);
> >>>>>> hlen = msg_hdr_sz(hdr);
> >>>>>> @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, 
> >>>>>> struct msghdr *m,
> >>>>>>
> >>>>>> /* Capture data if non-error msg, otherwise just set return 
> >>>>>> value */
> >>>>>> if (likely(!err)) {
> >>>>>> - copy = min_t(int, dlen, buflen);
> >>>>>> - if (unlikely(copy != dlen))
> >>>>>> - m->msg_flags |= MSG_TRUNC;
> >>>>>> - rc = skb_copy_datagram_msg(skb, hlen, m, copy);
> >>>>>> + int offset = skb_cb->bytes_read;
> >>>>>> +
> >>>>>> + copy = min_t(int, dlen - offset, buflen);
> >>>>>> + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
> >>>>>> + if (unlikely(rc))
> >>>>>> + goto exit;
> >>>>>> + if (unlikely(offset + copy < dlen)) {
> >>>>>> + if (!(flags & MSG_PEEK))
> >>>>>> + skb_cb->bytes_read = offset + copy;
> >>>>>> + } else {
> >>>>>> + m->msg_flags |= MSG_EOR;
> >>>>>> + skb_cb->b

[tipc-discussion] [PATCH net-next] tipc: keep the skb in rcv queue until the whole data is read

2021-06-25 Thread Xin Long
Currently, when userspace reads a datagram with a buffer that is
smaller than this datagram, the data will be truncated and only
part of it can be received by users. It doesn't seem right that
users don't know the datagram size and have to use a huge buffer
to read it to avoid the truncation.

This patch fixes it by keeping the skb in rcv queue until the
whole data is read by users. Only the last msg of the datagram
will be marked with MSG_EOR, just as TCP/SCTP does.

Signed-off-by: Xin Long 
---
 net/tipc/socket.c | 30 +-
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34a97ea36cc8..504e59838b8b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+   struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(>sk_receive_queue);
+   skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
 
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
-   copy = min_t(int, dlen, buflen);
-   if (unlikely(copy != dlen))
-   m->msg_flags |= MSG_TRUNC;
-   rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+   int offset = skb_cb->bytes_read;
+
+   copy = min_t(int, dlen - offset, buflen);
+   rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+   if (unlikely(rc))
+   goto exit;
+   if (unlikely(offset + copy < dlen)) {
+   if (!(flags & MSG_PEEK))
+   skb_cb->bytes_read = offset + copy;
+   } else {
+   m->msg_flags |= MSG_EOR;
+   skb_cb->bytes_read = 0;
+   }
} else {
copy = 0;
rc = 0;
-   if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+   if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+   goto exit;
+   }
}
-   if (unlikely(rc))
-   goto exit;
 
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
tipc_node_distr_xmit(sock_net(sk), );
}
 
-   tsk_advance_rx_queue(sk);
+   if (!skb_cb->bytes_read)
+   tsk_advance_rx_queue(sk);
 
-   if (likely(!connected))
+   if (likely(!connected) || skb_cb->bytes_read)
goto exit;
 
/* Send connection flow control advertisement when applicable */
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net-next] tipc: keep the skb in rcv queue until the whole data is read

2021-06-28 Thread Xin Long
On Sun, Jun 27, 2021 at 3:44 PM Erin Shepherd  wrote:
>
> Xin Long  writes:
>
> > Currently, when userspace reads a datagram with a buffer that is
> > smaller than this datagram, the data will be truncated and only
> > part of it can be received by users. It doesn't seem right that
> > users don't know the datagram size and have to use a huge buffer
> > to read it to avoid the truncation.
> >
> > This patch to fix it by keeping the skb in rcv queue until the
> > whole data is read by users. Only the last msg of the datagram
> > will be marked with MSG_EOR, just as TCP/SCTP does.
>
> I agree that the current behavior is suboptimal, but:
>
>  * Isn't this the same behavior that other datagram socket types
>exhibit? It seems like this would make TIPC behave inconsistently
>compared to other transports
Yes, SCTP.
Do you see any reliable datagram transports not doing this?

>  * Isn't this a compatibility break with existing software? Particularly
>existing software will not expect to receive trailers of overlong
>datagrams
I talked to Jon about this, he seems okay with this.

>
> It feels like this behavior should be activated either with a
> setsockopt(2) call or a new MSG_* flag passed to recv
Anyway, It may not be worth a new sockopt.
I'm thinking to pass MSG_EOR into sendmsg:
  sendmsg(MSG_EOR).
to indicate we don't want the truncating msg.

When the msg flag returns with no MSG_EOR, it means there's more data to read.

Thanks.
>
> - Erin
>
> > Signed-off-by: Xin Long 
> > ---
> >  net/tipc/socket.c | 30 +-
> >  1 file changed, 21 insertions(+), 9 deletions(-)
> >
> > diff --git a/net/tipc/socket.c b/net/tipc/socket.c
> > index 34a97ea36cc8..504e59838b8b 100644
> > --- a/net/tipc/socket.c
> > +++ b/net/tipc/socket.c
> > @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
> > msghdr *m,
> >   bool connected = !tipc_sk_type_connectionless(sk);
> >   struct tipc_sock *tsk = tipc_sk(sk);
> >   int rc, err, hlen, dlen, copy;
> > + struct tipc_skb_cb *skb_cb;
> >   struct sk_buff_head xmitq;
> >   struct tipc_msg *hdr;
> >   struct sk_buff *skb;
> > @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
> > msghdr *m,
> >   if (unlikely(rc))
> >   goto exit;
> >   skb = skb_peek(>sk_receive_queue);
> > + skb_cb = TIPC_SKB_CB(skb);
> >   hdr = buf_msg(skb);
> >   dlen = msg_data_sz(hdr);
> >   hlen = msg_hdr_sz(hdr);
> > @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct 
> > msghdr *m,
> >
> >   /* Capture data if non-error msg, otherwise just set return value */
> >   if (likely(!err)) {
> > - copy = min_t(int, dlen, buflen);
> > - if (unlikely(copy != dlen))
> > - m->msg_flags |= MSG_TRUNC;
> > - rc = skb_copy_datagram_msg(skb, hlen, m, copy);
> > + int offset = skb_cb->bytes_read;
> > +
> > + copy = min_t(int, dlen - offset, buflen);
> > + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
> > + if (unlikely(rc))
> > + goto exit;
> > + if (unlikely(offset + copy < dlen)) {
> > + if (!(flags & MSG_PEEK))
> > + skb_cb->bytes_read = offset + copy;
> > + } else {
> > + m->msg_flags |= MSG_EOR;
> > + skb_cb->bytes_read = 0;
> > + }
> >   } else {
> >   copy = 0;
> >   rc = 0;
> > - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
> > + if (err != TIPC_CONN_SHUTDOWN && connected && 
> > !m->msg_control) {
> >   rc = -ECONNRESET;
> > + goto exit;
> > + }
> >   }
> > - if (unlikely(rc))
> > - goto exit;
> >
> >   /* Mark message as group event if applicable */
> >   if (unlikely(grp_evt)) {
> > @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct 
> > msghdr *m,
> >   tipc_node_distr_xmit(sock_net(sk), );
> >   }
> >
> > - tsk_advance_rx_queue(sk);
> > + if (!skb_cb->bytes_read)
> > + tsk_advance_rx_queue(sk);
> >
> > - if (likely(!connected))
> > + if (likely(!connected) || skb_cb->bytes_read)
> >   goto exit;
> >
> >   /* Send connection flow control advertisement when applicable */
> > --
> > 2.27.0
> >
> >
> >
> > ___
> > tipc-discussion mailing list
> > tipc-discussion@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/tipc-discussion


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net-next] tipc: keep the skb in rcv queue until the whole data is read

2021-06-28 Thread Xin Long
On Mon, Jun 28, 2021 at 3:03 PM Xin Long  wrote:
>
> On Sun, Jun 27, 2021 at 3:44 PM Erin Shepherd  wrote:
> >
> > Xin Long  writes:
> >
> > > Currently, when userspace reads a datagram with a buffer that is
> > > smaller than this datagram, the data will be truncated and only
> > > part of it can be received by users. It doesn't seem right that
> > > users don't know the datagram size and have to use a huge buffer
> > > to read it to avoid the truncation.
> > >
> > > This patch to fix it by keeping the skb in rcv queue until the
> > > whole data is read by users. Only the last msg of the datagram
> > > will be marked with MSG_EOR, just as TCP/SCTP does.
> >
> > I agree that the current behavior is suboptimal, but:
> >
> >  * Isn't this the same behavior that other datagram socket types
> >exhibit? It seems like this would make TIPC behave inconsistently
> >compared to other transports
> Yes, SCTP.
> Do you see any reliable datagram transports not doing this?
>
> >  * Isn't this a compatibility break with existing software? Particularly
> >existing software will not expect to receive trailers of overlong
> >datagrams
> I talked to Jon about this, he seems okay with this.
>
> >
> > It feels like this behavior should be activated either with a
> > setsockopt(2) call or a new MSG_* flag passed to recv
> Anyway, It may not be worth a new sockopt.
> I'm thinking to pass MSG_EOR into sendmsg:
>   sendmsg(MSG_EOR).
sorry, I meant recvmsg();
> to indicate we don't want the truncating msg.
>
> When the msg flag returns with no MSG_EOR, it means there's more data to read.
>
> Thanks.
> >
> > - Erin
> >
> > > Signed-off-by: Xin Long 
> > > ---
> > >  net/tipc/socket.c | 30 +-
> > >  1 file changed, 21 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/net/tipc/socket.c b/net/tipc/socket.c
> > > index 34a97ea36cc8..504e59838b8b 100644
> > > --- a/net/tipc/socket.c
> > > +++ b/net/tipc/socket.c
> > > @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
> > > msghdr *m,
> > >   bool connected = !tipc_sk_type_connectionless(sk);
> > >   struct tipc_sock *tsk = tipc_sk(sk);
> > >   int rc, err, hlen, dlen, copy;
> > > + struct tipc_skb_cb *skb_cb;
> > >   struct sk_buff_head xmitq;
> > >   struct tipc_msg *hdr;
> > >   struct sk_buff *skb;
> > > @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
> > > msghdr *m,
> > >   if (unlikely(rc))
> > >   goto exit;
> > >   skb = skb_peek(>sk_receive_queue);
> > > + skb_cb = TIPC_SKB_CB(skb);
> > >   hdr = buf_msg(skb);
> > >   dlen = msg_data_sz(hdr);
> > >   hlen = msg_hdr_sz(hdr);
> > > @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, 
> > > struct msghdr *m,
> > >
> > >   /* Capture data if non-error msg, otherwise just set return value */
> > >   if (likely(!err)) {
> > > - copy = min_t(int, dlen, buflen);
> > > - if (unlikely(copy != dlen))
> > > - m->msg_flags |= MSG_TRUNC;
> > > - rc = skb_copy_datagram_msg(skb, hlen, m, copy);
> > > + int offset = skb_cb->bytes_read;
> > > +
> > > + copy = min_t(int, dlen - offset, buflen);
> > > + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
> > > + if (unlikely(rc))
> > > + goto exit;
> > > + if (unlikely(offset + copy < dlen)) {
> > > + if (!(flags & MSG_PEEK))
> > > + skb_cb->bytes_read = offset + copy;
> > > + } else {
> > > + m->msg_flags |= MSG_EOR;
> > > + skb_cb->bytes_read = 0;
> > > + }
> > >   } else {
> > >   copy = 0;
> > >   rc = 0;
> > > - if (err != TIPC_CONN_SHUTDOWN && connected && 
> > > !m->msg_control)
> > > + if (err != TIPC_CONN_SHUTDOWN && connected && 
> > > !m->msg_control) {
> > >   rc = -ECONNRESET;
> > > + goto exi

[tipc-discussion] [PATCH net-next 5/8] tipc: add probe recv and state transition

2021-07-06 Thread Xin Long
This patch receives and processes the probe ack: when
msg_max_pkt() == l->pl.probe_size, it does the state transition
in tipc_link_pl_recv().

For the details, see:

  https://lwn.net/Articles/860385/

Signed-off-by: Xin Long 
---
 net/tipc/link.c | 48 
 1 file changed, 48 insertions(+)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 3af6c04f82c2..241c9378e258 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -293,6 +293,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, 
struct tipc_link *r,
 static void tipc_link_update_cwin(struct tipc_link *l, int released,
  bool retransmitted);
 static void tipc_link_pl_send(struct tipc_link *l);
+static void tipc_link_pl_recv(struct tipc_link *l);
 /*
  *  Simple non-static link routines (i.e. referenced outside this file)
  */
@@ -2333,6 +2334,13 @@ static int tipc_link_proto_rcv(struct tipc_link *l, 
struct sk_buff *skb,
break;
}
 
+   if (!reply && msg_max_pkt(hdr) == l->pl.probe_size) {
+   tipc_link_pl_recv(l);
+   if (l->pl.state == TIPC_PL_COMPLETE)
+   break;
+   tipc_link_build_proto_msg(l, STATE_MSG, PROBE_PLPMTU, 
0, 0, 0, 0, xmitq);
+   }
+
/* Receive Gap ACK blocks from peer if any */
glen = tipc_get_gap_ack_blks(, l, hdr, true);
 
@@ -3061,3 +3069,43 @@ static void tipc_link_pl_send(struct tipc_link *l)
}
l->pl.count = TIPC_PROBE_INTERVAL;
 }
+
+static void tipc_link_pl_recv(struct tipc_link *l)
+{
+   pr_debug("%s: PLPMTUD: link: %p, state: %d, pmtu: %d, size: %d, high: 
%d\n",
+__func__, l, l->pl.state, l->pl.pmtu, l->pl.probe_size, 
l->pl.probe_high);
+
+   l->pl.pmtu = l->pl.probe_size;
+   l->pl.count = 0;
+   if (l->pl.state == TIPC_PL_BASE) {
+   l->pl.state = TIPC_PL_SEARCH; /* Base -> Search */
+   l->pl.probe_size += TIPC_PL_BIG_STEP;
+   } else if (l->pl.state == TIPC_PL_ERROR) {
+   l->pl.state = TIPC_PL_SEARCH; /* Error -> Search */
+
+   l->pl.pmtu = l->pl.probe_size;
+   l->mtu = l->pl.pmtu;
+   l->pl.probe_size += TIPC_PL_BIG_STEP;
+   } else if (l->pl.state == TIPC_PL_SEARCH) {
+   if (!l->pl.probe_high) {
+   l->pl.probe_size = min(l->pl.probe_size + 
TIPC_PL_BIG_STEP,
+  TIPC_MAX_PLPMTU);
+   return;
+   }
+   l->pl.probe_size += TIPC_PL_MIN_STEP;
+   if (l->pl.probe_size >= l->pl.probe_high) {
+   l->pl.probe_high = 0;
+   l->pl.raise = 0;
+   l->pl.state = TIPC_PL_COMPLETE; /* Search -> Search 
Complete */
+
+   l->pl.probe_size = l->pl.pmtu;
+   l->mtu = l->pl.pmtu;
+   }
+   } else if (l->pl.state == TIPC_PL_COMPLETE) {
+   l->pl.raise++;
+   if (l->pl.raise == 30) {
+   l->pl.state = TIPC_PL_SEARCH; /* Search Complete -> 
Search */
+   l->pl.probe_size += TIPC_PL_MIN_STEP;
+   }
+   }
+}
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next 6/8] tipc: add offload base

2021-07-06 Thread Xin Long
This is the base code for tipc gso, and tipc_gso_segment() will
only be called after gso packets are built in the next patch.

Signed-off-by: Xin Long 
---
 include/linux/skbuff.h |  2 ++
 net/tipc/Makefile  |  2 +-
 net/tipc/core.c|  3 +++
 net/tipc/msg.h |  2 ++
 net/tipc/offload.c | 29 +
 5 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 net/tipc/offload.c

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b2db9cd9a73f..148bf0ed7336 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -599,6 +599,8 @@ enum {
SKB_GSO_UDP_L4 = 1 << 17,
 
SKB_GSO_FRAGLIST = 1 << 18,
+
+   SKB_GSO_TIPC = 1 << 19,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index ee49a9f1dd4f..ff276bf78d03 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,7 +9,7 @@ tipc-y  += addr.o bcast.o bearer.o \
   core.o link.o discover.o msg.o  \
   name_distr.o  subscr.o monitor.o name_table.o net.o  \
   netlink.o netlink_compat.o node.o socket.o eth_media.o \
-  topsrv.o group.o trace.o
+  topsrv.o group.o trace.o offload.o
 
 CFLAGS_trace.o += -I$(src)
 
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3f4542e0f065..1f59371aa036 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -186,6 +186,8 @@ static int __init tipc_init(void)
if (err)
goto out_netlink_compat;
 
+   tipc_offload_init();
+
pr_info("Started in single node mode\n");
return 0;
 
@@ -210,6 +212,7 @@ static int __init tipc_init(void)
 
 static void __exit tipc_exit(void)
 {
+   tipc_offload_exit();
tipc_netlink_compat_stop();
tipc_netlink_stop();
tipc_bearer_cleanup();
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 64ae4c4c44f8..d6c6231b8208 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1203,6 +1203,8 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
 bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
 struct sk_buff *skb);
 bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
+void tipc_offload_init(void);
+void tipc_offload_exit(void);
 
 static inline u16 buf_seqno(struct sk_buff *skb)
 {
diff --git a/net/tipc/offload.c b/net/tipc/offload.c
new file mode 100644
index ..f8a81c8886f0
--- /dev/null
+++ b/net/tipc/offload.c
@@ -0,0 +1,29 @@
+#include 
+#include 
+#include "msg.h"
+
+static struct sk_buff *tipc_gso_segment(struct sk_buff *skb,
+   netdev_features_t features)
+{
+   if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TIPC))
+   return ERR_PTR(-EINVAL);
+
+   return skb_segment(skb, (features | NETIF_F_HW_CSUM) & ~NETIF_F_SG);
+}
+
+static struct packet_offload tipc_packet_offload __read_mostly = {
+   .type = cpu_to_be16(ETH_P_TIPC),
+   .callbacks = {
+   .gso_segment = tipc_gso_segment,
+   },
+};
+
+void tipc_offload_init(void)
+{
+   dev_add_offload(_packet_offload);
+}
+
+void tipc_offload_exit(void)
+{
+   dev_remove_offload(_packet_offload);
+}
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next 8/8] tipc: add hardware gso

2021-07-06 Thread Xin Long
Since there are not enough bits in netdev_features_t for
NETIF_F_GSO_TIPC_BIT, and tipc is using code similar
to sctp's, this patch reuses SKB_GSO_SCTP and
NETIF_F_GSO_SCTP_BIT for tipc.

Signed-off-by: Xin Long 
---
 include/linux/skbuff.h |  2 --
 net/tipc/node.c| 15 ++-
 net/tipc/offload.c |  4 ++--
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 148bf0ed7336..b2db9cd9a73f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -599,8 +599,6 @@ enum {
SKB_GSO_UDP_L4 = 1 << 17,
 
SKB_GSO_FRAGLIST = 1 << 18,
-
-   SKB_GSO_TIPC = 1 << 19,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9947b7dfe1d2..17e59c8dac31 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2068,7 +2068,7 @@ static bool tipc_node_check_state(struct tipc_node *n, 
struct sk_buff *skb,
  * Invoked with no locks held. Bearer pointer must point to a valid bearer
  * structure (i.e. cannot be NULL), but bearer can be inactive.
  */
-void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
+static void __tipc_rcv(struct net *net, struct sk_buff *skb, struct 
tipc_bearer *b)
 {
struct sk_buff_head xmitq;
struct tipc_link_entry *le;
@@ -2189,6 +2189,19 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, 
struct tipc_bearer *b)
kfree_skb(skb);
 }
 
+void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
+{
+   struct sk_buff *seg, *next;
+
+   if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+   return __tipc_rcv(net, skb, b);
+
+   skb_list_walk_safe(skb_shinfo(skb)->frag_list, seg, next)
+   __tipc_rcv(net, seg, b);
+   skb_shinfo(skb)->frag_list = NULL;
+   consume_skb(skb);
+}
+
 void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
  int prop)
 {
diff --git a/net/tipc/offload.c b/net/tipc/offload.c
index d137679f4db0..26e372178635 100644
--- a/net/tipc/offload.c
+++ b/net/tipc/offload.c
@@ -5,7 +5,7 @@
 static struct sk_buff *tipc_gso_segment(struct sk_buff *skb,
netdev_features_t features)
 {
-   if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TIPC))
+   if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
return ERR_PTR(-EINVAL);
 
return skb_segment(skb, (features | NETIF_F_HW_CSUM) & ~NETIF_F_SG);
@@ -39,7 +39,7 @@ bool tipc_msg_gso_append(struct sk_buff **p, struct sk_buff 
*skb, u16 segs)
 
skb_shinfo(nskb)->frag_list = head;
skb_shinfo(nskb)->gso_segs = 1;
-   skb_shinfo(nskb)->gso_type = SKB_GSO_TIPC;
+   skb_shinfo(nskb)->gso_type = SKB_GSO_SCTP;
skb_shinfo(nskb)->gso_size = GSO_BY_FRAGS;
skb_reset_network_header(head);
 
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next 7/8] tipc: add software gso

2021-07-06 Thread Xin Long
TIPC GSO is implemented in the skb frag_list way as SCTP does.

We don't need to change much in the tx path, but only create
a head skb and append the skbs when there is more than one
skb ready to send. In the lower-layer gso_segment(), it does
fragmentation by copying the eth header or ip/udp header to each
skb in the head_skb's frag_list and sending them one by one.

This works with both eth media and udp media.

Signed-off-by: Xin Long 
---
 net/tipc/bearer.c| 23 +--
 net/tipc/msg.h   |  1 +
 net/tipc/offload.c   | 41 +
 net/tipc/udp_media.c |  7 +++
 4 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 443f8e5b9477..b0321b21bfdc 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -570,8 +570,9 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
  struct tipc_media_addr *dst,
  struct tipc_node *__dnode)
 {
+   struct sk_buff *head = NULL, *skb, *tmp;
struct tipc_bearer *b;
-   struct sk_buff *skb, *tmp;
+   u16 segs = 0;
 
if (skb_queue_empty(xmitq))
return;
@@ -585,13 +586,31 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
if (likely(test_bit(0, >up) || msg_is_reset(buf_msg(skb {
 #ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_xmit(net, , b, dst, __dnode);
-   if (skb)
+   if (!skb)
+   continue;
 #endif
+   if (!skb->ignore_df) { /* PLPMTUD probe packet*/
b->media->send_msg(net, skb, b, dst);
+   continue;
+   }
+   if (!head) {
+   segs = 1;
+   head = skb;
+   continue;
+   }
+   if (tipc_msg_gso_append(, skb, segs)) {
+   segs++;
+   continue;
+   }
+   b->media->send_msg(net, head, b, dst);
+   segs = 1;
+   head = skb;
} else {
kfree_skb(skb);
}
}
+   if (head)
+   b->media->send_msg(net, head, b, dst);
rcu_read_unlock();
 }
 
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index d6c6231b8208..4d1ff666790c 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1205,6 +1205,7 @@ bool __tipc_skb_queue_sorted(struct sk_buff_head *list, 
u16 seqno,
 bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
 void tipc_offload_init(void);
 void tipc_offload_exit(void);
+bool tipc_msg_gso_append(struct sk_buff **p, struct sk_buff *skb, u16 segs);
 
 static inline u16 buf_seqno(struct sk_buff *skb)
 {
diff --git a/net/tipc/offload.c b/net/tipc/offload.c
index f8a81c8886f0..d137679f4db0 100644
--- a/net/tipc/offload.c
+++ b/net/tipc/offload.c
@@ -18,6 +18,47 @@ static struct packet_offload tipc_packet_offload 
__read_mostly = {
},
 };
 
+bool tipc_msg_gso_append(struct sk_buff **p, struct sk_buff *skb, u16 segs)
+{
+   struct sk_buff *head = *p;
+   struct sk_buff *nskb;
+
+   if (head->len + skb->len >= 65535)
+   return false;
+
+   if (segs == 1) {
+   nskb = tipc_buf_acquire(0, GFP_ATOMIC);
+   if (!nskb)
+   return false;
+
+   nskb->ip_summed = CHECKSUM_UNNECESSARY;
+   nskb->truesize += head->truesize;
+   nskb->data_len += head->len;
+   nskb->len += head->len;
+   TIPC_SKB_CB(nskb)->tail = head;
+
+   skb_shinfo(nskb)->frag_list = head;
+   skb_shinfo(nskb)->gso_segs = 1;
+   skb_shinfo(nskb)->gso_type = SKB_GSO_TIPC;
+   skb_shinfo(nskb)->gso_size = GSO_BY_FRAGS;
+   skb_reset_network_header(head);
+
+   head = nskb;
+   *p = head;
+   }
+
+   head->truesize += skb->truesize;
+   head->data_len += skb->len;
+   head->len += skb->len;
+   TIPC_SKB_CB(head)->tail->next = skb;
+   TIPC_SKB_CB(head)->tail = skb;
+
+   skb_shinfo(head)->gso_segs++;
+   skb_reset_network_header(skb);
+
+   return true;
+}
+
 void tipc_offload_init(void)
 {
dev_add_offload(_packet_offload);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 5078c5b19e81..7da02db6a50e 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -245,6 +245,13 @@ static int tipc_udp_send_msg(struct net *net, struct 
sk_buff *skb,
goto out;
}
 
+   if (skb_is_gso(skb))
+   skb_shinfo(skb)->gso_typ

[tipc-discussion] [PATCH net-next 3/8] tipc: build probe and its reply in tipc_link_build_proto_msg

2021-07-06 Thread Xin Long
This patch is to adjust the code in tipc_link_build_proto_msg()
to make it able to build probe packet with a specific size for
sender, and probe reply packet with mtu set.

Note that to send the probe packet, the df flag has to be set.

Signed-off-by: Xin Long 
---
 net/tipc/link.c  | 38 +++---
 net/tipc/link.h  |  9 +
 net/tipc/msg.c   |  1 +
 net/tipc/udp_media.c |  3 ++-
 4 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1aa775cef3bb..414f9cf543ff 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -273,8 +273,8 @@ static int link_is_up(struct tipc_link *l)
 
 static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
   struct sk_buff_head *xmitq);
-static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool 
probe,
- bool probe_reply, u16 rcvgap,
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp,
+ u8 ptype, u32 mtu, u16 rcvgap,
  int tolerance, int priority,
  struct sk_buff_head *xmitq);
 static void link_print(struct tipc_link *l, const char *str);
@@ -900,7 +900,7 @@ int tipc_link_timeout(struct tipc_link *l, struct 
sk_buff_head *xmitq)
}
 
if (state || probe || setup)
-   tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
+   tipc_link_build_proto_msg(l, mtyp, PROBE_MSTATE, 0, 0, 0, 0, 
xmitq);
 
return rc;
 }
@@ -1862,8 +1862,8 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff 
*skb,
return rc;
 }
 
-static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool 
probe,
- bool probe_reply, u16 rcvgap,
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp,
+ u8 ptype, u32 mtu, u16 rcvgap,
  int tolerance, int priority,
  struct sk_buff_head *xmitq)
 {
@@ -1874,7 +1874,7 @@ static void tipc_link_build_proto_msg(struct tipc_link 
*l, int mtyp, bool probe,
struct sk_buff *skb;
bool node_up = link_is_up(bcl);
u16 glen = 0, bc_rcvgap = 0;
-   int dlen = 0;
+   int dlen = 0, msg_sz;
void *data;
 
/* Don't send protocol message during reset or link failover */
@@ -1884,11 +1884,13 @@ static void tipc_link_build_proto_msg(struct tipc_link 
*l, int mtyp, bool probe,
if (!tipc_link_is_up(l) && (mtyp == STATE_MSG))
return;
 
-   if ((probe || probe_reply) && !skb_queue_empty(dfq))
+   if (ptype && !skb_queue_empty(dfq))
rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
 
-   skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
- tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ,
+   msg_sz = tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ;
+   if (ptype == PROBE_PLPMTU)
+   msg_sz = l->pl.probe_size - INT_H_SIZE;
+   skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, msg_sz,
  l->addr, tipc_own_addr(l->net), 0, 0, 0);
if (!skb)
return;
@@ -1915,13 +1917,19 @@ static void tipc_link_build_proto_msg(struct tipc_link 
*l, int mtyp, bool probe,
msg_set_seq_gap(hdr, rcvgap);
bc_rcvgap = link_bc_rcv_gap(bcl);
msg_set_bc_gap(hdr, bc_rcvgap);
-   msg_set_probe(hdr, probe);
-   msg_set_is_keepalive(hdr, probe || probe_reply);
+   msg_set_probe(hdr, ptype == PROBE_MSTATE || ptype == 
PROBE_PLPMTU);
+   msg_set_is_keepalive(hdr, !!ptype);
+   if (ptype == PROBE_REPLY)
+   msg_set_max_pkt(hdr, mtu);
if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
glen = tipc_build_gap_ack_blks(l, hdr);
tipc_mon_prep(l->net, data + glen, , mstate, l->bearer_id);
-   msg_set_size(hdr, INT_H_SIZE + glen + dlen);
-   skb_trim(skb, INT_H_SIZE + glen + dlen);
+   if (ptype != PROBE_PLPMTU) {
+   msg_set_size(hdr, INT_H_SIZE + glen + dlen);
+   skb_trim(skb, INT_H_SIZE + glen + dlen);
+   } else {
+   skb->ignore_df = 0;
+   }
l->stats.sent_states++;
l->rcv_unacked = 0;
} else {
@@ -1935,7 +1943,7 @@ static void tipc_link_build_proto_msg(struct tipc_link 
*l, int mtyp, bool probe,
msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME);
}
-   if (probe)
+   if (ptype == PROBE_MSTATE || ptype == PROBE_PLPMTU)

[tipc-discussion] [PATCH net-next 0/8] tipc: add PLPMTUD probe and GSO offload

2021-07-06 Thread Xin Long
This patchset is to implement PLPMTUD and GSO for TIPC,
Patch 1-5 are for PLPMTUD while 6-8 are for GSO.

It borrows some ideas from SCTP because of their similarities:
both carry reliable datagram packets and can run
over IP(v6)/UDP. It also makes some adjustments for
TIPC.

Xin Long (8):
  tipc: set the mtu for bearer properly for udp media
  tipc: add the constants and variables for plpmtud
  tipc: build probe and its reply in tipc_link_build_proto_msg
  tipc: add probe send and state transition
  tipc: add probe recv and state transition
  tipc: add offload base
  tipc: add software gso
  tipc: add hardware gso

 include/uapi/linux/tipc_config.h |   6 --
 net/tipc/Makefile|   2 +-
 net/tipc/bearer.c|  23 -
 net/tipc/core.c  |   3 +
 net/tipc/link.c  | 147 +++
 net/tipc/link.h  |  29 ++
 net/tipc/msg.c   |   1 +
 net/tipc/msg.h   |   3 +
 net/tipc/node.c  |  15 +++-
 net/tipc/offload.c   |  70 +++
 net/tipc/udp_media.c |  18 ++--
 11 files changed, 287 insertions(+), 30 deletions(-)
 create mode 100644 net/tipc/offload.c

-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next 4/8] tipc: add probe send and state transition

2021-07-06 Thread Xin Long
pl.count acts as a timer that times out after every '10 * node timer
interval', at which point it does the state transition in tipc_link_pl_send()
and sends a probe via tipc_link_build_proto_msg().

For the details, see:

  https://lwn.net/Articles/860385/

Signed-off-by: Xin Long 
---
 net/tipc/link.c | 48 
 1 file changed, 48 insertions(+)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 414f9cf543ff..3af6c04f82c2 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -292,6 +292,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, 
struct tipc_link *r,
 bool *retransmitted, int *rc);
 static void tipc_link_update_cwin(struct tipc_link *l, int released,
  bool retransmitted);
+static void tipc_link_pl_send(struct tipc_link *l);
 /*
  *  Simple non-static link routines (i.e. referenced outside this file)
  */
@@ -902,6 +903,14 @@ int tipc_link_timeout(struct tipc_link *l, struct 
sk_buff_head *xmitq)
if (state || probe || setup)
tipc_link_build_proto_msg(l, mtyp, PROBE_MSTATE, 0, 0, 0, 0, 
xmitq);
 
+   if (probe && tipc_link_is_up(l)) {
+   l->pl.count++;
+   if (!(l->pl.count % TIPC_PROBE_INTERVAL)) {
+   tipc_link_pl_send(l);
+   tipc_link_build_proto_msg(l, mtyp, PROBE_PLPMTU, 0, 0, 
0, 0, xmitq);
+   }
+   }
+
return rc;
 }
 
@@ -3013,3 +3022,42 @@ int tipc_link_dump(struct tipc_link *l, u16 dqueues, 
char *buf)
 
return i;
 }
+
+static void tipc_link_pl_send(struct tipc_link *l)
+{
+   pr_debug("%s: PLPMTUD: link: %p, state: %d, pmtu: %d, size: %d, high: 
%d\n",
+__func__, l, l->pl.state, l->pl.pmtu, l->pl.probe_size, 
l->pl.probe_high);
+
+   if (l->pl.count <= TIPC_MAX_PROBES * TIPC_PROBE_INTERVAL)
+   return;
+
+   if (l->pl.state == TIPC_PL_BASE) {
+   if (l->pl.probe_size == TIPC_BASE_PLPMTU) { /* BASE_PLPMTU 
Confirmation Failed */
+   l->pl.state = TIPC_PL_ERROR; /* Base -> Error */
+
+   l->pl.pmtu = TIPC_MIN_PLPMTU;
+   l->mtu = l->pl.pmtu;
+   }
+   } else if (l->pl.state == TIPC_PL_SEARCH) {
+   if (l->pl.pmtu == l->pl.probe_size) { /* Black Hole Detected */
+   l->pl.state = TIPC_PL_BASE;  /* Search -> Base */
+   l->pl.probe_size = TIPC_BASE_PLPMTU;
+   l->pl.probe_high = 0;
+
+   l->pl.pmtu = TIPC_BASE_PLPMTU;
+   l->mtu = l->pl.pmtu;
+   } else { /* Normal probe failure. */
+   l->pl.probe_high = l->pl.probe_size;
+   l->pl.probe_size = l->pl.pmtu;
+   }
+   } else if (l->pl.state == TIPC_PL_COMPLETE) {
+   if (l->pl.pmtu == l->pl.probe_size) { /* Black Hole Detected */
+   l->pl.state = TIPC_PL_BASE;  /* Search Complete -> Base 
*/
+   l->pl.probe_size = TIPC_BASE_PLPMTU;
+
+   l->pl.pmtu = TIPC_BASE_PLPMTU;
+   l->mtu = l->pl.pmtu;
+   }
+   }
+   l->pl.count = TIPC_PROBE_INTERVAL;
+}
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next 2/8] tipc: add the constants and variables for plpmtud

2021-07-06 Thread Xin Long
These are 4 constants described in rfc8899#section-5.1.2:

  MAX_PROBES, MIN_PLPMTU, MAX_PLPMTU, BASE_PLPMTU;

And 2 variables described in rfc8899#section-5.1.3:

  PROBED_SIZE, PROBE_COUNT;

And 5 states described in rfc8899#section-5.2:

  DISABLED, BASE, SEARCH, SEARCH_COMPLETE, ERROR;

'count' and 'raise' are used for two timers' counting:
PROBE_TIMER and PMTU_RAISE_TIMER. 'probe_high' is used
for finding the optimal value for pmtu.

Signed-off-by: Xin Long 
---
 net/tipc/link.c | 13 +
 net/tipc/link.h | 20 
 2 files changed, 33 insertions(+)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf586840caeb..1aa775cef3bb 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -182,6 +182,14 @@ struct tipc_link {
/* Max packet negotiation */
u16 mtu;
u16 advertised_mtu;
+   struct {
+   u16 probe_size;
+   u16 probe_high;
+   u16 pmtu;
+   u8 count;
+   u8 state:3;
+   u8 raise:5;
+   } pl;
 
/* Sending */
struct sk_buff_head transmq;
@@ -984,6 +992,11 @@ void tipc_link_reset(struct tipc_link *l)
l->peer_session--;
l->session++;
l->mtu = l->advertised_mtu;
+   l->pl.state = TIPC_PL_BASE;
+   l->pl.pmtu = TIPC_BASE_PLPMTU;
+   l->pl.probe_size = TIPC_BASE_PLPMTU;
+   l->pl.count = 0;
+   l->pl.probe_high = 0;
 
spin_lock_bh(>wakeupq.lock);
skb_queue_splice_init(>wakeupq, );
diff --git a/net/tipc/link.h b/net/tipc/link.h
index a16f401fdabd..30bee2562987 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -66,6 +66,26 @@ enum {
TIPC_LINK_SND_STATE= (1 << 2)
 };
 
+/* PLPMTUD state
+ */
+enum {
+   TIPC_PL_DISABLED,
+   TIPC_PL_BASE,
+   TIPC_PL_SEARCH,
+   TIPC_PL_COMPLETE,
+   TIPC_PL_ERROR,
+};
+
+#defineTIPC_BASE_PLPMTU1200
+#defineTIPC_MAX_PLPMTU 9000
+#defineTIPC_MIN_PLPMTU 512
+
+#defineTIPC_MAX_PROBES 3
+#defineTIPC_PROBE_INTERVAL 10
+
+#defineTIPC_PL_BIG_STEP32
+#defineTIPC_PL_MIN_STEP4
+
 /* Starting value for maximum packet size negotiation on unicast links
  * (unless bearer MTU is less)
  */
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next 1/8] tipc: set the mtu for bearer properly for udp media

2021-07-06 Thread Xin Long
Instead of using 14000 for ipv4/udp mtu, and 1280 for ipv6/udp
mtu, this patch sets the mtu according to the lower device's mtu
at the beginning. The pmtu will be determined by the PLPMTUD
probe in the following patches.

Signed-off-by: Xin Long 
---
 include/uapi/linux/tipc_config.h | 6 --
 net/tipc/udp_media.c | 8 
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 4dfc05651c98..7e23b7f438b4 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -185,12 +185,6 @@
 #define TIPC_DEF_LINK_WIN 50
 #define TIPC_MAX_LINK_WIN 8191
 
-/*
- * Default MTU for UDP media
- */
-
-#define TIPC_DEF_LINK_UDP_MTU 14000
-
 struct tipc_node_info {
__be32 addr;/* network address of node */
__be32 up;  /* 0=down, 1= up */
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index c2bb818704c8..dc4bae965549 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -661,7 +661,7 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct 
nlattr *attr)
 static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
   struct nlattr *attrs[])
 {
-   int err = -EINVAL;
+   int err = -EINVAL, hlen;
struct udp_bearer *ub;
struct udp_media_addr remote = {0};
struct udp_media_addr local = {0};
@@ -743,7 +743,7 @@ static int tipc_udp_enable(struct net *net, struct 
tipc_bearer *b,
err = -EINVAL;
goto err;
}
-   b->mtu = b->media->mtu;
+   hlen = sizeof(struct iphdr);
 #if IS_ENABLED(CONFIG_IPV6)
} else if (local.proto == htons(ETH_P_IPV6)) {
dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL;
@@ -760,12 +760,13 @@ static int tipc_udp_enable(struct net *net, struct 
tipc_bearer *b,
else
udp_conf.local_ip6 = local.ipv6;
ub->ifindex = dev->ifindex;
-   b->mtu = 1280;
+   hlen = sizeof(struct ipv6hdr);
 #endif
} else {
err = -EAFNOSUPPORT;
goto err;
}
+   b->mtu = b->media->mtu ?: dev->mtu - hlen - sizeof(struct udphdr);
udp_conf.local_udp_port = local.port;
err = udp_sock_create(net, _conf, >ubsock);
if (err)
@@ -851,7 +852,6 @@ struct tipc_media udp_media_info = {
.tolerance  = TIPC_DEF_LINK_TOL,
.min_win= TIPC_DEF_LINK_WIN,
.max_win= TIPC_DEF_LINK_WIN,
-   .mtu= TIPC_DEF_LINK_UDP_MTU,
.type_id= TIPC_MEDIA_TYPE_UDP,
.hwaddr_len = 0,
.name   = "udp"
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: fix implicit-connect for SYN+

2021-07-10 Thread Xin Long
For implicit-connect, when it's either SYN- or SYN+, an ACK should
be sent back to the client immediately. It's not appropriate for
the client to enter established state only after receiving data
from the server.

On client side, after the SYN is sent out, tipc_wait_for_connect()
should be called to wait for the ACK if timeout is set.

This patch also restricts __tipc_sendstream() to call __sendmsg()
only when it's in TIPC_OPEN state, so that the client can program
in a single loop doing both connecting and data sending like:

  for (...)
  sendmsg(dest, buf);

This makes the implicit-connect more implicit.

Fixes: b97bf3fd8f6a ("[TIPC] Initial merge")
Signed-off-by: Xin Long 
---
 net/tipc/socket.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34a97ea36cc8..ebd300c26a44 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk);
 static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t 
dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
 static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
 
 static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
@@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct 
msghdr *m, size_t dlen)
rc = 0;
}
 
-   if (unlikely(syn && !rc))
+   if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
+   if (timeout) {
+   timeout = msecs_to_jiffies(timeout);
+   tipc_wait_for_connect(sock, );
+   }
+   }
 
return rc ? rc : dlen;
 }
@@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct 
msghdr *m, size_t dlen)
return -EMSGSIZE;
 
/* Handle implicit connection setup */
-   if (unlikely(dest)) {
+   if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
rc = __tipc_sendmsg(sock, m, dlen);
if (dlen && dlen == rc) {
tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -2689,9 +2695,10 @@ static int tipc_accept(struct socket *sock, struct 
socket *new_sock, int flags,
   bool kern)
 {
struct sock *new_sk, *sk = sock->sk;
-   struct sk_buff *buf;
struct tipc_sock *new_tsock;
+   struct msghdr m = {NULL,};
struct tipc_msg *msg;
+   struct sk_buff *buf;
long timeo;
int res;
 
@@ -2737,19 +2744,17 @@ static int tipc_accept(struct socket *sock, struct 
socket *new_sock, int flags,
}
 
/*
-* Respond to 'SYN-' by discarding it & returning 'ACK'-.
-* Respond to 'SYN+' by queuing it on new socket.
+* Respond to 'SYN-' by discarding it & returning 'ACK'.
+* Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
 */
if (!msg_data_sz(msg)) {
-   struct msghdr m = {NULL,};
-
tsk_advance_rx_queue(sk);
-   __tipc_sendstream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+   __tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
 exit:
release_sock(sk);
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCHv2 net] tipc: fix a race in tipc_sk_mcast_rcv

2021-04-29 Thread Xin Long
After commit cb1b728096f5 ("tipc: eliminate race condition at multicast
reception"), when processing the multicast reception, the packets are
first moved from be->inputq1 to be->arrvq in tipc_node_mcast_rcv(),
and then be->arrvq is processed in tipc_sk_mcast_rcv().

In tipc_sk_mcast_rcv(), it gets the 1st skb by skb_peek(), then handles
this skb without any lock. It means meanwhile another thread could also
call tipc_sk_mcast_rcv() and process be->arrvq and pick up the same skb,
then free it. A double free issue will be caused as Li Shuang reported:

  [] kernel BUG at mm/slub.c:305!
  []  kfree+0x3a7/0x3d0
  []  kfree_skb+0x32/0xa0
  []  skb_release_data+0xb4/0x170
  []  kfree_skb+0x32/0xa0
  []  skb_release_data+0xb4/0x170
  []  kfree_skb+0x32/0xa0
  []  tipc_sk_mcast_rcv+0x1fa/0x380 [tipc]
  []  tipc_rcv+0x411/0x1120 [tipc]
  []  tipc_udp_recv+0xc6/0x1e0 [tipc]
  []  udp_queue_rcv_one_skb+0x1a9/0x500
  []  udp_unicast_rcv_skb.isra.66+0x75/0x90
  []  __udp4_lib_rcv+0x537/0xc40
  []  ip_protocol_deliver_rcu+0xdf/0x1d0
  []  ip_local_deliver_finish+0x4a/0x50
  []  ip_local_deliver+0x6b/0xe0
  []  ip_rcv+0x27b/0x36a
  []  __netif_receive_skb_core+0xb47/0xc40
  []  process_backlog+0xae/0x160

Commit 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
tried to fix this double free by not releasing the skbs in be->arrvq,
which would definitely cause the skbs' leak.

The problem is that we shouldn't process the skbs in be->arrvq without
a lock held across the whole sequence from peeking to dequeuing them.
The fix here is to use a temp skb list instead of be->arrvq to make it
"per thread safe". While at it, remove the no-longer-used be->arrvq.

v1->v2:
  - remove the no-longer-used tipc_skb_peek() and some comments from
tipc_sk_mcast_rcv() as Tung noticed.

Fixes: cb1b728096f5 ("tipc: eliminate race condition at multicast reception")
Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
Reported-by: Li Shuang 
Signed-off-by: Xin Long 
---
 net/tipc/msg.h| 17 -
 net/tipc/node.c   |  9 -
 net/tipc/socket.c | 17 +++--
 3 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5d64596..7914358 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1213,23 +1213,6 @@ static inline int buf_roundup_len(struct sk_buff *skb)
return (skb->len / 1024 + 1) * 1024;
 }
 
-/* tipc_skb_peek(): peek and reserve first buffer in list
- * @list: list to be peeked in
- * Returns pointer to first buffer in list, if any
- */
-static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list,
-   spinlock_t *lock)
-{
-   struct sk_buff *skb;
-
-   spin_lock_bh(lock);
-   skb = skb_peek(list);
-   if (skb)
-   skb_get(skb);
-   spin_unlock_bh(lock);
-   return skb;
-}
-
 /* tipc_skb_peek_port(): find a destination port, ignoring all destinations
  *   up to and including 'filter'.
  * Note: ignoring previously tried destinations minimizes the risk of
diff --git a/net/tipc/node.c b/net/tipc/node.c
index e0ee832..0c636fb 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -72,7 +72,6 @@ struct tipc_link_entry {
 struct tipc_bclink_entry {
struct tipc_link *link;
struct sk_buff_head inputq1;
-   struct sk_buff_head arrvq;
struct sk_buff_head inputq2;
struct sk_buff_head namedq;
u16 named_rcv_nxt;
@@ -552,7 +551,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 
addr, u8 *peer_id,
INIT_LIST_HEAD(&n->conn_sks);
skb_queue_head_init(&n->bc_entry.namedq);
skb_queue_head_init(&n->bc_entry.inputq1);
-   __skb_queue_head_init(&n->bc_entry.arrvq);
skb_queue_head_init(&n->bc_entry.inputq2);
for (i = 0; i < MAX_BEARERS; i++)
spin_lock_init(&n->links[i].lock);
@@ -1803,14 +1801,15 @@ void tipc_node_broadcast(struct net *net, struct 
sk_buff *skb, int rc_dests)
 static void tipc_node_mcast_rcv(struct tipc_node *n)
 {
struct tipc_bclink_entry *be = &n->bc_entry;
+   struct sk_buff_head tmpq;
 
-   /* 'arrvq' is under inputq2's lock protection */
+   __skb_queue_head_init(&tmpq);
spin_lock_bh(&be->inputq2.lock);
spin_lock_bh(&be->inputq1.lock);
-   skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
+   skb_queue_splice_tail_init(&be->inputq1, &tmpq);
spin_unlock_bh(&be->inputq1.lock);
spin_unlock_bh(&be->inputq2.lock);
-   tipc_sk_mcast_rcv(n->net, &be->arrvq, &be->inputq2);
+   tipc_sk_mcast_rcv(n->net, &tmpq, &be->inputq2);
 }
 
 static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 022999e..cfd30fa 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1210,8 +1210,7 @@ void tipc_sk_mcast_rcv(struct 

[tipc-discussion] [PATCH net] tipc: fix a race in tipc_sk_mcast_rcv

2021-04-28 Thread Xin Long
After commit cb1b728096f5 ("tipc: eliminate race condition at multicast
reception"), when processing the multicast reception, the packets are
first moved from be->inputq1 to be->arrvq in tipc_node_mcast_rcv(),
and then be->arrvq is processed in tipc_sk_mcast_rcv().

In tipc_sk_mcast_rcv(), it gets the 1st skb by skb_peek(), then processes
this skb without any lock. It means meanwhile another thread could also
call tipc_sk_mcast_rcv() and process be->arrvq and pick up the same skb,
then free it. A double free issue will be caused as Li Shuang reported:

  [] kernel BUG at mm/slub.c:305!
  []  kfree+0x3a7/0x3d0
  []  kfree_skb+0x32/0xa0
  []  skb_release_data+0xb4/0x170
  []  kfree_skb+0x32/0xa0
  []  skb_release_data+0xb4/0x170
  []  kfree_skb+0x32/0xa0
  []  tipc_sk_mcast_rcv+0x1fa/0x380 [tipc]
  []  tipc_rcv+0x411/0x1120 [tipc]
  []  tipc_udp_recv+0xc6/0x1e0 [tipc]
  []  udp_queue_rcv_one_skb+0x1a9/0x500
  []  udp_unicast_rcv_skb.isra.66+0x75/0x90
  []  __udp4_lib_rcv+0x537/0xc40
  []  ip_protocol_deliver_rcu+0xdf/0x1d0
  []  ip_local_deliver_finish+0x4a/0x50
  []  ip_local_deliver+0x6b/0xe0
  []  ip_rcv+0x27b/0x36a
  []  __netif_receive_skb_core+0xb47/0xc40
  []  process_backlog+0xae/0x160

Commit 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
tried to fix this double free by not releasing the skbs in be->arrvq,
which would definitely cause the skbs' leak.

The problem is that we shouldn't process the skbs in be->arrvq without
a lock held across the whole sequence from peeking to dequeuing them.
The fix here is to use a temp skb list instead of be->arrvq to make it
"per thread safe". While at it, remove the no-longer-used be->arrvq.

Fixes: cb1b728096f5 ("tipc: eliminate race condition at multicast reception")
Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
Reported-by: Li Shuang 
Signed-off-by: Xin Long 
---
 net/tipc/node.c   |  9 -
 net/tipc/socket.c | 16 +++-
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index e0ee832..0c636fb 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -72,7 +72,6 @@ struct tipc_link_entry {
 struct tipc_bclink_entry {
struct tipc_link *link;
struct sk_buff_head inputq1;
-   struct sk_buff_head arrvq;
struct sk_buff_head inputq2;
struct sk_buff_head namedq;
u16 named_rcv_nxt;
@@ -552,7 +551,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 
addr, u8 *peer_id,
INIT_LIST_HEAD(&n->conn_sks);
skb_queue_head_init(&n->bc_entry.namedq);
skb_queue_head_init(&n->bc_entry.inputq1);
-   __skb_queue_head_init(&n->bc_entry.arrvq);
skb_queue_head_init(&n->bc_entry.inputq2);
for (i = 0; i < MAX_BEARERS; i++)
spin_lock_init(&n->links[i].lock);
@@ -1803,14 +1801,15 @@ void tipc_node_broadcast(struct net *net, struct 
sk_buff *skb, int rc_dests)
 static void tipc_node_mcast_rcv(struct tipc_node *n)
 {
struct tipc_bclink_entry *be = &n->bc_entry;
+   struct sk_buff_head tmpq;
 
-   /* 'arrvq' is under inputq2's lock protection */
+   __skb_queue_head_init(&tmpq);
spin_lock_bh(&be->inputq2.lock);
spin_lock_bh(&be->inputq1.lock);
-   skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
+   skb_queue_splice_tail_init(&be->inputq1, &tmpq);
spin_unlock_bh(&be->inputq1.lock);
spin_unlock_bh(&be->inputq2.lock);
-   tipc_sk_mcast_rcv(n->net, &be->arrvq, &be->inputq2);
+   tipc_sk_mcast_rcv(n->net, &tmpq, &be->inputq2);
 }
 
 static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 022999e..2870798 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1210,8 +1210,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct 
sk_buff_head *arrvq,
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
 
-   skb = tipc_skb_peek(arrvq, &inputq->lock);
-   for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
+   while ((skb = __skb_dequeue(arrvq)) != NULL) {
hdr = buf_msg(skb);
user = msg_user(hdr);
mtyp = msg_type(hdr);
@@ -1220,13 +1219,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct 
sk_buff_head *arrvq,
type = msg_nametype(hdr);
 
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
-   spin_lock_bh(&inputq->lock);
-   if (skb_peek(arrvq) == skb) {
-   __skb_dequeue(arrvq);
-   __skb_queue_tail(inputq, skb);
-   }
-   kfree_skb(skb);
-   spin_unlock_bh(&inputq->lock);
+   skb_queue_tail(inputq, skb);
continue;
}
 
@@ -1263,10 +1256,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct 
sk

[tipc-discussion] [PATCH net] tipc: increment the tmp aead refcnt before attaching it

2021-04-05 Thread Xin Long
Li Shuang found a NULL pointer dereference crash in her testing:

  [] BUG: unable to handle kernel NULL pointer dereference at 0020
  [] RIP: 0010:tipc_crypto_rcv_complete+0xc8/0x7e0 [tipc]
  [] Call Trace:
  []  
  []  tipc_crypto_rcv+0x2d9/0x8f0 [tipc]
  []  tipc_rcv+0x2fc/0x1120 [tipc]
  []  tipc_udp_recv+0xc6/0x1e0 [tipc]
  []  udpv6_queue_rcv_one_skb+0x16a/0x460
  []  udp6_unicast_rcv_skb.isra.35+0x41/0xa0
  []  ip6_protocol_deliver_rcu+0x23b/0x4c0
  []  ip6_input+0x3d/0xb0
  []  ipv6_rcv+0x395/0x510
  []  __netif_receive_skb_core+0x5fc/0xc40

This is caused by tipc_aead_get() returning NULL, which is then
dereferenced later in tipc_crypto_rcv_complete(). This might happen
when tipc_crypto_rcv_complete() is called by two threads at the same time:
the tmp attached by tipc_crypto_key_attach() in one thread may be released
when the other thread attaches its own tmp.

This patch is to fix it by incrementing the tmp's refcnt before attaching
it instead of calling tipc_aead_get() after attaching it.

Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication")
Reported-by: Li Shuang 
Signed-off-by: Xin Long 
---
 net/tipc/crypto.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index f4fca8f..97710ce 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1941,12 +1941,13 @@ static void tipc_crypto_rcv_complete(struct net *net, 
struct tipc_aead *aead,
goto rcv;
if (tipc_aead_clone(&tmp, aead) < 0)
goto rcv;
+   WARN_ON(!refcount_inc_not_zero(&tmp->refcnt));
if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) {
tipc_aead_free(&tmp->rcu);
goto rcv;
}
tipc_aead_put(aead);
-   aead = tipc_aead_get(tmp);
+   aead = tmp;
}
 
if (unlikely(err)) {
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net-next v2 00/16] tipc: cleanups and simplifications

2021-03-15 Thread Xin Long
On Sun, Feb 28, 2021 at 12:56 AM  wrote:
>
> From: Jon Maloy 
>
> We make a number of simplifications and cleanups, especially to call 
> signatures
> in the binding table. This makes the code easier to understand and serves as a
> preparation for an upcoming functional addition.
>
> v2:
>- Eliminated sparse warnings, as per feedback from Hoang
>- Fixed some typos, as per feedback from Tung
>- Fixed some issues noted by Xin:
>  #2/16: Moved spinlock.
>  #4/16: Added scope check and used tipc_uaddr() in tipc_sk_join()
>  #5/16: Eliminated 'addrtype' from struct publication (for now).
>  #6/16: 'node number' represented as decimal in some places and
> hex in others. I will fix that in a separate patch later.
>  #8/16: Re-introduce use of node2scope(). This function is needed
> later.
>  #13/16: This change does not look motivated right now, as stated
>  in the commit log, but will be needed later. I kept it,
>  just because of the consequences of removing it now and
>  having to re-introduce it later.
>  #14/16: Made tipc_sub_check_overlap() static, as also reported
>  by sparse.
>
> Jon Maloy (16):
>   tipc: re-organize members of struct publication
>   tipc: move creation of publication item one level up in call chain
>   tipc: introduce new unified address type for internal use
>   tipc: simplify signature of tipc_namtbl_publish()
>   tipc: simplify call signatures for publication creation
>   tipc: simplify signature of tipc_nametbl_withdraw() functions
>   tipc: rename binding table lookup functions
>   tipc: refactor tipc_sendmsg() and tipc_lookup_anycast()
>   tipc: simplify signature of tipc_namtbl_lookup_mcast_sockets()
>   tipc: simplify signature of tipc_nametbl_lookup_mcast_nodes()
>   tipc: simplify signature of tipc_nametbl_lookup_group()
>   tipc: simplify signature of tipc_service_find_range()
>   tipc: simplify signature of tipc_find_service()
>   tipc: simplify api between binding table and topology server
>   tipc: add host-endian copy of user subscription to struct
> tipc_subscription
>   tipc: remove some unnecessary warnings
>
>  net/tipc/addr.h   |  44 +
>  net/tipc/msg.c|  23 ++-
>  net/tipc/name_distr.c |  91 +
>  net/tipc/name_table.c | 426 +-
>  net/tipc/name_table.h |  63 +++
>  net/tipc/net.c|   8 +-
>  net/tipc/node.c   |  28 +--
>  net/tipc/socket.c | 319 ---
>  net/tipc/subscr.c |  84 +
>  net/tipc/subscr.h |  12 +-
>  10 files changed, 573 insertions(+), 525 deletions(-)
>
>
>
> _______
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Acked-by: Xin Long 


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net-next 05/16] tipc: simplify call signatures for publication creation

2021-02-24 Thread Xin Long
On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> We simplify the call signatures for tipc_nametbl_insert_publ() and
> tipc_publ_create() so that fewer parameters are passed around.
>
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/name_distr.c | 23 
>  net/tipc/name_table.c | 61 +++
>  net/tipc/name_table.h | 10 ---
>  net/tipc/socket.c |  8 ++
>  4 files changed, 47 insertions(+), 55 deletions(-)
>
> diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
> index 721d2fca3d6f..df42fc2b4536 100644
> --- a/net/tipc/name_distr.c
> +++ b/net/tipc/name_distr.c
> @@ -293,30 +293,31 @@ static bool tipc_update_nametbl(struct net *net, struct 
> distr_item *i,
> u32 node, u32 dtype)
>  {
> struct publication *p = NULL;
> -   u32 lower = ntohl(i->lower);
> -   u32 upper = ntohl(i->upper);
> -   u32 type = ntohl(i->type);
> -   u32 port = ntohl(i->port);
> +   struct tipc_socket_addr sk;
> +   struct tipc_uaddr ua;
> u32 key = ntohl(i->key);
>
> +   tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
> +  ntohl(i->type), ntohl(i->lower), ntohl(i->upper));
> +   sk.ref = ntohl(i->port);
> +   sk.node = node;
> +
> if (dtype == PUBLICATION) {
> -   p = tipc_nametbl_insert_publ(net, type, lower, upper,
> -TIPC_CLUSTER_SCOPE, node,
> -port, key);
> +   p = tipc_nametbl_insert_publ(net, &ua, &sk, key);
> if (p) {
> tipc_node_subscribe(net, &p->binding_node, node);
> return true;
> }
> } else if (dtype == WITHDRAWAL) {
> -   p = tipc_nametbl_remove_publ(net, type, lower,
> -upper, node, key);
> +   p = tipc_nametbl_remove_publ(net, ua.sr.type, ua.sr.lower,
> +ua.sr.upper, node, key);
> if (p) {
> tipc_node_unsubscribe(net, >binding_node, node);
> kfree_rcu(p, rcu);
> return true;
> }
> -   pr_warn_ratelimited("Failed to remove binding %u,%u from 
> %x\n",
> -   type, lower, node);
> +   pr_warn_ratelimited("Failed to remove binding %u,%u from 
> %u\n",
> +   ua.sr.type, ua.sr.lower, node);
> } else {
> pr_warn("Unrecognized name table message received\n");
> }
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index d951e9345122..ba96d5fc57f3 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -222,36 +222,30 @@ static int hash(int x)
>
>  /**
>   * tipc_publ_create - create a publication structure
> - * @type: name sequence type
> - * @lower: name sequence lower bound
> - * @upper: name sequence upper bound
> - * @scope: publication scope
> - * @node: network address of publishing socket
> - * @port: publishing port
> + * @ua: the service range the user is binding to
> + * @sk: the address of the socket that is bound
>   * @key: publication key
>   */
> -static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
> -   u32 scope, u32 node, u32 port,
> +static struct publication *tipc_publ_create(struct tipc_uaddr *ua,
> +   struct tipc_socket_addr *sk,
> u32 key)
>  {
> -   struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
> +   struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC);
>
> -   if (!publ)
> +   if (!p)
> return NULL;
>
> -   publ->sr.type = type;
> -   publ->sr.lower = lower;
> -   publ->sr.upper = upper;
> -   publ->scope = scope;
> -   publ->sk.node = node;
> -   publ->sk.ref = port;
> -   publ->key = key;
> -   INIT_LIST_HEAD(>binding_sock);
> -   INIT_LIST_HEAD(>binding_node);
> -   INIT_LIST_HEAD(>local_publ);
> -   INIT_LIST_HEAD(>all_publ);
> -   INIT_LIST_HEAD(>list);
> -   return publ;
> +   p->sr = ua->sr;
> +   p->sk = *sk;
> +   p->addrtype = ua->addrtype;
> +   p->scope = ua->scope;
> +   p->key = key;
> +   INIT_LIST_HEAD(>binding_sock);
> +   INIT_LIST_HEAD(>binding_node);
> +   INIT_LIST_HEAD(>local_publ);
> +   INIT_LIST_HEAD(>all_publ);
> +   INIT_LIST_HEAD(>list);
> +   return p;
>  }
>
>  /**
> @@ -468,23 +462,24 @@ static struct tipc_service *tipc_service_find(struct 
> net *net, u32 type)
> return NULL;
>  };
>
> -struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
> -u32 lower, u32 upper,
> - 

Re: [tipc-discussion] [net-next 13/16] tipc: simplify signature of tipc_find_service()

2021-02-24 Thread Xin Long
On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> We reduce the signature of tipc_find_service() and
> tipc_create_service(). The reason for doing this might not
> be obvious, but we plan to let struct tipc_uaddr contain
> information that is relevant for these functions in a later
> commit.
>
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/name_table.c | 58 ++-
>  net/tipc/name_table.h |  2 +-
>  net/tipc/socket.c |  2 +-
>  3 files changed, 37 insertions(+), 25 deletions(-)
>
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index 57abed74d0d9..554a006d7c0d 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -255,20 +255,25 @@ static struct publication *tipc_publ_create(struct 
> tipc_uaddr *ua,
>   *
>   * Allocates a single range structure and sets it to all 0's.
>   */
> -static struct tipc_service *tipc_service_create(u32 type, struct hlist_head 
> *hd)
> +static struct tipc_service *tipc_service_create(struct net *net,
> +   struct tipc_uaddr *ua)
Isn't it too much to pass the 'ua' pointer when only 'type' is used here?

>  {
> -   struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC);
> +   struct name_table *nt = tipc_name_table(net);
> +   struct tipc_service *service;
> +   struct hlist_head *hd;
>
> +   service = kzalloc(sizeof(*service), GFP_ATOMIC);
> if (!service) {
> pr_warn("Service creation failed, no memory\n");
> return NULL;
> }
>
> spin_lock_init(>lock);
> -   service->type = type;
> +   service->type = ua->sr.type;
> service->ranges = RB_ROOT;
> INIT_HLIST_NODE(>service_list);
> INIT_LIST_HEAD(>subscriptions);
> +   hd = >services[hash(ua->sr.type)];
> hlist_add_head_rcu(>service_list, hd);
> return service;
>  }
> @@ -453,15 +458,16 @@ static void tipc_service_subscribe(struct tipc_service 
> *service,
> }
>  }
>
> -static struct tipc_service *tipc_service_find(struct net *net, u32 type)
> +static struct tipc_service *tipc_service_find(struct net *net,
> + struct tipc_uaddr *ua)
>  {
> struct name_table *nt = tipc_name_table(net);
> struct hlist_head *service_head;
> struct tipc_service *service;
>
> -   service_head = >services[hash(type)];
> +   service_head = >services[hash(ua->sr.type)];
> hlist_for_each_entry_rcu(service, service_head, service_list) {
> -   if (service->type == type)
> +   if (service->type == ua->sr.type)
> return service;
> }
> return NULL;
> @@ -472,7 +478,6 @@ struct publication *tipc_nametbl_insert_publ(struct net 
> *net,
>  struct tipc_socket_addr *sk,
>  u32 key)
>  {
> -   struct name_table *nt = tipc_name_table(net);
> struct tipc_service *sc;
> struct publication *p;
> u32 type = ua->sr.type;
> @@ -487,9 +492,9 @@ struct publication *tipc_nametbl_insert_publ(struct net 
> *net,
>  type, ua->sr.lower, ua->sr.upper, sk->node);
> return NULL;
> }
> -   sc = tipc_service_find(net, type);
> +   sc = tipc_service_find(net, ua);
> if (!sc)
> -   sc = tipc_service_create(type, >services[hash(type)]);
> +   sc = tipc_service_create(net, ua);
> if (sc) {
> spin_lock_bh(>lock);
> res = tipc_service_insert_publ(net, sc, p);
> @@ -512,7 +517,7 @@ struct publication *tipc_nametbl_remove_publ(struct net 
> *net,
> struct tipc_service *sc;
> bool last;
>
> -   sc = tipc_service_find(net, ua->sr.type);
> +   sc = tipc_service_find(net, ua);
> if (!sc)
> return NULL;
>
> @@ -585,7 +590,7 @@ bool tipc_nametbl_lookup_anycast(struct net *net,
> return true;
>
> rcu_read_lock();
> -   sc = tipc_service_find(net, ua->sr.type);
> +   sc = tipc_service_find(net, ua);
> if (unlikely(!sc))
> goto exit;
>
> @@ -638,7 +643,7 @@ bool tipc_nametbl_lookup_group(struct net *net, struct 
> tipc_uaddr *ua,
>
> *dstcnt = 0;
> rcu_read_lock();
> -   sc = tipc_service_find(net, ua->sa.type);
> +   sc = tipc_service_find(net, ua);
> if (unlikely(!sc))
> goto exit;
>
> @@ -682,7 +687,7 @@ void tipc_nametbl_lookup_mcast_sockets(struct net *net, 
> struct tipc_uaddr *ua,
> u32 scope = ua->scope;
>
> rcu_read_lock();
> -   sc = tipc_service_find(net, ua->sr.type);
> +   sc = tipc_service_find(net, ua);
> if (!sc)
> goto exit;
>
> @@ -711,7 +716,7 @@ void tipc_nametbl_lookup_mcast_nodes(struct net *net, 
> struct tipc_uaddr *ua,
> 

Re: [tipc-discussion] [net-next 14/16] tipc: simplify api between binding table and topology server

2021-02-24 Thread Xin Long
On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> The function tipc_report_overlap() is called from the binding table
> with numerous parameters taken from an instance of struct publication.
> A closer look reveals that it always is safe to send along a pointer
> to the instance itself, and hence reduce the call signature. We do
> that in this commit.
>
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/name_table.c | 20 ++-
>  net/tipc/subscr.c | 57 +++
>  net/tipc/subscr.h |  9 +++
>  3 files changed, 41 insertions(+), 45 deletions(-)
>
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index 554a006d7c0d..4dfac3b9d26c 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -357,9 +357,7 @@ static bool tipc_service_insert_publ(struct net *net,
>
> /* Any subscriptions waiting for notification?  */
> list_for_each_entry_safe(sub, tmp, >subscriptions, service_list) {
> -   tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper,
> -   TIPC_PUBLISHED, p->sk.ref, p->sk.node,
> -   p->scope, first);
> +   tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first);
> }
> return true;
>  err:
> @@ -451,9 +449,7 @@ static void tipc_service_subscribe(struct tipc_service 
> *service,
> /* Sort the publications before reporting */
> list_sort(NULL, _list, tipc_publ_sort);
> list_for_each_entry_safe(p, tmp, _list, list) {
> -   tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper,
> -   TIPC_PUBLISHED, p->sk.ref, p->sk.node,
> -   p->scope, true);
> +   tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true);
> list_del_init(>list);
> }
>  }
> @@ -532,9 +528,7 @@ struct publication *tipc_nametbl_remove_publ(struct net 
> *net,
> /* Notify any waiting subscriptions */
> last = list_empty(>all_publ);
> list_for_each_entry_safe(sub, tmp, >subscriptions, service_list) {
> -   tipc_sub_report_overlap(sub, ua->sr.lower, ua->sr.upper,
> -   TIPC_WITHDRAWN, sk->ref, sk->node,
> -   ua->scope, last);
> +   tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last);
> }
>
> /* Remove service range item if this was its last publication */
> @@ -543,7 +537,7 @@ struct publication *tipc_nametbl_remove_publ(struct net 
> *net,
> kfree(sr);
> }
>
> -   /* Delete service item if this no more publications and subscriptions 
> */
> +   /* Delete service item if no more publications and subscriptions */
> if (RB_EMPTY_ROOT(>ranges) && list_empty(>subscriptions)) {
> hlist_del_init_rcu(>service_list);
> kfree_rcu(sc, rcu);
> @@ -842,7 +836,8 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
> struct tipc_uaddr ua;
> bool res = true;
>
> -   tipc_uaddr(, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, 0, 0);
> +   tipc_uaddr(, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
> +  tipc_sub_read(s, seq.lower), tipc_sub_read(s, seq.upper));
> spin_lock_bh(>nametbl_lock);
> sc = tipc_service_find(sub->net, );
> if (!sc)
> @@ -873,7 +868,8 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription 
> *sub)
> struct tipc_service *sc;
> struct tipc_uaddr ua;
>
> -   tipc_uaddr(, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, 0, 0);
> +   tipc_uaddr(, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
> +  tipc_sub_read(s, seq.lower), tipc_sub_read(s, seq.upper));
> spin_lock_bh(>nametbl_lock);
> sc = tipc_service_find(sub->net, );
> if (!sc)
> diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
> index f6ad0005218c..feda0b6bbf1b 100644
> --- a/net/tipc/subscr.c
> +++ b/net/tipc/subscr.c
> @@ -40,18 +40,26 @@
>  #include "subscr.h"
>
>  static void tipc_sub_send_event(struct tipc_subscription *sub,
> -   u32 found_lower, u32 found_upper,
> -   u32 event, u32 port, u32 node)
> +   struct publication *p,
> +   u32 event)
>  {
> +   struct tipc_subscr *s = >evt.s;
> struct tipc_event *evt = >evt;
>
> if (sub->inactive)
> return;
> tipc_evt_write(evt, event, event);
> -   tipc_evt_write(evt, found_lower, found_lower);
> -   tipc_evt_write(evt, found_upper, found_upper);
> -   tipc_evt_write(evt, port.ref, port);
> -   tipc_evt_write(evt, port.node, node);
> +   if (p) {
> +   tipc_evt_write(evt, found_lower, p->sr.lower);
> +   tipc_evt_write(evt, found_upper, p->sr.upper);
> +

Re: [tipc-discussion] [net-next 08/16] tipc: refactor tipc_sendmsg() and tipc_lookup_anycast()

2021-02-24 Thread Xin Long
On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> We simplify the signature of function tipc_nametbl_lookup_anycast(),
> using address structures instead of discrete integers.
>
> This also makes it possible to make some improvements to the functions
> __tipc_sendmsg() in socket.c and tipc_msg_lookup_dest() in msg.c.
>
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/msg.c| 23 ++--
>  net/tipc/name_table.c | 75 +++---
>  net/tipc/name_table.h |  5 ++-
>  net/tipc/socket.c | 83 +--
>  4 files changed, 91 insertions(+), 95 deletions(-)
>
> diff --git a/net/tipc/msg.c b/net/tipc/msg.c
> index 9eddbddb2fec..931245e93830 100644
> --- a/net/tipc/msg.c
> +++ b/net/tipc/msg.c
> @@ -711,8 +711,11 @@ bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct 
> sk_buff_head *cpy)
>  bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
>  {
> struct tipc_msg *msg = buf_msg(skb);
> -   u32 dport, dnode;
> -   u32 onode = tipc_own_addr(net);
> +   u32 scope = msg_lookup_scope(msg);
> +   u32 self = tipc_own_addr(net);
> +   u32 inst = msg_nameinst(msg);
> +   struct tipc_socket_addr sk;
> +   struct tipc_uaddr ua;
>
> if (!msg_isdata(msg))
> return false;
> @@ -726,16 +729,16 @@ bool tipc_msg_lookup_dest(struct net *net, struct 
> sk_buff *skb, int *err)
> msg = buf_msg(skb);
> if (msg_reroute_cnt(msg))
> return false;
> -   dnode = tipc_scope2node(net, msg_lookup_scope(msg));
> -   dport = tipc_nametbl_lookup_anycast(net, msg_nametype(msg),
> -   msg_nameinst(msg), &dnode);
> -   if (!dport)
> +   tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope,
> +  msg_nametype(msg), inst, inst);
> +   sk.node = tipc_scope2node(net, scope);
> +   if (!tipc_nametbl_lookup_anycast(net, &ua, &sk))
> return false;
> msg_incr_reroute_cnt(msg);
> -   if (dnode != onode)
> -   msg_set_prevnode(msg, onode);
> -   msg_set_destnode(msg, dnode);
> -   msg_set_destport(msg, dport);
> +   if (sk.node != self)
> +   msg_set_prevnode(msg, self);
> +   msg_set_destnode(msg, sk.node);
> +   msg_set_destport(msg, sk.ref);
> *err = TIPC_OK;
>
> return true;
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index e6177ccf1140..ed68db36bab9 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -549,66 +549,64 @@ struct publication *tipc_nametbl_remove_publ(struct net 
> *net,
>  /**
>   * tipc_nametbl_lookup_anycast - perform service instance to socket 
> translation
>   * @net: network namespace
> - * @type: message type
> - * @instance: message instance
> - * @dnode: the search domain used during translation
> - *
> - * On entry, 'dnode' is the search domain used during the lookup
> + * @ua: service address to look up
> + * @sk: address to socket we want to find
>   *
> + * On entry, a non-zero 'sk->node' indicates the node where we want lookup 
> to be
> + * performed, which may not be this one.
>   * On exit:
> - * - if lookup is deferred to another node, leave 'dnode' unchanged and 
> return 0
> - * - if lookup is attempted and succeeds, set 'dnode' to the publishing node 
> and
> - *   return the published (non-zero) port number
> - * - if lookup is attempted and fails, set 'dnode' to 0 and return 0
> + * - If lookup is deferred to another node, leave 'sk->node' unchanged and
> + *   return 'true'.
> + * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) 
> which
> + *   represent the bound socket and return 'true'.
> + * - If lookup fails, return 'false'
>   *
>   * Note that for legacy users (node configured with Z.C.N address format) the
> - * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0
> + * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0
>   * we must look in the local binding list first
>   */
> -u32 tipc_nametbl_lookup_anycast(struct net *net, u32 type,
> -   u32 instance, u32 *dnode)
> +bool tipc_nametbl_lookup_anycast(struct net *net,
> +struct tipc_uaddr *ua,
> +struct tipc_socket_addr *sk)
>  {
> struct tipc_net *tn = tipc_net(net);
> bool legacy = tn->legacy_addr_format;
> u32 self = tipc_own_addr(net);
> -   struct service_range *sr;
> +   u32 inst = ua->sa.instance;
> +   struct service_range *r;
> struct tipc_service *sc;
> -   struct list_head *list;
> struct publication *p;
> -   u32 port = 0;
> -   u32 node = 0;
> +   struct list_head *l;
> +   bool res = false;
>
> -   if (!tipc_in_scope(legacy, *dnode, self))
> -   return 0;
> +   if (!tipc_in_scope(legacy, sk->node, self))
> +   

Re: [tipc-discussion] [net-next 02/16] tipc: move creation of publication item one level up in call chain

2021-02-24 Thread Xin Long
On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> We instantiate struct publication in tipc_nametbl_insert_publ()
> instead of as currently in tipc_service_insert_publ(). This has the
> advantage that we can pass a pointer to the publication struct to
> the next call levels, instead of the numerous individual parameters
> we pass on now. It also gives us a location to keep the contents of
> the additional fields we will introduce in a later commit.
>
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/name_table.c | 63 ++-
>  1 file changed, 32 insertions(+), 31 deletions(-)
>
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index c37a4a9c87ca..68e269b49780 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -327,49 +327,44 @@ static struct service_range 
> *tipc_service_create_range(struct tipc_service *sc,
> return sr;
>  }
>
> -static struct publication *tipc_service_insert_publ(struct net *net,
> -   struct tipc_service *sc,
> -   u32 type, u32 lower,
> -   u32 upper, u32 scope,
> -   u32 node, u32 port,
> -   u32 key)
> +static bool tipc_service_insert_publ(struct net *net,
> +struct tipc_service *sc,
> +struct publication *p)
>  {
> struct tipc_subscription *sub, *tmp;
> struct service_range *sr;
> -   struct publication *p;
> +   struct publication *_p;
> +   u32 node = p->sk.node;
> bool first = false;
>
> -   sr = tipc_service_create_range(sc, lower, upper);
> +   sr = tipc_service_create_range(sc, p->sr.lower, p->sr.upper);
> if (!sr)
> goto  err;
>
> first = list_empty(>all_publ);
>
> /* Return if the publication already exists */
> -   list_for_each_entry(p, >all_publ, all_publ) {
> -   if (p->key == key && (!p->sk.node || p->sk.node == node))
> -   return NULL;
> +   list_for_each_entry(_p, >all_publ, all_publ) {
> +   if (_p->key == p->key && (!_p->sk.node || _p->sk.node == 
> node))
> +   return false;
> }
>
> -   /* Create and insert publication */
> -   p = tipc_publ_create(type, lower, upper, scope, node, port, key);
> -   if (!p)
> -   goto err;
> -   /* Suppose there shouldn't be a huge gap btw publs i.e. >INT_MAX */
> -   p->id = sc->publ_cnt++;
> -   if (in_own_node(net, node))
> +   if (in_own_node(net, p->sk.node))
> list_add(>local_publ, >local_publ);
> list_add(>all_publ, >all_publ);
> +   p->id = sc->publ_cnt++;
>
> /* Any subscriptions waiting for notification?  */
> list_for_each_entry_safe(sub, tmp, >subscriptions, service_list) {
> -   tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper, 
> TIPC_PUBLISHED,
> -   p->sk.ref, p->sk.node, p->scope, 
> first);
> +   tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper,
> +   TIPC_PUBLISHED, p->sk.ref, p->sk.node,
> +   p->scope, first);
> }
> -   return p;
> +   return true;
>  err:
> -   pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, 
> upper);
> -   return NULL;
> +   pr_warn("Failed to bind to %u,%u,%u, no memory\n",
> +   p->sr.type, p->sr.lower, p->sr.upper);
> +   return false;
>  }
>
>  /**
> @@ -481,6 +476,11 @@ struct publication *tipc_nametbl_insert_publ(struct net 
> *net, u32 type,
> struct name_table *nt = tipc_name_table(net);
> struct tipc_service *sc;
> struct publication *p;
> +   bool res = false;
> +
> +   p = tipc_publ_create(type, lower, upper, scope, node, port, key);
> +   if (!p)
> +   return NULL;
>
> if (scope > TIPC_NODE_SCOPE || lower > upper) {
> pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n",
> @@ -490,14 +490,15 @@ struct publication *tipc_nametbl_insert_publ(struct net 
> *net, u32 type,
> sc = tipc_service_find(net, type);
> if (!sc)
> sc = tipc_service_create(type, >services[hash(type)]);
> -   if (!sc)
> -   return NULL;
> -
> -   spin_lock_bh(>lock);
> -   p = tipc_service_insert_publ(net, sc, type, lower, upper,
> -scope, node, port, key);
> -   spin_unlock_bh(>lock);
> -   return p;
> +   if (sc) {
> +   spin_lock_bh(>lock);
> +   res = tipc_service_insert_publ(net, sc, p);
> +   spin_unlock_bh(>lock);
If move spin_(un)lock_bh into 

Re: [tipc-discussion] [net-next 04/16] tipc: simplify signature of tipc_namtbl_publish()

2021-02-24 Thread Xin Long
On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> Using the new address structure tipc_uaddr, we simplify the signatures
> of functions tipc_sk_publish() and tipc_nametbl_publish() so that fewer
> parameters need to be passed around.
>
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/name_table.c | 10 +++---
>  net/tipc/name_table.h |  6 ++--
>  net/tipc/net.c|  8 +++--
>  net/tipc/node.c   | 29 +
>  net/tipc/socket.c | 75 +++
>  5 files changed, 70 insertions(+), 58 deletions(-)
>
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index 68e269b49780..d951e9345122 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -742,9 +742,8 @@ void tipc_nametbl_build_group(struct net *net, struct 
> tipc_group *grp,
>
>  /* tipc_nametbl_publish - add service binding to name table
>   */
> -struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 
> lower,
> -u32 upper, u32 scope, u32 port,
> -u32 key)
> +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr 
> *ua,
> +struct tipc_socket_addr *sk, u32 key)
>  {
> struct name_table *nt = tipc_name_table(net);
> struct tipc_net *tn = tipc_net(net);
> @@ -759,8 +758,9 @@ struct publication *tipc_nametbl_publish(struct net *net, 
> u32 type, u32 lower,
> goto exit;
> }
>
> -   p = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
> -tipc_own_addr(net), port, key);
> +   p = tipc_nametbl_insert_publ(net, ua->sr.type, ua->sr.lower,
> +ua->sr.upper, ua->scope,
> +sk->node, sk->ref, key);
> if (p) {
> nt->local_publ_count++;
> skb = tipc_named_publish(net, p);
> diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
> index 3fff00440e1a..5e969e060509 100644
> --- a/net/tipc/name_table.h
> +++ b/net/tipc/name_table.h
> @@ -42,6 +42,7 @@ struct tipc_subscription;
>  struct tipc_plist;
>  struct tipc_nlist;
>  struct tipc_group;
> +struct tipc_uaddr;
>
>  /*
>   * TIPC name types reserved for internal TIPC use (both current and planned)
> @@ -120,9 +121,8 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 
> type, u32 lower,
>  bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
>  struct list_head *dsts, int *dstcnt, u32 exclude,
>  bool all);
> -struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 
> lower,
> -u32 upper, u32 scope, u32 port,
> -u32 key);
> +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr 
> *ua,
> +struct tipc_socket_addr *sk, u32 
> key);
>  int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
>   u32 key);
>  struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
> diff --git a/net/tipc/net.c b/net/tipc/net.c
> index a129f661bee3..3f927949bb23 100644
> --- a/net/tipc/net.c
> +++ b/net/tipc/net.c
> @@ -125,6 +125,11 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
>  static void tipc_net_finalize(struct net *net, u32 addr)
>  {
> struct tipc_net *tn = tipc_net(net);
> +   struct tipc_socket_addr sk = {0, addr};
> +   struct tipc_uaddr ua;
> +
> +   tipc_uaddr(, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
> +  TIPC_NODE_STATE, addr, addr);
>
> if (cmpxchg(>node_addr, 0, addr))
> return;
> @@ -132,8 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
> tipc_named_reinit(net);
> tipc_sk_reinit(net);
> tipc_mon_reinit_self(net);
> -   tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr,
> -TIPC_CLUSTER_SCOPE, 0, addr);
> +   tipc_nametbl_publish(net, , , addr);
>  }
>
>  void tipc_net_finalize_work(struct work_struct *work)
> diff --git a/net/tipc/node.c b/net/tipc/node.c
> index 86b4d7ffb47a..3a71e26c9509 100644
> --- a/net/tipc/node.c
> +++ b/net/tipc/node.c
> @@ -393,21 +393,23 @@ static void tipc_node_write_unlock_fast(struct 
> tipc_node *n)
>
>  static void tipc_node_write_unlock(struct tipc_node *n)
>  {
> +   struct tipc_socket_addr sk;
> struct net *net = n->net;
> -   u32 addr = 0;
> u32 flags = n->action_flags;
> -   u32 link_id = 0;
> -   u32 bearer_id;
> struct list_head *publ_list;
> +   struct tipc_uaddr ua;
> +   u32 bearer_id;
>
> if (likely(!flags)) {
> write_unlock_bh(>lock);
> return;
> }
>
> -   addr = n->addr;
> -   link_id 

Re: [tipc-discussion] [net-next 00/16] tipc: cleanups and simplifications

2021-02-24 Thread Xin Long
Hi Jon,

Sorry for checking this patchset so late; I just came back from the
Chinese New Year holidays.

Overall I like the idea to use one or two new structures to make the
parameters passing between functions more clear and readable.
Meanwhile, I think that if the parameters of a function are already
simple, we may just pass exactly what the function needs. After
all, reading ua->xxx requires the CPU to access a memory object instead
of reading registers only, and we will have to build a ua object before
calling the function.

I just commented on some of the patches.

Thanks.

On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> We make a number of simplifications and cleanups, especially to call 
> signatures
> in the binding table. This makes the code easier to understand and serves as a
> preparation for an upcoming functional addition.
>
> Jon Maloy (16):
>   tipc: re-organize members of struct publication
>   tipc: move creation of publication item one level up in call chain
>   tipc: introduce new unified address type for internal use
>   tipc: simplify signature of tipc_namtbl_publish()
>   tipc: simplify call signatures for publication creation
>   tipc: simplify signature of tipc_nametbl_withdraw() functions
>   tipc: rename binding table lookup functions
>   tipc: refactor tipc_sendmsg() and tipc_lookup_anycast()
>   tipc: simplify signature of tipc_namtbl_lookup_mcast_sockets()
>   tipc: simplify signature of tipc_nametbl_lookup_mcast_nodes()
>   tipc: simplify signature of tipc_nametbl_lookup_group()
>   tipc: simplify signature of tipc_service_find_range()
>   tipc: simplify signature of tipc_find_service()
>   tipc: simplify api between binding table and topology server
>   tipc: add host-endian copy of user subscription to struct
> tipc_subscription
>   tipc: remove some unnecessary warnings
>
>  net/tipc/addr.h   |  44 +
>  net/tipc/msg.c|  23 ++-
>  net/tipc/name_distr.c |  89 +
>  net/tipc/name_table.c | 419 ++
>  net/tipc/name_table.h |  64 ---
>  net/tipc/net.c|   8 +-
>  net/tipc/node.c   |  28 +--
>  net/tipc/socket.c | 313 +++
>  net/tipc/subscr.c |  84 +
>  net/tipc/subscr.h |  12 +-
>  10 files changed, 567 insertions(+), 517 deletions(-)
>
> --
> 2.28.0
>
>
>
> ___
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [net-next 06/16] tipc: simplify signature of tipc_nametbl_withdraw() functions

2021-02-24 Thread Xin Long
On Wed, Dec 9, 2020 at 2:51 AM  wrote:
>
> From: Jon Maloy 
>
> Following the principles of the preceding commits, we reduce
> the number of parameters passed along in tipc_sk_withdraw(),
> tipc_nametbl_withdraw() and associated functions.
>
> Signed-off-by: Jon Maloy 
> ---
>  net/tipc/name_distr.c | 11 
>  net/tipc/name_table.c | 54 +++-
>  net/tipc/name_table.h | 11 
>  net/tipc/node.c   |  3 +-
>  net/tipc/socket.c | 64 +--
>  5 files changed, 74 insertions(+), 69 deletions(-)
>
> diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
> index df42fc2b4536..fe5f39792323 100644
> --- a/net/tipc/name_distr.c
> +++ b/net/tipc/name_distr.c
> @@ -244,17 +244,19 @@ static void tipc_publ_purge(struct net *net, struct 
> publication *p, u32 addr)
>  {
> struct tipc_net *tn = tipc_net(net);
> struct publication *_p;
> +   struct tipc_uaddr ua;
>
> +   tipc_uaddr(, p->addrtype, p->scope, p->sr.type,
> +  p->sr.lower, p->sr.upper);
> spin_lock_bh(>nametbl_lock);
> -   _p = tipc_nametbl_remove_publ(net, p->sr.type, p->sr.lower,
> - p->sr.upper, p->sk.node, p->key);
> +   _p = tipc_nametbl_remove_publ(net, , >sk, p->key);
> if (_p)
> tipc_node_unsubscribe(net, &_p->binding_node, addr);
> spin_unlock_bh(>nametbl_lock);
>
> if (_p != p) {
> pr_err("Unable to remove publication from failed node\n"
> -  " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
> +  " (type=%u, lower=%u, node=%u, port=%u, key=%u)\n",
Currently, there still seem to be some places using 0x%x to print the node value.

>p->sr.type, p->sr.lower, p->sk.node, p->sk.ref, 
> p->key);
> }
>
> @@ -309,8 +311,7 @@ static bool tipc_update_nametbl(struct net *net, struct 
> distr_item *i,
> return true;
> }
> } else if (dtype == WITHDRAWAL) {
> -   p = tipc_nametbl_remove_publ(net, ua.sr.type, ua.sr.lower,
> -ua.sr.upper, node, key);
> +   p = tipc_nametbl_remove_publ(net, , , key);
> if (p) {
> tipc_node_unsubscribe(net, >binding_node, node);
> kfree_rcu(p, rcu);
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index ba96d5fc57f3..50562d086016 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -367,12 +367,15 @@ static bool tipc_service_insert_publ(struct net *net,
>   * @node: target node
>   * @key: target publication key
>   */
> -static struct publication *tipc_service_remove_publ(struct service_range *sr,
> -   u32 node, u32 key)
> +static struct publication *tipc_service_remove_publ(struct service_range *r,
> +   struct tipc_uaddr *ua,
> +   struct tipc_socket_addr 
> *sk,
> +   u32 key)
>  {
> struct publication *p;
> +   u32 node = sk->node;
>
> -   list_for_each_entry(p, >all_publ, all_publ) {
> +   list_for_each_entry(p, >all_publ, all_publ) {
> if (p->key != key || (node && node != p->sk.node))
> continue;
> list_del(>all_publ);
> @@ -496,32 +499,35 @@ struct publication *tipc_nametbl_insert_publ(struct net 
> *net,
> return NULL;
>  }
>
> -struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
> -u32 lower, u32 upper,
> -u32 node, u32 key)
> +struct publication *tipc_nametbl_remove_publ(struct net *net,
> +struct tipc_uaddr *ua,
> +struct tipc_socket_addr *sk,
> +u32 key)
>  {
> -   struct tipc_service *sc = tipc_service_find(net, type);
> struct tipc_subscription *sub, *tmp;
> -   struct service_range *sr = NULL;
> struct publication *p = NULL;
> +   struct service_range *sr;
> +   struct tipc_service *sc;
> bool last;
>
> +   sc = tipc_service_find(net, ua->sr.type);
> if (!sc)
> return NULL;
>
> spin_lock_bh(>lock);
> -   sr = tipc_service_find_range(sc, lower, upper);
> +   sr = tipc_service_find_range(sc, ua->sr.lower, ua->sr.upper);
> if (!sr)
> goto exit;
> -   p = tipc_service_remove_publ(sr, node, key);
> +   p = tipc_service_remove_publ(sr, ua, sk, key);
> if (!p)
> goto exit;
>
> /* Notify any waiting subscriptions */
> last = list_empty(>all_publ);
> 

Re: [tipc-discussion] [net-next v2 0/3] tipc: some small cleanups

2021-04-14 Thread Xin Long
On Wed, Apr 7, 2021 at 4:59 PM  wrote:
>
> From: Jon Maloy 
>
> We make some minor code cleanups and improvements.
>
> ---
> v2: - Removed patch #1 from v1, which has now been applied upstream
> - Fixed memory leak in patch #2 as identified by Hoang
>
> Jon Maloy (3):
>   tipc: eliminate redundant fields in struct tipc_sock
>   tipc: refactor function tipc_sk_anc_data_recv()
>   tipc: simplify handling of lookup scope during multicast message
> reception
>
>  net/tipc/name_table.c |   6 +-
>  net/tipc/name_table.h |   4 +-
>  net/tipc/socket.c | 149 +++---
>  3 files changed, 74 insertions(+), 85 deletions(-)
>
> --
> 2.29.2
>
>
>
> ___
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion
LGTM!

Reviewed-by: Xin Long 


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: call tipc_wait_for_connect only when dlen is not 0

2021-08-15 Thread Xin Long
__tipc_sendmsg() is called to send SYN packet by either tipc_sendmsg()
or tipc_connect(). The difference is in tipc_connect(), it will call
tipc_wait_for_connect() after __tipc_sendmsg() to wait until connecting
is done. So there's no need to wait in __tipc_sendmsg() for this case.

This patch fixes it by calling tipc_wait_for_connect() in __tipc_sendmsg()
only when dlen is not 0, since dlen is 0 when it's called by tipc_connect().

Note this also fixes the failure in tipcutils/test/ptts/:

  # ./tipcTS &
  # ./tipcTC 9
  (hang)

Fixes: 36239dab6da7 ("tipc: fix implicit-connect for SYN+")
Reported-by: Shuang Li 
Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 75b99b7eda22..8754bd885169 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1518,7 +1518,7 @@ static int __tipc_sendmsg(struct socket *sock, struct 
msghdr *m, size_t dlen)
 
if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
-   if (timeout) {
+   if (dlen && timeout) {
timeout = msecs_to_jiffies(timeout);
tipc_wait_for_connect(sock, );
}
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: call tipc_wait_for_connect only when dlen is not 0

2021-08-03 Thread Xin Long
__tipc_sendmsg() is called to send SYN packet by either tipc_sendmsg()
or tipc_connect(). The difference is in tipc_connect(), it will call
tipc_wait_for_connect() after __tipc_sendmsg() to wait until connecting
is done. So there's no need to wait in __tipc_sendmsg() for this case.

This patch fixes it by calling tipc_wait_for_connect() in __tipc_sendmsg()
only when dlen is not 0, since dlen is 0 when it's called by tipc_connect().

Note this also fixes the failure in tipcutils/test/ptts/:

  # ./tipcTS &
  # ./tipcTC 9
  (hang)

Fixes: 36239dab6da7 ("tipc: fix implicit-connect for SYN+")
Reported-by: Shuang Li 
Signed-off-by: Xin Long 
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 75b99b7eda22..8754bd885169 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1518,7 +1518,7 @@ static int __tipc_sendmsg(struct socket *sock, struct 
msghdr *m, size_t dlen)
 
if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
-   if (timeout) {
+   if (dlen && timeout) {
timeout = msecs_to_jiffies(timeout);
tipc_wait_for_connect(sock, );
}
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net-next 8/8] tipc: add hardware gso

2021-09-22 Thread Xin Long
On Fri, Sep 10, 2021 at 8:08 AM Jon Maloy  wrote:
>
>
>
> On 06/07/2021 14:22, Xin Long wrote:
> > Since there are not enough bits in netdev_features_t for
> > NETIF_F_GSO_TIPC_BIT, and tipc is using code similar
> > to sctp, this patch will reuse SKB_GSO_SCTP and
> > NETIF_F_GSO_SCTP_BIT for tipc.
> >
> > Signed-off-by: Xin Long 
> > ---
> >   include/linux/skbuff.h |  2 --
> >   net/tipc/node.c| 15 ++-
> >   net/tipc/offload.c |  4 ++--
> >   3 files changed, 16 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 148bf0ed7336..b2db9cd9a73f 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -599,8 +599,6 @@ enum {
> >   SKB_GSO_UDP_L4 = 1 << 17,
> >
> >   SKB_GSO_FRAGLIST = 1 << 18,
> > -
> > - SKB_GSO_TIPC = 1 << 19,
> >   };
> >
> >   #if BITS_PER_LONG > 32
> > diff --git a/net/tipc/node.c b/net/tipc/node.c
> > index 9947b7dfe1d2..17e59c8dac31 100644
> > --- a/net/tipc/node.c
> > +++ b/net/tipc/node.c
> > @@ -2068,7 +2068,7 @@ static bool tipc_node_check_state(struct tipc_node 
> > *n, struct sk_buff *skb,
> >* Invoked with no locks held. Bearer pointer must point to a valid bearer
> >* structure (i.e. cannot be NULL), but bearer can be inactive.
> >*/
> > -void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
> > +static void __tipc_rcv(struct net *net, struct sk_buff *skb, struct 
> > tipc_bearer *b)
> >   {
> >   struct sk_buff_head xmitq;
> >   struct tipc_link_entry *le;
> > @@ -2189,6 +2189,19 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, 
> > struct tipc_bearer *b)
> >   kfree_skb(skb);
> >   }
> >
> > +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
> > +{
> > + struct sk_buff *seg, *next;
> > +
> > + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
> > + return __tipc_rcv(net, skb, b);
> > +
> > + skb_list_walk_safe(skb_shinfo(skb)->frag_list, seg, next)
> > + __tipc_rcv(net, seg, b);
> > + skb_shinfo(skb)->frag_list = NULL;
> > + consume_skb(skb);
> > +}
> > +
> >   void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
> > int prop)
> >   {
> > diff --git a/net/tipc/offload.c b/net/tipc/offload.c
> > index d137679f4db0..26e372178635 100644
> > --- a/net/tipc/offload.c
> > +++ b/net/tipc/offload.c
> > @@ -5,7 +5,7 @@
> >   static struct sk_buff *tipc_gso_segment(struct sk_buff *skb,
> >   netdev_features_t features)
> >   {
> > - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TIPC))
> > + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
> >   return ERR_PTR(-EINVAL);
> >
> >   return skb_segment(skb, (features | NETIF_F_HW_CSUM) & ~NETIF_F_SG);
> > @@ -39,7 +39,7 @@ bool tipc_msg_gso_append(struct sk_buff **p, struct 
> > sk_buff *skb, u16 segs)
> >
> >   skb_shinfo(nskb)->frag_list = head;
> >   skb_shinfo(nskb)->gso_segs = 1;
> > - skb_shinfo(nskb)->gso_type = SKB_GSO_TIPC;
> > + skb_shinfo(nskb)->gso_type = SKB_GSO_SCTP;
> >   skb_shinfo(nskb)->gso_size = GSO_BY_FRAGS;
> >   skb_reset_network_header(head);
> >
> >
>
> I don't have much more to add - it looks good to me, and way simpler
> than what I was trying a couple of years ago.
>
> If you fix the checkpatch issues and, maybe if possible, split it into
> two series, you have my ack.
>
> PS. Did you test this with crypto?
Hi Jon,

Sorry for the late reply.

Got an urgent problem from a customer recently, and spent quite a few
weeks getting things almost done.
I need to do more testing for its performance in different scenarios
before continuing.
I think I did test this with crypto, but I will confirm that it works well over crypto.

Thanks a lot for checking!

>
> ///jon
>


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next] tipc: delete the unlikely branch in tipc_aead_encrypt

2021-11-21 Thread Xin Long
When a skb comes to tipc_aead_encrypt(), it's always linear. The
unlikely check '!skb_cloned(skb) && tailen <= skb_tailroom(skb)'
can completely be taken care of in skb_cow_data() by the code
in branch "if (!skb_has_frag_list())".

Also, remove the 'TODO:' annotation, as the pages in skbs are not
writable, see more on commit 3cf4375a0904 ("tipc: do not write
skb_shinfo frags when doing decrytion").

Signed-off-by: Xin Long 
---
 net/tipc/crypto.c | 19 ---
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index e701651f6533..c5eefe4a8c4d 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -757,21 +757,10 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, 
struct sk_buff *skb,
 skb_tailroom(skb), tailen);
}
 
-   if (unlikely(!skb_cloned(skb) && tailen <= skb_tailroom(skb))) {
-   nsg = 1;
-   trailer = skb;
-   } else {
-   /* TODO: We could avoid skb_cow_data() if skb has no frag_list
-* e.g. by skb_fill_page_desc() to add another page to the skb
-* with the wanted tailen... However, page skbs look not often,
-* so take it easy now!
-* Cloned skbs e.g. from link_xmit() seems no choice though :(
-*/
-   nsg = skb_cow_data(skb, tailen, );
-   if (unlikely(nsg < 0)) {
-   pr_err("TX: skb_cow_data() returned %d\n", nsg);
-   return nsg;
-   }
+   nsg = skb_cow_data(skb, tailen, );
+   if (unlikely(nsg < 0)) {
+   pr_err("TX: skb_cow_data() returned %d\n", nsg);
+   return nsg;
}
 
pskb_put(skb, trailer, tailen);
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] shouldn't unencrypted packets be discarded if any key is set on local node?

2021-11-10 Thread Xin Long
Hi Everyone,

Currently in tipc_rcv(), it seems that both unencrypted and encrypted packets
can be processed even when key/master_key is set.

Once the key is set, all packets going out will be encrypted, so
responding to unencrypted packets with encrypted packets doesn't seem
normal, from my point of view.

Besides, it may cause some potential security issues if the local node can
still receive unencrypted packets after the key is set, such as the CVE
one fixed by:

fa40d9734a57 ("tipc: fix size validations for the MSG_CRYPTO type")

So I'm thinking of only accepting the encrypted packets if any key is
set on the local node. But I'm not sure if we have any other cases
needing it to accept both kinds of packets, anyone know? Tuong?

Thanks.


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net] tipc: only accept encrypted MSG_CRYPTO msgs

2021-11-14 Thread Xin Long
You're right, will do

Thanks.

On Sun, Nov 14, 2021 at 2:00 PM Jon Maloy  wrote:
>
> You should mention that this is a supplementary fix to CVE-2021-43267,
> improving the original fix in commit
> fa40d9734a57bcbfa79a280189799f76c88f7bb0 ("tipc: fix size validations
> for the MSG_CRYPTO type")
>
> ///jon
>
>
>
>
> On 11/14/21 08:09, Xue, Ying wrote:
> > Thanks Xin! The patch looks good to me.
> >
> > Acked-by: Ying Xue 
> >
> > -Original Message-
> > From: Xin Long 
> > Sent: Saturday, November 13, 2021 3:23 AM
> > To: tipc-discussion@lists.sourceforge.net
> > Subject: [tipc-discussion] [PATCH net] tipc: only accept encrypted 
> > MSG_CRYPTO msgs
> >
> > The MSG_CRYPTO msgs are always encrypted and sent to other nodes for keys' 
> > deployment. But when receiving in peers, if those nodes do not validate it 
> > and make sure it's encrypted, one could craft a malicious MSG_CRYPTO msg to 
> > deploy its key with no need to know other nodes' keys.
> >
> > This patch is to do that by checking TIPC_SKB_CB(skb)->decrypted and 
> > discard it if this packet never got decrypted.
> >
> > Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange")
> > Signed-off-by: Xin Long 
> > ---
> >   net/tipc/link.c | 7 +--
> >   1 file changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/tipc/link.c b/net/tipc/link.c index 
> > 1b7a487c8841..09ae8448f394 100644
> > --- a/net/tipc/link.c
> > +++ b/net/tipc/link.c
> > @@ -1298,8 +1298,11 @@ static bool tipc_data_input(struct tipc_link *l, 
> > struct sk_buff *skb,
> >   return false;
> >   #ifdef CONFIG_TIPC_CRYPTO
> >   case MSG_CRYPTO:
> > - tipc_crypto_msg_rcv(l->net, skb);
> > - return true;
> > + if (TIPC_SKB_CB(skb)->decrypted) {
> > + tipc_crypto_msg_rcv(l->net, skb);
> > + return true;
> > + }
> > + fallthrough;
> >   #endif
> >   default:
> >   pr_warn("Dropping received illegal msg type\n");
> > --
> > 2.27.0
> >
> >
> >
> > ___
> > tipc-discussion mailing list
> > tipc-discussion@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/tipc-discussion
> >
> >
> > ___
> > tipc-discussion mailing list
> > tipc-discussion@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/tipc-discussion
> >
>


___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next] tipc: delete the unlikely branch in tipc_aead_encrypt

2021-11-24 Thread Xin Long
When a skb comes to tipc_aead_encrypt(), it's always linear. The
unlikely check '!skb_cloned(skb) && tailen <= skb_tailroom(skb)'
can completely be taken care of in skb_cow_data() by the code
in branch "if (!skb_has_frag_list())".

Also, remove the 'TODO:' annotation, as the pages in skbs are not
writable, see more on commit 3cf4375a0904 ("tipc: do not write
skb_shinfo frags when doing decrytion").

Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/crypto.c | 19 ---
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index b4d9419a015b..81116312b753 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -761,21 +761,10 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, 
struct sk_buff *skb,
 skb_tailroom(skb), tailen);
}
 
-   if (unlikely(!skb_cloned(skb) && tailen <= skb_tailroom(skb))) {
-   nsg = 1;
-   trailer = skb;
-   } else {
-   /* TODO: We could avoid skb_cow_data() if skb has no frag_list
-* e.g. by skb_fill_page_desc() to add another page to the skb
-* with the wanted tailen... However, page skbs look not often,
-* so take it easy now!
-* Cloned skbs e.g. from link_xmit() seems no choice though :(
-*/
-   nsg = skb_cow_data(skb, tailen, );
-   if (unlikely(nsg < 0)) {
-   pr_err("TX: skb_cow_data() returned %d\n", nsg);
-   return nsg;
-   }
+   nsg = skb_cow_data(skb, tailen, );
+   if (unlikely(nsg < 0)) {
+   pr_err("TX: skb_cow_data() returned %d\n", nsg);
+   return nsg;
}
 
pskb_put(skb, trailer, tailen);
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next] tipc: discard MSG_CRYPTO msgs when key_exchange_enabled is not set

2021-11-24 Thread Xin Long
When key_exchange is disabled, there is no reason to accept MSG_CRYPTO
msgs if it doesn't send MSG_CRYPTO msgs.

Signed-off-by: Xin Long 
---
 net/tipc/link.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 09ae8448f394..8d9e09f48f4c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1298,7 +1298,8 @@ static bool tipc_data_input(struct tipc_link *l, struct 
sk_buff *skb,
return false;
 #ifdef CONFIG_TIPC_CRYPTO
case MSG_CRYPTO:
-   if (TIPC_SKB_CB(skb)->decrypted) {
+   if (sysctl_tipc_key_exchange_enabled &&
+   TIPC_SKB_CB(skb)->decrypted) {
tipc_crypto_msg_rcv(l->net, skb);
return true;
}
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: only accept encrypted MSG_CRYPTO msgs

2021-11-12 Thread Xin Long
The MSG_CRYPTO msgs are always encrypted and sent to other nodes
for keys' deployment. But when receiving in peers, if those nodes
do not validate it and make sure it's encrypted, one could craft
a malicious MSG_CRYPTO msg to deploy its key with no need to know
other nodes' keys.

This patch is to do that by checking TIPC_SKB_CB(skb)->decrypted
and discard it if this packet never got decrypted.

Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange")
Signed-off-by: Xin Long 
---
 net/tipc/link.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1b7a487c8841..09ae8448f394 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1298,8 +1298,11 @@ static bool tipc_data_input(struct tipc_link *l, struct 
sk_buff *skb,
return false;
 #ifdef CONFIG_TIPC_CRYPTO
case MSG_CRYPTO:
-   tipc_crypto_msg_rcv(l->net, skb);
-   return true;
+   if (TIPC_SKB_CB(skb)->decrypted) {
+   tipc_crypto_msg_rcv(l->net, skb);
+   return true;
+   }
+   fallthrough;
 #endif
default:
pr_warn("Dropping received illegal msg type\n");
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: only accept encrypted MSG_CRYPTO msgs

2021-11-15 Thread Xin Long
The MSG_CRYPTO msgs are always encrypted and sent to other nodes
for keys' deployment. But when receiving in peers, if those nodes
do not validate it and make sure it's encrypted, one could craft
a malicious MSG_CRYPTO msg to deploy its key with no need to know
other nodes' keys.

This patch does that by checking TIPC_SKB_CB(skb)->decrypted
and discarding the packet if it never got decrypted.

Note that this is also a supplementary fix to CVE-2021-43267 that
can be triggered by an unencrypted malicious MSG_CRYPTO msg.

Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange")
Acked-by: Ying Xue 
Acked-by: Jon Maloy 
Signed-off-by: Xin Long 
---
 net/tipc/link.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1b7a487c8841..09ae8448f394 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1298,8 +1298,11 @@ static bool tipc_data_input(struct tipc_link *l, struct 
sk_buff *skb,
return false;
 #ifdef CONFIG_TIPC_CRYPTO
case MSG_CRYPTO:
-   tipc_crypto_msg_rcv(l->net, skb);
-   return true;
+   if (TIPC_SKB_CB(skb)->decrypted) {
+   tipc_crypto_msg_rcv(l->net, skb);
+   return true;
+   }
+   fallthrough;
 #endif
default:
pr_warn("Dropping received illegal msg type\n");
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net-next] tipc: discard MSG_CRYPTO msgs when key_exchange_enabled is not set

2021-12-10 Thread Xin Long
When key_exchange is disabled, a node doesn't send MSG_CRYPTO msgs,
so there is no reason for it to accept MSG_CRYPTO msgs either.

Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/link.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 09ae8448f394..8d9e09f48f4c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1298,7 +1298,8 @@ static bool tipc_data_input(struct tipc_link *l, struct 
sk_buff *skb,
return false;
 #ifdef CONFIG_TIPC_CRYPTO
case MSG_CRYPTO:
-   if (TIPC_SKB_CB(skb)->decrypted) {
+   if (sysctl_tipc_key_exchange_enabled &&
+   TIPC_SKB_CB(skb)->decrypted) {
tipc_crypto_msg_rcv(l->net, skb);
return true;
}
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: fix implicit-connect for SYN+

2021-07-22 Thread Xin Long
For implicit-connect, when it's either SYN- or SYN+, an ACK should
be sent back to the client immediately. It's not appropriate for
the client to enter established state only after receiving data
from the server.

On client side, after the SYN is sent out, tipc_wait_for_connect()
should be called to wait for the ACK if timeout is set.

This patch also restricts __tipc_sendstream() to call __tipc_sendmsg()
only when the socket is in TIPC_OPEN state, so that the client can do
both connecting and data sending in a single loop like:

  for (...)
  sendmsg(dest, buf);

This makes the implicit-connect more implicit.

Fixes: b97bf3fd8f6a ("[TIPC] Initial merge")
Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/socket.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34a97ea36cc8..ebd300c26a44 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk);
 static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t 
dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
 static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
 
 static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
@@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct 
msghdr *m, size_t dlen)
rc = 0;
}
 
-   if (unlikely(syn && !rc))
+   if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
+   if (timeout) {
+   timeout = msecs_to_jiffies(timeout);
+   tipc_wait_for_connect(sock, &timeout);
+   }
+   }
 
return rc ? rc : dlen;
 }
@@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct 
msghdr *m, size_t dlen)
return -EMSGSIZE;
 
/* Handle implicit connection setup */
-   if (unlikely(dest)) {
+   if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
rc = __tipc_sendmsg(sock, m, dlen);
if (dlen && dlen == rc) {
tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -2689,9 +2695,10 @@ static int tipc_accept(struct socket *sock, struct 
socket *new_sock, int flags,
   bool kern)
 {
struct sock *new_sk, *sk = sock->sk;
-   struct sk_buff *buf;
struct tipc_sock *new_tsock;
+   struct msghdr m = {NULL,};
struct tipc_msg *msg;
+   struct sk_buff *buf;
long timeo;
int res;
 
@@ -2737,19 +2744,17 @@ static int tipc_accept(struct socket *sock, struct 
socket *new_sock, int flags,
}
 
/*
-* Respond to 'SYN-' by discarding it & returning 'ACK'-.
-* Respond to 'SYN+' by queuing it on new socket.
+* Respond to 'SYN-' by discarding it & returning 'ACK'.
+* Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
 */
if (!msg_data_sz(msg)) {
-   struct msghdr m = {NULL,};
-
tsk_advance_rx_queue(sk);
-   __tipc_sendstream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+   __tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
 exit:
release_sock(sk);
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] FW: [syzbot] KASAN: use-after-free Read in tipc_recvmsg

2021-07-22 Thread Xin Long
On Thu, Jul 22, 2021 at 9:55 PM Hoang Huu Le  wrote:
>
> Hi Xin,
>
> I think the issue caused by your patch:
>
> f4919ff59c282 ("tipc: keep the skb in rcv queue until the whole data is read")
> 
> 1976 if (!skb_cb->bytes_read)
> 1977 tsk_advance_rx_queue(sk);   <-- skb free-ed here
> 1978
> 1979 if (likely(!connected) || skb_cb->bytes_read) <- use-after-free
> 1980 goto exit;
> 
>
> Can you take a look at the issue.
will do, thanks for reminding.

>
> Thanks,
> Hoang
> -Original Message-
> From: syzbot 
> Sent: Monday, July 19, 2021 12:15 AM
> To: da...@davemloft.net; devicet...@vger.kernel.org; frowand.l...@gmail.com; 
> gre...@linuxfoundation.org; jma...@redhat.com; k...@kernel.org; 
> linux-ker...@vger.kernel.org; net...@vger.kernel.org; raf...@kernel.org; 
> robh...@kernel.org; r...@kernel.org; syzkaller-b...@googlegroups.com; 
> tipc-discussion@lists.sourceforge.net; ying@windriver.com
> Subject: [syzbot] KASAN: use-after-free Read in tipc_recvmsg
>
> Hello,
>
> syzbot found the following issue on:
>
> HEAD commit:ab0441b4a920 Merge branch 'vmxnet3-version-6'
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=1744ac6a30
> kernel config:  https://syzkaller.appspot.com/x/.config?x=da140227e4f25b17
> dashboard link: https://syzkaller.appspot.com/bug?extid=e6741b97d5552f97c24d
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=13973a7430
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=17ffc90230
>
> The issue was bisected to:
>
> commit 67a3156453859ceb40dc4448b7a6a99ea0ad27c7
> Author: Rob Herring 
> Date:   Thu May 27 19:45:47 2021 +
>
> of: Merge of_address_to_resource() and of_pci_address_to_resource() 
> implementations
>
> bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=129b043830
> final oops: https://syzkaller.appspot.com/x/report.txt?x=119b043830
> console output: https://syzkaller.appspot.com/x/log.txt?x=169b043830
>
> IMPORTANT: if you fix the issue, please add the following tag to the commit:
> Reported-by: syzbot+e6741b97d5552f97c...@syzkaller.appspotmail.com
> Fixes: 67a315645385 ("of: Merge of_address_to_resource() and 
> of_pci_address_to_resource() implementations")
>
> ==
> BUG: KASAN: use-after-free in tipc_recvmsg+0xf77/0xf90 net/tipc/socket.c:1979
> Read of size 4 at addr 8880328cf1c0 by task kworker/u4:0/8
>
> CPU: 1 PID: 8 Comm: kworker/u4:0 Not tainted 5.14.0-rc1-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> Google 01/01/2011
> Workqueue: tipc_rcv tipc_conn_recv_work
> Call Trace:
>  __dump_stack lib/dump_stack.c:88 [inline]
>  dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:105
>  print_address_description.constprop.0.cold+0x6c/0x309 mm/kasan/report.c:233
>  __kasan_report mm/kasan/report.c:419 [inline]
>  kasan_report.cold+0x83/0xdf mm/kasan/report.c:436
>  tipc_recvmsg+0xf77/0xf90 net/tipc/socket.c:1979
>  sock_recvmsg_nosec net/socket.c:943 [inline]
>  sock_recvmsg net/socket.c:961 [inline]
>  sock_recvmsg+0xca/0x110 net/socket.c:957
>  tipc_conn_rcv_from_sock+0x162/0x2f0 net/tipc/topsrv.c:398
>  tipc_conn_recv_work+0xeb/0x190 net/tipc/topsrv.c:421
>  process_one_work+0x98d/0x1630 kernel/workqueue.c:2276
>  worker_thread+0x658/0x11f0 kernel/workqueue.c:2422
>  kthread+0x3e5/0x4d0 kernel/kthread.c:319
>  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
>
> Allocated by task 8446:
>  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
>  kasan_set_track mm/kasan/common.c:46 [inline]
>  set_alloc_info mm/kasan/common.c:434 [inline]
>  __kasan_slab_alloc+0x84/0xa0 mm/kasan/common.c:467
>  kasan_slab_alloc include/linux/kasan.h:253 [inline]
>  slab_post_alloc_hook mm/slab.h:512 [inline]
>  slab_alloc_node mm/slub.c:2981 [inline]
>  kmem_cache_alloc_node+0x266/0x3e0 mm/slub.c:3017
>  __alloc_skb+0x20b/0x340 net/core/skbuff.c:414
>  alloc_skb_fclone include/linux/skbuff.h:1162 [inline]
>  tipc_buf_acquire+0x25/0xe0 net/tipc/msg.c:72
>  tipc_msg_build+0xf7/0x10a0 net/tipc/msg.c:386
>  __tipc_sendstream+0x6d0/0x1150 net/tipc/socket.c:1610
>  tipc_sendstream+0x4c/0x70 net/tipc/socket.c:1541
>  sock_sendmsg_nosec net/socket.c:703 [inline]
>  sock_sendmsg+0xcf/0x120 net/socket.c:723
>  sock_write_iter+0x289/0x3c0 net/socket.c:1056
>  call_write_iter include/linux/fs.h:2114 [inline]
>  new_sync_write+0x426/0x650 fs/read_write.c:518
>  vfs_write+0x75a/0xa40 fs/read_write.c:605
>  ksys_write+0x1ee/0x250 fs/read_write.c:658
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Freed by task 8:
>  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
>  kasan_set_track+0x1c/0x30 mm/kasan/common.c:46
>  kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:360
>  kasan_slab_free mm/kasan/common.c:366 [inline]
>  

[tipc-discussion] [PATCH net-next] tipc: keep the skb in rcv queue until the whole data is read

2021-07-16 Thread Xin Long
Currently, when userspace reads a datagram with a buffer that is
smaller than this datagram, the data will be truncated and only
part of it can be received by users. It doesn't seem right that
users don't know the datagram size and have to use a huge buffer
to read it to avoid the truncation.

This patch fixes it by keeping the skb in the rcv queue until the
whole data is read by users. Only the last msg of the datagram
will be marked with MSG_EOR, just as TCP/SCTP does.

Note that this will work as above only when MSG_EOR is set in the
flags parameter of recvmsg(), so that it won't break any old user
applications.

Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/socket.c | 36 +++-
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34a97ea36cc8..9b0b311c7ec1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+   struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(&sk->sk_receive_queue);
+   skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1922,18 +1924,33 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
 
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
-   copy = min_t(int, dlen, buflen);
-   if (unlikely(copy != dlen))
-   m->msg_flags |= MSG_TRUNC;
-   rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+   int offset = skb_cb->bytes_read;
+
+   copy = min_t(int, dlen - offset, buflen);
+   rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+   if (unlikely(rc))
+   goto exit;
+   if (unlikely(offset + copy < dlen)) {
+   if (flags & MSG_EOR) {
+   if (!(flags & MSG_PEEK))
+   skb_cb->bytes_read = offset + copy;
+   } else {
+   m->msg_flags |= MSG_TRUNC;
+   skb_cb->bytes_read = 0;
+   }
+   } else {
+   if (flags & MSG_EOR)
+   m->msg_flags |= MSG_EOR;
+   skb_cb->bytes_read = 0;
+   }
} else {
copy = 0;
rc = 0;
-   if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+   if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+   goto exit;
+   }
}
-   if (unlikely(rc))
-   goto exit;
 
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1956,9 +1973,10 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m,
tipc_node_distr_xmit(sock_net(sk), &xmitq);
}
 
-   tsk_advance_rx_queue(sk);
+   if (!skb_cb->bytes_read)
+   tsk_advance_rx_queue(sk);
 
-   if (likely(!connected))
+   if (likely(!connected) || skb_cb->bytes_read)
goto exit;
 
/* Send connection flow control advertisement when applicable */
-- 
2.27.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


  1   2   >