[tipc-discussion] [PATCH net] tipc: wait and exit until all work queues are done

2021-05-16 Thread Xin Long
On some hosts, a crash could be triggered simply by repeating these
commands several times:

  # modprobe tipc
  # tipc bearer enable media udp name UDP1 localip 127.0.0.1
  # rmmod tipc

  [] BUG: unable to handle kernel paging request at c096bb00
  [] Workqueue: events 0xc096bb00
  [] Call Trace:
  []  ? process_one_work+0x1a7/0x360
  []  ? worker_thread+0x30/0x390
  []  ? create_worker+0x1a0/0x1a0
  []  ? kthread+0x116/0x130
  []  ? kthread_flush_work_fn+0x10/0x10
  []  ? ret_from_fork+0x35/0x40

When removing the TIPC module, the release of the UDP tunnel sock is
deferred to a work queue, as sock_release() can't be called under
rtnl_lock(). If the work item is scheduled to run after the TIPC module
is removed, the kernel will crash, as the code of the work function
cleanup_bearer() no longer exists when the kernel tries to invoke it.

To fix it, this patch introduces a member wq_count in tipc_net to track
the number of scheduled work items, and makes tipc_exit_net() wait until
all of them are done before exiting.

Fixes: d0f91938bede ("tipc: add ip/udp media type")
Reported-by: Shuang Li 
Signed-off-by: Xin Long 
Acked-by: Jon Maloy 
---
 net/tipc/core.c  | 2 ++
 net/tipc/core.h  | 2 ++
 net/tipc/udp_media.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f03..72f3ac7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
 #ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_stop(&tipc_net(net)->crypto_tx);
 #endif
+   while (atomic_read(&tn->wq_count))
+   cond_resched();
 }
 
 static void __net_exit tipc_pernet_pre_exit(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b2..5741ae4 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -149,6 +149,8 @@ struct tipc_net {
 #endif
/* Work item for net finalize */
struct tipc_net_work final_work;
+   /* The numbers of work queues in schedule */
+   atomic_t wq_count;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2c..c2bb818 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
 
+   atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
RCU_INIT_POINTER(ub->bearer, NULL);
 
/* sock_release need to be done outside of rtnl lock */
+   atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
INIT_WORK(&ub->work, cleanup_bearer);
schedule_work(&ub->work);
 }
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


Re: [tipc-discussion] [PATCH net] tipc: wait and exit until all work queues are done

2021-05-15 Thread Jon Maloy




On 5/14/21 2:40 PM, Xin Long wrote:

On some hosts, a crash could be triggered simply by repeating these
commands several times:

   # modprobe tipc
   # tipc bearer enable media udp name UDP1 localip 127.0.0.1
   # rmmod tipc

   [] BUG: unable to handle kernel paging request at c096bb00
   [] Workqueue: events 0xc096bb00
   [] Call Trace:
   []  ? process_one_work+0x1a7/0x360
   []  ? worker_thread+0x30/0x390
   []  ? create_worker+0x1a0/0x1a0
   []  ? kthread+0x116/0x130
   []  ? kthread_flush_work_fn+0x10/0x10
   []  ? ret_from_fork+0x35/0x40

When removing the TIPC module, the release of the UDP tunnel sock is
deferred to a work queue, as sock_release() can't be called under
rtnl_lock(). If the work item is scheduled to run after the TIPC module
is removed, the kernel will crash, as the code of the work function
cleanup_bearer() no longer exists when the kernel tries to invoke it.

To fix it, this patch introduces a member wq_count in tipc_net to track
the number of scheduled work items, and makes tipc_exit_net() wait until
all of them are done before exiting.

Reported-by: Shuang Li 
Signed-off-by: Xin Long 
---
  net/tipc/core.c  | 2 ++
  net/tipc/core.h  | 2 ++
  net/tipc/udp_media.c | 2 ++
  3 files changed, 6 insertions(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f03..72f3ac7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
  #ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_stop(&tipc_net(net)->crypto_tx);
  #endif
+   while (atomic_read(&tn->wq_count))
+   cond_resched();
  }
  
  static void __net_exit tipc_pernet_pre_exit(struct net *net)

diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b2..5741ae4 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -149,6 +149,8 @@ struct tipc_net {
  #endif
/* Work item for net finalize */
struct tipc_net_work final_work;
+   /* The numbers of work queues in schedule */
+   atomic_t wq_count;
  };
  
  static inline struct tipc_net *tipc_net(struct net *net)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2c..c2bb818 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
  
+	atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);

dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
RCU_INIT_POINTER(ub->bearer, NULL);
  
  	/* sock_release need to be done outside of rtnl lock */

+   atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
INIT_WORK(&ub->work, cleanup_bearer);
schedule_work(&ub->work);
  }


Acked-by: Jon Maloy 



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion


[tipc-discussion] [PATCH net] tipc: wait and exit until all work queues are done

2021-05-14 Thread Xin Long
On some hosts, a crash could be triggered simply by repeating these
commands several times:

  # modprobe tipc
  # tipc bearer enable media udp name UDP1 localip 127.0.0.1
  # rmmod tipc

  [] BUG: unable to handle kernel paging request at c096bb00
  [] Workqueue: events 0xc096bb00
  [] Call Trace:
  []  ? process_one_work+0x1a7/0x360
  []  ? worker_thread+0x30/0x390
  []  ? create_worker+0x1a0/0x1a0
  []  ? kthread+0x116/0x130
  []  ? kthread_flush_work_fn+0x10/0x10
  []  ? ret_from_fork+0x35/0x40

When removing the TIPC module, the release of the UDP tunnel sock is
deferred to a work queue, as sock_release() can't be called under
rtnl_lock(). If the work item is scheduled to run after the TIPC module
is removed, the kernel will crash, as the code of the work function
cleanup_bearer() no longer exists when the kernel tries to invoke it.

To fix it, this patch introduces a member wq_count in tipc_net to track
the number of scheduled work items, and makes tipc_exit_net() wait until
all of them are done before exiting.

Reported-by: Shuang Li 
Signed-off-by: Xin Long 
---
 net/tipc/core.c  | 2 ++
 net/tipc/core.h  | 2 ++
 net/tipc/udp_media.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f03..72f3ac7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
 #ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_stop(&tipc_net(net)->crypto_tx);
 #endif
+   while (atomic_read(&tn->wq_count))
+   cond_resched();
 }
 
 static void __net_exit tipc_pernet_pre_exit(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b2..5741ae4 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -149,6 +149,8 @@ struct tipc_net {
 #endif
/* Work item for net finalize */
struct tipc_net_work final_work;
+   /* The numbers of work queues in schedule */
+   atomic_t wq_count;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2c..c2bb818 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
 
+   atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
RCU_INIT_POINTER(ub->bearer, NULL);
 
/* sock_release need to be done outside of rtnl lock */
+   atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
INIT_WORK(&ub->work, cleanup_bearer);
schedule_work(&ub->work);
 }
-- 
2.1.0



___
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion