[PATCH 3.4 116/125] sctp: Prevent soft lockup when sctp_accept() is called during a timeout event

2016-10-12 Thread lizf
From: Karl Heiss 

3.4.113-rc1 review patch.  If anyone has any objections, please let me know.

--


commit 635682a14427d241bab7bbdeebb48a7d7b91638e upstream.

A case can occur when sctp_accept() is called by the user during
a heartbeat timeout event after the 4-way handshake.  Since
sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the
socket lock in sctp_generate_heartbeat_event() will be taken with
bh_lock_sock() on the listening socket but released on the new
association socket.  The listening socket's lock is then never
released, so any future attempt to take it deadlocks.

Note that the same race can occur with any other SCTP timeout handler
that takes bh_lock_sock() if sctp_accept() is called while it runs.

 BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0]
 ...
 RIP: 0010:[]  [] _spin_lock+0x1e/0x30
 RSP: 0018:880028323b20  EFLAGS: 0206
 RAX: 0002 RBX: 880028323b20 RCX: 
 RDX:  RSI: 880028323be0 RDI: 8804632c4b48
 RBP: 8100bb93 R08:  R09: 
 R10: 880610662280 R11: 0100 R12: 880028323aa0
 R13: 8804383c3880 R14: 880028323a90 R15: 81534225
 FS:  () GS:88002832() knlGS:
 CS:  0010 DS: 0018 ES: 0018 CR0: 8005003b
 CR2: 006df528 CR3: 01a85000 CR4: 06e0
 DR0:  DR1:  DR2: 
 DR3:  DR6: 0ff0 DR7: 0400
 Process swapper (pid: 0, threadinfo 880616b7, task 880616b6cab0)
 Stack:
 880028323c40 a01c2582 880614cfb020 
  0100 0014383a6c44 8804383c3880 880614e93c00
  880614e93c00  8804632c4b00 8804383c38b8
 Call Trace:
 
 [] ? sctp_rcv+0x492/0xa10 [sctp]
 [] ? nf_iterate+0x69/0xb0
 [] ? ip_local_deliver_finish+0x0/0x2d0
 [] ? nf_hook_slow+0x76/0x120
 [] ? ip_local_deliver_finish+0x0/0x2d0
 [] ? ip_local_deliver_finish+0xdd/0x2d0
 [] ? ip_local_deliver+0x98/0xa0
 [] ? ip_rcv_finish+0x12d/0x440
 [] ? ip_rcv+0x275/0x350
 [] ? __netif_receive_skb+0x4ab/0x750
 ...

With lockdep debugging:

 =====================================
 [ BUG: bad unlock balance detected! ]
 -------------------------------------
 CslRx/12087 is trying to release lock (slock-AF_INET) at:
 [] sctp_generate_timeout_event+0x40/0xe0 [sctp]
 but there are no more locks to release!

 other info that might help us debug this:
 2 locks held by CslRx/12087:
 #0:  (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0
 #1:  (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp]

Ensure the socket taken is also the same one that is released by
saving a copy of the socket before entering the timeout event
critical section.

Signed-off-by: Karl Heiss 
Signed-off-by: David S. Miller 
[bwh: Backported to 3.2:
 - Net namespaces are not used
 - Keep using sctp_bh_{,un}lock_sock()
 - Adjust context]
Signed-off-by: Ben Hutchings 
Signed-off-by: Zefan Li 
---
 net/sctp/sm_sideeffect.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 5fa033a..06c75b1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -249,11 +249,12 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
int error;
struct sctp_transport *transport = (struct sctp_transport *) peer;
struct sctp_association *asoc = transport->asoc;
+   struct sock *sk = asoc->base.sk;
 
/* Check whether a task is in the sock.  */
 
-   sctp_bh_lock_sock(asoc->base.sk);
-   if (sock_owned_by_user(asoc->base.sk)) {
+   sctp_bh_lock_sock(sk);
+   if (sock_owned_by_user(sk)) {
SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__);
 
/* Try again later.  */
@@ -276,10 +277,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
   transport, GFP_ATOMIC);
 
if (error)
-   asoc->base.sk->sk_err = -error;
+   sk->sk_err = -error;
 
 out_unlock:
-   sctp_bh_unlock_sock(asoc->base.sk);
+   sctp_bh_unlock_sock(sk);
sctp_transport_put(transport);
 }
 
@@ -289,10 +290,11 @@ out_unlock:
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
sctp_event_timeout_t timeout_type)
 {
+   struct sock *sk = asoc->base.sk;
int error = 0;
 
-   sctp_bh_lock_sock(asoc->base.sk);
-   if (sock_owned_by_user(asoc->base.sk)) {
+   sctp_bh_lock_sock(sk);
+   if (sock_owned_by_user(sk)) {
SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n",
  __func__,
  timeout_type);
@@ -316,10 +318,10 @@ static 

[PATCH 3.4 116/125] sctp: Prevent soft lockup when sctp_accept() is called during a timeout event

2016-10-12 Thread lizf
From: Karl Heiss 

3.4.113-rc1 review patch.  If anyone has any objections, please let me know.

--


commit 635682a14427d241bab7bbdeebb48a7d7b91638e upstream.

A case can occur when sctp_accept() is called by the user during
a heartbeat timeout event after the 4-way handshake.  Since
sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the
socket lock in sctp_generate_heartbeat_event() will be taken with
bh_lock_sock() on the listening socket but released on the new
association socket.  The listening socket's lock is then never
released, so any future attempt to take it deadlocks.

Note that the same race can occur with any other SCTP timeout handler
that takes bh_lock_sock() if sctp_accept() is called while it runs.

 BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0]
 ...
 RIP: 0010:[]  [] _spin_lock+0x1e/0x30
 RSP: 0018:880028323b20  EFLAGS: 0206
 RAX: 0002 RBX: 880028323b20 RCX: 
 RDX:  RSI: 880028323be0 RDI: 8804632c4b48
 RBP: 8100bb93 R08:  R09: 
 R10: 880610662280 R11: 0100 R12: 880028323aa0
 R13: 8804383c3880 R14: 880028323a90 R15: 81534225
 FS:  () GS:88002832() knlGS:
 CS:  0010 DS: 0018 ES: 0018 CR0: 8005003b
 CR2: 006df528 CR3: 01a85000 CR4: 06e0
 DR0:  DR1:  DR2: 
 DR3:  DR6: 0ff0 DR7: 0400
 Process swapper (pid: 0, threadinfo 880616b7, task 880616b6cab0)
 Stack:
 880028323c40 a01c2582 880614cfb020 
  0100 0014383a6c44 8804383c3880 880614e93c00
  880614e93c00  8804632c4b00 8804383c38b8
 Call Trace:
 
 [] ? sctp_rcv+0x492/0xa10 [sctp]
 [] ? nf_iterate+0x69/0xb0
 [] ? ip_local_deliver_finish+0x0/0x2d0
 [] ? nf_hook_slow+0x76/0x120
 [] ? ip_local_deliver_finish+0x0/0x2d0
 [] ? ip_local_deliver_finish+0xdd/0x2d0
 [] ? ip_local_deliver+0x98/0xa0
 [] ? ip_rcv_finish+0x12d/0x440
 [] ? ip_rcv+0x275/0x350
 [] ? __netif_receive_skb+0x4ab/0x750
 ...

With lockdep debugging:

 =====================================
 [ BUG: bad unlock balance detected! ]
 -------------------------------------
 CslRx/12087 is trying to release lock (slock-AF_INET) at:
 [] sctp_generate_timeout_event+0x40/0xe0 [sctp]
 but there are no more locks to release!

 other info that might help us debug this:
 2 locks held by CslRx/12087:
 #0:  (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0
 #1:  (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp]

Ensure the socket taken is also the same one that is released by
saving a copy of the socket before entering the timeout event
critical section.

Signed-off-by: Karl Heiss 
Signed-off-by: David S. Miller 
[bwh: Backported to 3.2:
 - Net namespaces are not used
 - Keep using sctp_bh_{,un}lock_sock()
 - Adjust context]
Signed-off-by: Ben Hutchings 
Signed-off-by: Zefan Li 
---
 net/sctp/sm_sideeffect.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 5fa033a..06c75b1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -249,11 +249,12 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
int error;
struct sctp_transport *transport = (struct sctp_transport *) peer;
struct sctp_association *asoc = transport->asoc;
+   struct sock *sk = asoc->base.sk;
 
/* Check whether a task is in the sock.  */
 
-   sctp_bh_lock_sock(asoc->base.sk);
-   if (sock_owned_by_user(asoc->base.sk)) {
+   sctp_bh_lock_sock(sk);
+   if (sock_owned_by_user(sk)) {
SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__);
 
/* Try again later.  */
@@ -276,10 +277,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
   transport, GFP_ATOMIC);
 
if (error)
-   asoc->base.sk->sk_err = -error;
+   sk->sk_err = -error;
 
 out_unlock:
-   sctp_bh_unlock_sock(asoc->base.sk);
+   sctp_bh_unlock_sock(sk);
sctp_transport_put(transport);
 }
 
@@ -289,10 +290,11 @@ out_unlock:
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
sctp_event_timeout_t timeout_type)
 {
+   struct sock *sk = asoc->base.sk;
int error = 0;
 
-   sctp_bh_lock_sock(asoc->base.sk);
-   if (sock_owned_by_user(asoc->base.sk)) {
+   sctp_bh_lock_sock(sk);
+   if (sock_owned_by_user(sk)) {
SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n",
  __func__,
  timeout_type);
@@ -316,10 +318,10 @@ static void sctp_generate_timeout_event(struct 
sctp_association *asoc,
   (void