В Пт, 29/05/2015 в 17:20 +0300, Andrew Vagin пишет: > Acked-by: Andrew Vagin <ava...@odin.com> > > I agree that we need to remove this function, but I don't know how it > fixes the bug.
tcp_v4_kill_ve_sockets() is called when all processes are killed, and their sockets have already been replaced with time wait sockets. So, when we are iterating through the chain list: sk_nulls_for_each(sk, node, &head[i].chain) we can't find TCP sockets there. Every sk is of type inet_timewait_sock. Yes, they both are linked in the same table. struct sock (tcp_sock) and inet_timewait_sock are not parent and child: struct sock { struct inet_timewait_sock { struct sock_common __sk_common; struct sock_common __tw_common; socket_lock_t sk_lock; int tw_timeout; They are different. So, when we are doing bh_lock_sock(sk) in tcp_v4_kill_ve_sockets(), i.e. #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) we are actually manipulating tw_timeout, which is not zero, so we can't acquire the spinlock. > On Fri, May 29, 2015 at 04:53:39PM +0300, Kirill Tkhai wrote: > > This is a leftover from earlier versions of PCS, > > and we do not need that functionality in 3.10. > > > > It was the cause of the kernel panic in 2.6.32: > > https://jira.sw.ru/browse/PSBM-33755, in the test > > of forced CT destroying. > > > > Also, tcp_v4_kill_ve_sockets() looks like the only > > user of synchronize_net(), so we don't need this > > function either (the only net hook fini_venet_acct_ip6_stat() > > from the chain doesn't need it). Besides this, there > > will be one more synchronize_rcu() later in > > ve_drop_context(). 
> > > > Signed-off-by: Kirill Tkhai <ktk...@odin.com> > > --- > > include/linux/ve_proto.h | 5 +-- > > kernel/ve/ve.c | 5 +-- > > net/ipv4/tcp_ipv4.c | 88 > > ---------------------------------------------- > > 3 files changed, 2 insertions(+), 96 deletions(-) > > > > diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h > > index d491ab0..e77d6f9 100644 > > --- a/include/linux/ve_proto.h > > +++ b/include/linux/ve_proto.h > > @@ -50,12 +50,9 @@ typedef void (*ve_seq_print_t)(struct seq_file *, struct > > ve_struct *); > > void vzmon_register_veaddr_print_cb(ve_seq_print_t); > > void vzmon_unregister_veaddr_print_cb(ve_seq_print_t); > > > > -#ifdef CONFIG_INET > > -void tcp_v4_kill_ve_sockets(struct ve_struct *envid); > > -#ifdef CONFIG_VE_NETDEV > > +#if defined(CONFIG_INET) && defined(CONFIG_VE_NETDEV) > > int venet_init(void); > > #endif > > -#endif > > > > extern struct list_head ve_list_head; > > #define for_each_ve(ve) list_for_each_entry((ve), &ve_list_head, > > ve_list) > > diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c > > index 55b7d86..3128fdb 100644 > > --- a/kernel/ve/ve.c > > +++ b/kernel/ve/ve.c > > @@ -647,10 +647,7 @@ void ve_exit_ns(struct pid_namespace *pid_ns) > > > > down_write(&ve->op_sem); > > ve_hook_iterate_fini(VE_SS_CHAIN, ve); > > -#ifdef CONFIG_INET > > - tcp_v4_kill_ve_sockets(ve); > > - synchronize_net(); > > -#endif > > + > > ve_list_del(ve); > > ve_drop_context(ve); > > up_write(&ve->op_sem); > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > > index ad4520c..a8ef57a 100644 > > --- a/net/ipv4/tcp_ipv4.c > > +++ b/net/ipv4/tcp_ipv4.c > > @@ -2812,91 +2812,3 @@ void __init tcp_v4_init(void) > > if (register_pernet_subsys(&tcp_sk_ops)) > > panic("Failed to create the TCP control socket.\n"); > > } > > - > > -#ifdef CONFIG_VE > > -static void tcp_kill_ve_onesk(struct sock *sk) > > -{ > > - struct tcp_sock *tp = tcp_sk(sk); > > - > > - /* Check the assumed state of the socket. 
*/ > > - if (!sock_flag(sk, SOCK_DEAD)) { > > - printk(KERN_WARNING "Killing sk: dead %d, state %d, " > > - "wrseq %u unseq %u, wrqu %d.\n", > > - sock_flag(sk, SOCK_DEAD), sk->sk_state, > > - tp->write_seq, tp->snd_una, > > - !skb_queue_empty(&sk->sk_write_queue)); > > - sk->sk_err = ECONNRESET; > > - sk->sk_error_report(sk); > > - } > > - > > - tcp_send_active_reset(sk, GFP_ATOMIC); > > - switch (sk->sk_state) { > > - case TCP_FIN_WAIT1: > > - case TCP_CLOSING: > > - /* In these 2 states the peer may want us to retransmit > > - * some data and/or FIN. Entering "resetting mode" > > - * instead. > > - */ > > - tcp_time_wait(sk, TCP_CLOSE, 0); > > - break; > > - case TCP_FIN_WAIT2: > > - /* By some reason the socket may stay in this state > > - * without turning into a TW bucket. Fix it. > > - */ > > - tcp_time_wait(sk, TCP_FIN_WAIT2, 0); > > - break; > > - default: > > - /* Just jump into CLOSED state. */ > > - tcp_done(sk); > > - break; > > - } > > -} > > - > > -void tcp_v4_kill_ve_sockets(struct ve_struct *envid) > > -{ > > - struct inet_ehash_bucket *head; > > - int i, retry; > > - > > - /* alive */ > > -again: > > - retry = 0; > > - local_bh_disable(); > > - head = tcp_hashinfo.ehash; > > - for (i = 0; i < tcp_hashinfo.ehash_mask; i++) { > > - struct sock *sk; > > - struct hlist_nulls_node *node; > > - spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, i); > > -more_work: > > - spin_lock(lock); > > - sk_nulls_for_each(sk, node, &head[i].chain) { > > - if (sock_net(sk)->owner_ve == envid) { > > - sock_hold(sk); > > - spin_unlock(lock); > > - > > - bh_lock_sock(sk); > > - if (sock_owned_by_user(sk)) { > > - retry = 1; > > - bh_unlock_sock(sk); > > - sock_put(sk); > > - goto enable_bh; > > - } > > - /* sk might have disappeared from the hash > > before > > - * we got the lock */ > > - if (sk->sk_state != TCP_CLOSE) > > - tcp_kill_ve_onesk(sk); > > - bh_unlock_sock(sk); > > - sock_put(sk); > > - goto more_work; > > - } > > - } > > - spin_unlock(lock); > > - } > > 
-enable_bh: > > - local_bh_enable(); > > - if (retry) { > > - schedule_timeout_interruptible(HZ); > > - goto again; > > - } > > -} > > -EXPORT_SYMBOL(tcp_v4_kill_ve_sockets); > > -#endif > > _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel