В Пт, 29/05/2015 в 17:20 +0300, Andrew Vagin пишет:
> Acked-by: Andrew Vagin <ava...@odin.com>
> 
> I'm agree that we need to remove this function, but I don't know how it
> fixes the bug.

tcp_v4_kill_ve_sockets() is called when all processes are killed,
and their sockets have been already replaced with time wait sockets.

So, when we are iterating through chain list:

        sk_nulls_for_each(sk, node, &head[i].chain)

we can't find TCP sockets there. Every sk is of type inet_timewait_sock.
Yes, they both are linked in the same table.

struct sock (tcp_sock) and inet_timewait_sock are not a child and a parent:

struct sock {                           struct inet_timewait_sock {
   struct sock_common __sk_common;         struct sock_common __tw_common;
   socket_lock_t          sk_lock;         int                 tw_timeout;

They are different.

So, when we are doing bh_lock_sock(sk) in tcp_v4_kill_ve_sockets(), i.e.

#define bh_lock_sock(__sk)      spin_lock(&((__sk)->sk_lock.slock))

we are trying to manipulate with tw_timeout, which is not zero, and can't
acquire the spinlock.

> On Fri, May 29, 2015 at 04:53:39PM +0300, Kirill Tkhai wrote:
> > This is a leftover from earlier versions of PCS,
> > and we do not need that functionality in 3.10.
> > 
> > It was the reason of the kernel panic in 2.6.32:
> > https://jira.sw.ru/browse/PSBM-33755, in the test
> > of forced CT destroying.
> > 
> > Also, tcp_v4_kill_ve_sockets() looks as the only
> > user of synchronize_net(), so we're not need this
> > function too (the only net hook fini_venet_acct_ip6_stat()
> > from the chain doesn't need it). Besides this, there
> > will be one more synchronize_rcu() later in
> > ve_drop_context().
> > 
> > Signed-off-by: Kirill Tkhai <ktk...@odin.com>
> > ---
> >  include/linux/ve_proto.h |    5 +--
> >  kernel/ve/ve.c           |    5 +--
> >  net/ipv4/tcp_ipv4.c      |   88 
> > ----------------------------------------------
> >  3 files changed, 2 insertions(+), 96 deletions(-)
> > 
> > diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h
> > index d491ab0..e77d6f9 100644
> > --- a/include/linux/ve_proto.h
> > +++ b/include/linux/ve_proto.h
> > @@ -50,12 +50,9 @@ typedef void (*ve_seq_print_t)(struct seq_file *, struct 
> > ve_struct *);
> >  void vzmon_register_veaddr_print_cb(ve_seq_print_t);
> >  void vzmon_unregister_veaddr_print_cb(ve_seq_print_t);
> >  
> > -#ifdef CONFIG_INET
> > -void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
> > -#ifdef CONFIG_VE_NETDEV
> > +#if defined(CONFIG_INET) && defined(CONFIG_VE_NETDEV)
> >  int venet_init(void);
> >  #endif
> > -#endif
> >  
> >  extern struct list_head ve_list_head;
> >  #define for_each_ve(ve)    list_for_each_entry((ve), &ve_list_head, 
> > ve_list)
> > diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> > index 55b7d86..3128fdb 100644
> > --- a/kernel/ve/ve.c
> > +++ b/kernel/ve/ve.c
> > @@ -647,10 +647,7 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
> >  
> >     down_write(&ve->op_sem);
> >     ve_hook_iterate_fini(VE_SS_CHAIN, ve);
> > -#ifdef CONFIG_INET
> > -   tcp_v4_kill_ve_sockets(ve);
> > -   synchronize_net();
> > -#endif
> > +
> >     ve_list_del(ve);
> >     ve_drop_context(ve);
> >     up_write(&ve->op_sem);
> > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > index ad4520c..a8ef57a 100644
> > --- a/net/ipv4/tcp_ipv4.c
> > +++ b/net/ipv4/tcp_ipv4.c
> > @@ -2812,91 +2812,3 @@ void __init tcp_v4_init(void)
> >     if (register_pernet_subsys(&tcp_sk_ops))
> >             panic("Failed to create the TCP control socket.\n");
> >  }
> > -
> > -#ifdef CONFIG_VE
> > -static void tcp_kill_ve_onesk(struct sock *sk)
> > -{
> > -   struct tcp_sock *tp = tcp_sk(sk);
> > -
> > -   /* Check the assumed state of the socket. */
> > -   if (!sock_flag(sk, SOCK_DEAD)) {
> > -           printk(KERN_WARNING "Killing sk: dead %d, state %d, "
> > -                   "wrseq %u unseq %u, wrqu %d.\n",
> > -                   sock_flag(sk, SOCK_DEAD), sk->sk_state,
> > -                   tp->write_seq, tp->snd_una,
> > -                   !skb_queue_empty(&sk->sk_write_queue));
> > -           sk->sk_err = ECONNRESET;
> > -           sk->sk_error_report(sk);
> > -   }
> > -
> > -   tcp_send_active_reset(sk, GFP_ATOMIC);
> > -   switch (sk->sk_state) {
> > -           case TCP_FIN_WAIT1:
> > -           case TCP_CLOSING:
> > -                   /* In these 2 states the peer may want us to retransmit
> > -                    * some data and/or FIN.  Entering "resetting mode"
> > -                    * instead.
> > -                    */
> > -                   tcp_time_wait(sk, TCP_CLOSE, 0);
> > -                   break;
> > -           case TCP_FIN_WAIT2:
> > -                   /* By some reason the socket may stay in this state
> > -                    * without turning into a TW bucket.  Fix it.
> > -                    */
> > -                   tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
> > -                   break;
> > -           default:
> > -                   /* Just jump into CLOSED state. */
> > -                   tcp_done(sk);
> > -                   break;
> > -   }
> > -}
> > -
> > -void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
> > -{
> > -   struct inet_ehash_bucket *head;
> > -   int i, retry;
> > -
> > -   /* alive */
> > -again:
> > -   retry = 0;
> > -   local_bh_disable();
> > -   head = tcp_hashinfo.ehash;
> > -   for (i = 0; i < tcp_hashinfo.ehash_mask; i++) {
> > -           struct sock *sk;
> > -           struct hlist_nulls_node *node;
> > -           spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, i);
> > -more_work:
> > -           spin_lock(lock);
> > -           sk_nulls_for_each(sk, node, &head[i].chain) {
> > -                   if (sock_net(sk)->owner_ve == envid) {
> > -                           sock_hold(sk);
> > -                           spin_unlock(lock);
> > -
> > -                           bh_lock_sock(sk);
> > -                           if (sock_owned_by_user(sk)) {
> > -                                   retry = 1;
> > -                                   bh_unlock_sock(sk);
> > -                                   sock_put(sk);
> > -                                   goto enable_bh;
> > -                           }
> > -                           /* sk might have disappeared from the hash 
> > before
> > -                            * we got the lock */
> > -                           if (sk->sk_state != TCP_CLOSE)
> > -                                   tcp_kill_ve_onesk(sk);
> > -                           bh_unlock_sock(sk);
> > -                           sock_put(sk);
> > -                           goto more_work;
> > -                   }
> > -           }
> > -           spin_unlock(lock);
> > -   }
> > -enable_bh:
> > -   local_bh_enable();
> > -   if (retry) {
> > -           schedule_timeout_interruptible(HZ);
> > -           goto again;
> > -   }
> > -}
> > -EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
> > -#endif
> > 


_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to