Re: [PATCH v2 03/31] net: Introduce net_sem for protection of pernet_list

2018-01-18 Thread Kirill Tkhai
On 17.01.2018 23:04, Andrei Vagin wrote:
> On Mon, Nov 20, 2017 at 09:32:34PM +0300, Kirill Tkhai wrote:
>> Curently mutex is used to protect pernet operations list. It makes
>> cleanup_net() to execute ->exit methods of the same operations set,
>> which was used on the time of ->init, even after net namespace is
>> unlinked from net_namespace_list.
>>
>> But the problem is it's need to synchronize_rcu() after net is removed
>> from net_namespace_list():
>>
>> Destroy net_ns:
>> cleanup_net()
>>   mutex_lock(&net_mutex)
>>   list_del_rcu(&net->list)
>>   synchronize_rcu()  <--- Sleep there for 
>> ages
>>   list_for_each_entry_reverse(ops, &pernet_list, list)
>> ops_exit_list(ops, &net_exit_list)
>>   list_for_each_entry_reverse(ops, &pernet_list, list)
>> ops_free_list(ops, &net_exit_list)
>>   mutex_unlock(&net_mutex)
>>
>> This primitive is not fast, especially on the systems with many processors
>> and/or when preemptible RCU is enabled in config. So, all the time, while
>> cleanup_net() is waiting for RCU grace period, creation of new net namespaces
>> is not possible, the tasks, who makes it, are sleeping on the same mutex:
>>
>> Create net_ns:
>> copy_net_ns()
>>   mutex_lock_killable(&net_mutex)<--- Sleep there for 
>> ages
>>
>> I observed 20-30 seconds hangs of "unshare -n" on ordinary 8-cpu laptop
>> with preemptible RCU enabled.
>>
>> The solution is to convert net_mutex to the rw_semaphore and add small locks
>> to really small number of pernet_operations, what really need them. Then,
>> pernet_operations::init/::exit methods, modifying the net-related data,
>> will require down_read() locking only, while down_write() will be used
>> for changing pernet_list.
>>
>> This gives signify performance increase, after all patch set is applied,
>> like you may see here:
>>
>> %for i in {1..1}; do unshare -n bash -c exit; done
>>
>> *before*
>> real 1m40,377s
>> user 0m9,672s
>> sys 0m19,928s
>>
>> *after*
>> real 0m17,007s
>> user 0m5,311s
>> sys 0m11,779
>>
>> (5.8 times faster)
>>
>> This patch starts replacing net_mutex to net_sem. It adds rw_semaphore,
>> describes the variables it protects, and makes to use where appropriate.
>> net_mutex is still present, and next patches will kick it out step-by-step.
>>
>> Signed-off-by: Kirill Tkhai 
>> ---
>>  include/linux/rtnetlink.h |1 +
>>  net/core/net_namespace.c  |   39 ++-
>>  net/core/rtnetlink.c  |4 ++--
>>  3 files changed, 29 insertions(+), 15 deletions(-)
>>
>> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
>> index 2032ce2eb20b..f640fc87fe1d 100644
>> --- a/include/linux/rtnetlink.h
>> +++ b/include/linux/rtnetlink.h
>> @@ -35,6 +35,7 @@ extern int rtnl_is_locked(void);
>>  
>>  extern wait_queue_head_t netdev_unregistering_wq;
>>  extern struct mutex net_mutex;
>> +extern struct rw_semaphore net_sem;
>>  
>>  #ifdef CONFIG_PROVE_LOCKING
>>  extern bool lockdep_rtnl_is_held(void);
>> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
>> index 2e512965bf42..859dce31e37e 100644
>> --- a/net/core/net_namespace.c
>> +++ b/net/core/net_namespace.c
>> @@ -41,6 +41,11 @@ struct net init_net = {
> 
>> static LIST_HEAD(pernet_list);
>> static struct list_head *first_device = &pernet_list;
>> DEFINE_MUTEX(net_mutex);
> 
> With all patches, we still have the net_mutex, I think we need to add a
> comment, which explains why we need it. Are "sync" pernet operations
> depricated after this series? Or is it ok to have them?
 
net_mutex will leave till the time all pernet_operations are converted.
But people, who don't use unconverted operations, will have performance
profit already. Comment is not a problem :)

Thanks, Kirill
 
>>  EXPORT_SYMBOL(init_net);
>>  
>>  static bool init_net_initialized;
>> +/*
>> + * net_sem: protects: pernet_list, net_generic_ids,
>> + * init_net_initialized and first_device pointer.
>> + */
>> +DECLARE_RWSEM(net_sem);
>>  
>>  #define MIN_PERNET_OPS_ID   \
>>  ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
>> @@ -279,7 +284,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>>   */
>>  static __net_init int setup_net(struct net *net, struct user_namespace 
>> *user_ns)
>>  {
>> -/* Must be called with net_mutex held */
>> +/* Must be called with net_sem held */
>>  const struct pernet_operations *ops, *saved_ops;
>>  int error = 0;
>>  LIST_HEAD(net_exit_list);
>> @@ -411,12 +416,16 @@ struct net *copy_net_ns(unsigned long flags,
>>  net->ucounts = ucounts;
>>  get_user_ns(user_ns);
>>  
>> -rv = mutex_lock_killable(&net_mutex);
>> +rv = down_read_killable(&net_sem);
>>  if (rv < 0)
>>  goto put_userns;
>> -
>> +rv = mutex_lock_killable(&net_mutex);
>> +if (rv < 0)
>> +goto up_read;
>>  rv = setup_net(net, user_ns);
>>  mutex_unlock(&net_mutex);
>> +up_read:
>> +up_read

Re: [PATCH v2 03/31] net: Introduce net_sem for protection of pernet_list

2018-01-17 Thread Andrei Vagin
On Mon, Nov 20, 2017 at 09:32:34PM +0300, Kirill Tkhai wrote:
> Curently mutex is used to protect pernet operations list. It makes
> cleanup_net() to execute ->exit methods of the same operations set,
> which was used on the time of ->init, even after net namespace is
> unlinked from net_namespace_list.
> 
> But the problem is it's need to synchronize_rcu() after net is removed
> from net_namespace_list():
> 
> Destroy net_ns:
> cleanup_net()
>   mutex_lock(&net_mutex)
>   list_del_rcu(&net->list)
>   synchronize_rcu()  <--- Sleep there for ages
>   list_for_each_entry_reverse(ops, &pernet_list, list)
> ops_exit_list(ops, &net_exit_list)
>   list_for_each_entry_reverse(ops, &pernet_list, list)
> ops_free_list(ops, &net_exit_list)
>   mutex_unlock(&net_mutex)
> 
> This primitive is not fast, especially on the systems with many processors
> and/or when preemptible RCU is enabled in config. So, all the time, while
> cleanup_net() is waiting for RCU grace period, creation of new net namespaces
> is not possible, the tasks, who makes it, are sleeping on the same mutex:
> 
> Create net_ns:
> copy_net_ns()
>   mutex_lock_killable(&net_mutex)<--- Sleep there for ages
> 
> I observed 20-30 seconds hangs of "unshare -n" on ordinary 8-cpu laptop
> with preemptible RCU enabled.
> 
> The solution is to convert net_mutex to the rw_semaphore and add small locks
> to really small number of pernet_operations, what really need them. Then,
> pernet_operations::init/::exit methods, modifying the net-related data,
> will require down_read() locking only, while down_write() will be used
> for changing pernet_list.
> 
> This gives signify performance increase, after all patch set is applied,
> like you may see here:
> 
> %for i in {1..1}; do unshare -n bash -c exit; done
> 
> *before*
> real 1m40,377s
> user 0m9,672s
> sys 0m19,928s
> 
> *after*
> real 0m17,007s
> user 0m5,311s
> sys 0m11,779
> 
> (5.8 times faster)
> 
> This patch starts replacing net_mutex to net_sem. It adds rw_semaphore,
> describes the variables it protects, and makes to use where appropriate.
> net_mutex is still present, and next patches will kick it out step-by-step.
> 
> Signed-off-by: Kirill Tkhai 
> ---
>  include/linux/rtnetlink.h |1 +
>  net/core/net_namespace.c  |   39 ++-
>  net/core/rtnetlink.c  |4 ++--
>  3 files changed, 29 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index 2032ce2eb20b..f640fc87fe1d 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -35,6 +35,7 @@ extern int rtnl_is_locked(void);
>  
>  extern wait_queue_head_t netdev_unregistering_wq;
>  extern struct mutex net_mutex;
> +extern struct rw_semaphore net_sem;
>  
>  #ifdef CONFIG_PROVE_LOCKING
>  extern bool lockdep_rtnl_is_held(void);
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 2e512965bf42..859dce31e37e 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -41,6 +41,11 @@ struct net init_net = {

> static LIST_HEAD(pernet_list);
> static struct list_head *first_device = &pernet_list;
> DEFINE_MUTEX(net_mutex);

With all patches, we still have the net_mutex, I think we need to add a
comment, which explains why we need it. Are "sync" pernet operations
depricated after this series? Or is it ok to have them?


>  EXPORT_SYMBOL(init_net);
>  
>  static bool init_net_initialized;
> +/*
> + * net_sem: protects: pernet_list, net_generic_ids,
> + * init_net_initialized and first_device pointer.
> + */
> +DECLARE_RWSEM(net_sem);
>  
>  #define MIN_PERNET_OPS_ID\
>   ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
> @@ -279,7 +284,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>   */
>  static __net_init int setup_net(struct net *net, struct user_namespace 
> *user_ns)
>  {
> - /* Must be called with net_mutex held */
> + /* Must be called with net_sem held */
>   const struct pernet_operations *ops, *saved_ops;
>   int error = 0;
>   LIST_HEAD(net_exit_list);
> @@ -411,12 +416,16 @@ struct net *copy_net_ns(unsigned long flags,
>   net->ucounts = ucounts;
>   get_user_ns(user_ns);
>  
> - rv = mutex_lock_killable(&net_mutex);
> + rv = down_read_killable(&net_sem);
>   if (rv < 0)
>   goto put_userns;
> -
> + rv = mutex_lock_killable(&net_mutex);
> + if (rv < 0)
> + goto up_read;
>   rv = setup_net(net, user_ns);
>   mutex_unlock(&net_mutex);
> +up_read:
> + up_read(&net_sem);
>   if (rv < 0) {
>  put_userns:
>   put_user_ns(user_ns);
> @@ -443,6 +452,7 @@ static void cleanup_net(struct work_struct *work)
>   list_replace_init(&cleanup_list, &net_kill_list);
>   spin_unlock_irq(&cleanup_list_lock);
>  
> + down_read(&net_sem);
>   mutex_lock(&net_mutex);
>  
>   /* Don't let anyo

[PATCH v2 03/31] net: Introduce net_sem for protection of pernet_list

2017-11-20 Thread Kirill Tkhai
Curently mutex is used to protect pernet operations list. It makes
cleanup_net() to execute ->exit methods of the same operations set,
which was used on the time of ->init, even after net namespace is
unlinked from net_namespace_list.

But the problem is it's need to synchronize_rcu() after net is removed
from net_namespace_list():

Destroy net_ns:
cleanup_net()
  mutex_lock(&net_mutex)
  list_del_rcu(&net->list)
  synchronize_rcu()  <--- Sleep there for ages
  list_for_each_entry_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list)
  list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list)
  mutex_unlock(&net_mutex)

This primitive is not fast, especially on the systems with many processors
and/or when preemptible RCU is enabled in config. So, all the time, while
cleanup_net() is waiting for RCU grace period, creation of new net namespaces
is not possible, the tasks, who makes it, are sleeping on the same mutex:

Create net_ns:
copy_net_ns()
  mutex_lock_killable(&net_mutex)<--- Sleep there for ages

I observed 20-30 seconds hangs of "unshare -n" on ordinary 8-cpu laptop
with preemptible RCU enabled.

The solution is to convert net_mutex to the rw_semaphore and add small locks
to really small number of pernet_operations, what really need them. Then,
pernet_operations::init/::exit methods, modifying the net-related data,
will require down_read() locking only, while down_write() will be used
for changing pernet_list.

This gives signify performance increase, after all patch set is applied,
like you may see here:

%for i in {1..1}; do unshare -n bash -c exit; done

*before*
real 1m40,377s
user 0m9,672s
sys 0m19,928s

*after*
real 0m17,007s
user 0m5,311s
sys 0m11,779

(5.8 times faster)

This patch starts replacing net_mutex to net_sem. It adds rw_semaphore,
describes the variables it protects, and makes to use where appropriate.
net_mutex is still present, and next patches will kick it out step-by-step.

Signed-off-by: Kirill Tkhai 
---
 include/linux/rtnetlink.h |1 +
 net/core/net_namespace.c  |   39 ++-
 net/core/rtnetlink.c  |4 ++--
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 2032ce2eb20b..f640fc87fe1d 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -35,6 +35,7 @@ extern int rtnl_is_locked(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct mutex net_mutex;
+extern struct rw_semaphore net_sem;
 
 #ifdef CONFIG_PROVE_LOCKING
 extern bool lockdep_rtnl_is_held(void);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2e512965bf42..859dce31e37e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -41,6 +41,11 @@ struct net init_net = {
 EXPORT_SYMBOL(init_net);
 
 static bool init_net_initialized;
+/*
+ * net_sem: protects: pernet_list, net_generic_ids,
+ * init_net_initialized and first_device pointer.
+ */
+DECLARE_RWSEM(net_sem);
 
 #define MIN_PERNET_OPS_ID  \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -279,7 +284,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
  */
 static __net_init int setup_net(struct net *net, struct user_namespace 
*user_ns)
 {
-   /* Must be called with net_mutex held */
+   /* Must be called with net_sem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
@@ -411,12 +416,16 @@ struct net *copy_net_ns(unsigned long flags,
net->ucounts = ucounts;
get_user_ns(user_ns);
 
-   rv = mutex_lock_killable(&net_mutex);
+   rv = down_read_killable(&net_sem);
if (rv < 0)
goto put_userns;
-
+   rv = mutex_lock_killable(&net_mutex);
+   if (rv < 0)
+   goto up_read;
rv = setup_net(net, user_ns);
mutex_unlock(&net_mutex);
+up_read:
+   up_read(&net_sem);
if (rv < 0) {
 put_userns:
put_user_ns(user_ns);
@@ -443,6 +452,7 @@ static void cleanup_net(struct work_struct *work)
list_replace_init(&cleanup_list, &net_kill_list);
spin_unlock_irq(&cleanup_list_lock);
 
+   down_read(&net_sem);
mutex_lock(&net_mutex);
 
/* Don't let anyone else find us. */
@@ -484,6 +494,7 @@ static void cleanup_net(struct work_struct *work)
ops_free_list(ops, &net_exit_list);
 
mutex_unlock(&net_mutex);
+   up_read(&net_sem);
 
/* Ensure there are no outstanding rcu callbacks using this
 * network namespace.
@@ -510,8 +521,10 @@ static void cleanup_net(struct work_struct *work)
  */
 void net_ns_barrier(void)
 {
+   down_write(&net_sem);
mutex_lock(&net_mutex);
mutex_unlock(&net_mutex);
+   up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);
 
@@ -838,12 +851