Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-18 Thread Jamal Hadi Salim

On 16-06-17 01:31 PM, Cong Wang wrote:


My patch is against -net. (I see you already figured out your patch is
missing in -net-next.)



Ok, should have re-read this email before working on the patch;->


Or are you suggesting to rebase it for -net-next? I think it fixes some real
bug so -net is better, although it is slightly large as a bug fix.



I am conflicted. There are a lot of changes in net-next at the moment;
adding this to -net seems like will definetely cause merge issues for
Dave.

Ok, Cong - patch attached and tested. Let me know what you think.

cheers,
jamal


diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index b7fa969..b52deb4 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -239,8 +239,7 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, 
void *val, int len)
return ret;
 }
 
-/* called when adding new meta information
- * under ife->tcf_lock
+/* called to find new metadata ops. Possibly load it as a module.
 */
 static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
void *val, int len)
@@ -251,11 +250,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
if (!ops) {
ret = -ENOENT;
 #ifdef CONFIG_MODULES
-   spin_unlock_bh(>tcf_lock);
rtnl_unlock();
request_module("ifemeta%u", metaid);
rtnl_lock();
-   spin_lock_bh(>tcf_lock);
ops = find_ife_oplist(metaid);
 #endif
}
@@ -272,7 +269,6 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
 */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
int len)
@@ -302,7 +298,9 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
}
}
 
+   spin_lock_bh(>tcf_lock);
list_add_tail(>metalist, >metalist);
+   spin_unlock_bh(>tcf_lock);
 
return ret;
 }
@@ -357,7 +355,6 @@ out_nlmsg_trim:
return -1;
 }
 
-/* under ife->tcf_lock */
 static void _tcf_ife_cleanup(struct tc_action *a, int bind)
 {
struct tcf_ife_info *ife = a->priv;
@@ -385,7 +382,6 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind)
spin_unlock_bh(>tcf_lock);
 }
 
-/* under ife->tcf_lock */
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
 {
int len = 0;
@@ -465,6 +461,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
 
ife = to_ife(a);
+   if (exists)
+   spin_lock_bh(>tcf_lock);
ife->flags = parm->flags;
 
if (parm->flags & IFE_ENCODE) {
@@ -475,10 +473,9 @@ static int tcf_ife_init(struct net *net, struct nlattr 
*nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
 
-   spin_lock_bh(>tcf_lock);
ife->tcf_action = parm->action;
-
if (parm->flags & IFE_ENCODE) {
+
if (daddr)
ether_addr_copy(ife->eth_dst, daddr);
else
@@ -492,6 +489,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
ife->eth_type = ife_type;
}
 
+   if (exists)
+   spin_unlock_bh(>tcf_lock);
+
if (ret == ACT_P_CREATED)
INIT_LIST_HEAD(>metalist);
 
@@ -505,7 +505,6 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
 
@@ -524,13 +523,10 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
}
 
-   spin_unlock_bh(>tcf_lock);
-
if (ret == ACT_P_CREATED)
tcf_hash_insert(tn, a);
 


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-18 Thread Jamal Hadi Salim

On 16-06-17 01:31 PM, Cong Wang wrote:


My patch is against -net. (I see you already figured out your patch is
missing in -net-next.)



Ok, should have re-read this email before working on the patch;->


Or are you suggesting to rebase it for -net-next? I think it fixes some real
bug so -net is better, although it is slightly large as a bug fix.



I am conflicted. There are a lot of changes in net-next at the moment;
adding this to -net seems like will definetely cause merge issues for
Dave.

Ok, Cong - patch attached and tested. Let me know what you think.

cheers,
jamal


diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index b7fa969..b52deb4 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -239,8 +239,7 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, 
void *val, int len)
return ret;
 }
 
-/* called when adding new meta information
- * under ife->tcf_lock
+/* called to find new metadata ops. Possibly load it as a module.
 */
 static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
void *val, int len)
@@ -251,11 +250,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
if (!ops) {
ret = -ENOENT;
 #ifdef CONFIG_MODULES
-   spin_unlock_bh(>tcf_lock);
rtnl_unlock();
request_module("ifemeta%u", metaid);
rtnl_lock();
-   spin_lock_bh(>tcf_lock);
ops = find_ife_oplist(metaid);
 #endif
}
@@ -272,7 +269,6 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
 */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
int len)
@@ -302,7 +298,9 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
}
}
 
+   spin_lock_bh(>tcf_lock);
list_add_tail(>metalist, >metalist);
+   spin_unlock_bh(>tcf_lock);
 
return ret;
 }
@@ -357,7 +355,6 @@ out_nlmsg_trim:
return -1;
 }
 
-/* under ife->tcf_lock */
 static void _tcf_ife_cleanup(struct tc_action *a, int bind)
 {
struct tcf_ife_info *ife = a->priv;
@@ -385,7 +382,6 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind)
spin_unlock_bh(>tcf_lock);
 }
 
-/* under ife->tcf_lock */
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
 {
int len = 0;
@@ -465,6 +461,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
 
ife = to_ife(a);
+   if (exists)
+   spin_lock_bh(>tcf_lock);
ife->flags = parm->flags;
 
if (parm->flags & IFE_ENCODE) {
@@ -475,10 +473,9 @@ static int tcf_ife_init(struct net *net, struct nlattr 
*nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
 
-   spin_lock_bh(>tcf_lock);
ife->tcf_action = parm->action;
-
if (parm->flags & IFE_ENCODE) {
+
if (daddr)
ether_addr_copy(ife->eth_dst, daddr);
else
@@ -492,6 +489,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
ife->eth_type = ife_type;
}
 
+   if (exists)
+   spin_unlock_bh(>tcf_lock);
+
if (ret == ACT_P_CREATED)
INIT_LIST_HEAD(>metalist);
 
@@ -505,7 +505,6 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
 
@@ -524,13 +523,10 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
}
 
-   spin_unlock_bh(>tcf_lock);
-
if (ret == ACT_P_CREATED)
tcf_hash_insert(tn, a);
 


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Cong Wang
On Fri, Jun 17, 2016 at 4:07 AM, Jamal Hadi Salim  wrote:
> On 16-06-17 01:38 AM, Cong Wang wrote:
>>
>> On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang 
>> wrote:
>>>
>>>
>>> I think we can just remove that tcf_lock, I am testing a patch now.
>>
>>
>> Please try the attached patch, I will do more tests tomorrow.
>>
>> Thanks!
>>
>
> Cong, What tree are you using? I dont see the time aggregation patches
> that I sent (and Dave took in) in your changes.

My patch is against -net. (I see you already figured out your patch is
missing in -net-next.)

Or are you suggesting to rebase it for -net-next? I think it fixes some real
bug so -net is better, although it is slightly large as a bug fix.

>
> Comments:
> Is GFP_ATOMIC really necessary? Thats user->kernel interface. GFP_KERNEL
> should be sufficient.

I added a read_lock(ife_mod_lock), this is why we need
GFP_ATOMIC.

Again, don't worry, this change should be in a separated patch,
you will not miss it again when I send them formally. ;)


> Also, it would be nice to kill the lock - but this feels like two
> patches in one. 1) to fix the alloc not to be under the lock 2) to
> kill said lock. Maybe split it as such for easier review.
> I am using this action extensively so will be happy to test.
> I think my patch is a good beginning to #1 - if you fix the forgotten
> unlock and ensure we lock around updating ife fields when it exists
> already (you said it in your earlier email and I thought about
> that afterwards).

Yes, it makes sense too. Your patch is smaller, if you plan to
backport it to stable, we can use your patch for -net  and -stable
and I am happy to rebase mine for -net-next.

I am fine with either way.

Thanks!


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Cong Wang
On Fri, Jun 17, 2016 at 4:07 AM, Jamal Hadi Salim  wrote:
> On 16-06-17 01:38 AM, Cong Wang wrote:
>>
>> On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang 
>> wrote:
>>>
>>>
>>> I think we can just remove that tcf_lock, I am testing a patch now.
>>
>>
>> Please try the attached patch, I will do more tests tomorrow.
>>
>> Thanks!
>>
>
> Cong, What tree are you using? I dont see the time aggregation patches
> that I sent (and Dave took in) in your changes.

My patch is against -net. (I see you already figured out your patch is
missing in -net-next.)

Or are you suggesting to rebase it for -net-next? I think it fixes some real
bug so -net is better, although it is slightly large as a bug fix.

>
> Comments:
> Is GFP_ATOMIC really necessary? Thats user->kernel interface. GFP_KERNEL
> should be sufficient.

I added a read_lock(ife_mod_lock), this is why we need
GFP_ATOMIC.

Again, don't worry, this change should be in a separated patch,
you will not miss it again when I send them formally. ;)


> Also, it would be nice to kill the lock - but this feels like two
> patches in one. 1) to fix the alloc not to be under the lock 2) to
> kill said lock. Maybe split it as such for easier review.
> I am using this action extensively so will be happy to test.
> I think my patch is a good beginning to #1 - if you fix the forgotten
> unlock and ensure we lock around updating ife fields when it exists
> already (you said it in your earlier email and I thought about
> that afterwards).

Yes, it makes sense too. Your patch is smaller, if you plan to
backport it to stable, we can use your patch for -net  and -stable
and I am happy to rebase mine for -net-next.

I am fine with either way.

Thanks!


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Cong Wang
On Fri, Jun 17, 2016 at 4:05 AM, Alexey Khoroshilov
 wrote:
> On 17.06.2016 08:38, Cong Wang wrote:
>> On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:
>>>
>>> I think we can just remove that tcf_lock, I am testing a patch now.
>>
>> Please try the attached patch, I will do more tests tomorrow.
>>
>> Thanks!
>>
>
> Looks good with two notes:
> 1. add_metainfo() still contains
> ret = ops->alloc(mi, metaval);
> that allocates memory with GFP_KERNEL.
> So, I would add gfpflag argument to alloc() operation.

I thought about this too, but we just allocate 32+ bytes here,
not sure if it is really worth to pass a gfp flag.

>
> 2. It makes sense to mention ife_mod_lock in the comment before
> add_metainfo(), because ife_mod_lock is the reason to use GFP_ATOMIC there.

Don't worry, it is in a separated patch, I will explain this
in the changelog. (I sent a combined patch just for review/tests.)

Thanks!


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Cong Wang
On Fri, Jun 17, 2016 at 4:05 AM, Alexey Khoroshilov
 wrote:
> On 17.06.2016 08:38, Cong Wang wrote:
>> On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:
>>>
>>> I think we can just remove that tcf_lock, I am testing a patch now.
>>
>> Please try the attached patch, I will do more tests tomorrow.
>>
>> Thanks!
>>
>
> Looks good with two notes:
> 1. add_metainfo() still contains
> ret = ops->alloc(mi, metaval);
> that allocates memory with GFP_KERNEL.
> So, I would add gfpflag argument to alloc() operation.

I thought about this too, but we just allocate 32+ bytes here,
not sure if it is really worth to pass a gfp flag.

>
> 2. It makes sense to mention ife_mod_lock in the comment before
> add_metainfo(), because ife_mod_lock is the reason to use GFP_ATOMIC there.

Don't worry, it is in a separated patch, I will explain this
in the changelog. (I sent a combined patch just for review/tests.)

Thanks!


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Alexey Khoroshilov
On 17.06.2016 08:38, Cong Wang wrote:
> On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:
>>
>> I think we can just remove that tcf_lock, I am testing a patch now.
> 
> Please try the attached patch, I will do more tests tomorrow.
> 
> Thanks!
> 

Looks good with two notes:
1. add_metainfo() still contains
ret = ops->alloc(mi, metaval);
that allocates memory with GFP_KERNEL.
So, I would add gfpflag argument to alloc() operation.

2. It makes sense to mention ife_mod_lock in the comment before
add_metainfo(), because ife_mod_lock is the reason to use GFP_ATOMIC there.

--
Alexey


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Jamal Hadi Salim

On 16-06-17 01:38 AM, Cong Wang wrote:

On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:


I think we can just remove that tcf_lock, I am testing a patch now.


Please try the attached patch, I will do more tests tomorrow.

Thanks!



Cong, What tree are you using? I dont see the time aggregation patches
that I sent (and Dave took in) in your changes.

Comments:
Is GFP_ATOMIC really necessary? Thats user->kernel interface. GFP_KERNEL 
should be sufficient.

Also, it would be nice to kill the lock - but this feels like two
patches in one. 1) to fix the alloc not to be under the lock 2) to
kill said lock. Maybe split it as such for easier review.
I am using this action extensively so will be happy to test.
I think my patch is a good beginning to #1 - if you fix the forgotten
unlock and ensure we lock around updating ife fields when it exists
already (you said it in your earlier email and I thought about
that afterwards).

cheers,
jamal



Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Alexey Khoroshilov
On 17.06.2016 08:38, Cong Wang wrote:
> On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:
>>
>> I think we can just remove that tcf_lock, I am testing a patch now.
> 
> Please try the attached patch, I will do more tests tomorrow.
> 
> Thanks!
> 

Looks good with two notes:
1. add_metainfo() still contains
ret = ops->alloc(mi, metaval);
that allocates memory with GFP_KERNEL.
So, I would add gfpflag argument to alloc() operation.

2. It makes sense to mention ife_mod_lock in the comment before
add_metainfo(), because ife_mod_lock is the reason to use GFP_ATOMIC there.

--
Alexey


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-17 Thread Jamal Hadi Salim

On 16-06-17 01:38 AM, Cong Wang wrote:

On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:


I think we can just remove that tcf_lock, I am testing a patch now.


Please try the attached patch, I will do more tests tomorrow.

Thanks!



Cong, What tree are you using? I dont see the time aggregation patches
that I sent (and Dave took in) in your changes.

Comments:
Is GFP_ATOMIC really necessary? Thats user->kernel interface. GFP_KERNEL 
should be sufficient.

Also, it would be nice to kill the lock - but this feels like two
patches in one. 1) to fix the alloc not to be under the lock 2) to
kill said lock. Maybe split it as such for easier review.
I am using this action extensively so will be happy to test.
I think my patch is a good beginning to #1 - if you fix the forgotten
unlock and ensure we lock around updating ife fields when it exists
already (you said it in your earlier email and I thought about
that afterwards).

cheers,
jamal



Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Cong Wang
On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:
>
> I think we can just remove that tcf_lock, I am testing a patch now.

Please try the attached patch, I will do more tests tomorrow.

Thanks!
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 658046d..859fb02 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -240,8 +240,7 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, 
void *val, int len)
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
-*/
+ */
 static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
void *val, int len)
 {
@@ -251,11 +250,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
if (!ops) {
ret = -ENOENT;
 #ifdef CONFIG_MODULES
-   spin_unlock_bh(>tcf_lock);
rtnl_unlock();
request_module("ifemeta%u", metaid);
rtnl_lock();
-   spin_lock_bh(>tcf_lock);
ops = find_ife_oplist(metaid);
 #endif
}
@@ -272,8 +269,8 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
-*/
+ * under RTNL lock
+ */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
int len)
 {
@@ -284,7 +281,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
if (!ops)
return -ENOENT;
 
-   mi = kzalloc(sizeof(*mi), GFP_KERNEL);
+   mi = kzalloc(sizeof(*mi), GFP_ATOMIC);
if (!mi) {
/*put back what find_ife_oplist took */
module_put(ops->owner);
@@ -302,8 +299,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
}
}
 
-   list_add_tail(>metalist, >metalist);
-
+   list_add_tail_rcu(>metalist, >metalist);
return ret;
 }
 
@@ -313,11 +309,13 @@ static int use_all_metadata(struct tcf_ife_info *ife)
int rc = 0;
int installed = 0;
 
+   read_lock(_mod_lock);
list_for_each_entry(o, , list) {
rc = add_metainfo(ife, o->metaid, NULL, 0);
if (rc == 0)
installed += 1;
}
+   read_unlock(_mod_lock);
 
if (installed)
return 0;
@@ -340,10 +338,12 @@ static int dump_metalist(struct sk_buff *skb, struct 
tcf_ife_info *ife)
if (!nest)
goto out_nlmsg_trim;
 
-   list_for_each_entry(e, >metalist, metalist) {
+   rcu_read_lock();
+   list_for_each_entry_rcu(e, >metalist, metalist) {
if (!e->ops->get(skb, e))
total_encoded += 1;
}
+   rcu_read_unlock();
 
if (!total_encoded)
goto out_nlmsg_trim;
@@ -357,15 +357,14 @@ out_nlmsg_trim:
return -1;
 }
 
-/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a, int bind)
+static void tcf_ife_cleanup(struct tc_action *a, int bind)
 {
struct tcf_ife_info *ife = a->priv;
struct tcf_meta_info *e, *n;
 
list_for_each_entry_safe(e, n, >metalist, metalist) {
module_put(e->ops->owner);
-   list_del(>metalist);
+   list_del_rcu(>metalist);
if (e->metaval) {
if (e->ops->release)
e->ops->release(e);
@@ -376,16 +375,6 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
}
 }
 
-static void tcf_ife_cleanup(struct tc_action *a, int bind)
-{
-   struct tcf_ife_info *ife = a->priv;
-
-   spin_lock_bh(>tcf_lock);
-   _tcf_ife_cleanup(a, bind);
-   spin_unlock_bh(>tcf_lock);
-}
-
-/* under ife->tcf_lock */
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
 {
int len = 0;
@@ -474,7 +463,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
 
-   spin_lock_bh(>tcf_lock);
ife->tcf_action = parm->action;
 
if (parm->flags & IFE_ENCODE) {
@@ -502,9 +490,8 @@ metadata_parse_err:
if (exists)
tcf_hash_release(a, bind);
if (ret == ACT_P_CREATED)
-   _tcf_ife_cleanup(a, bind);
+   tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
 
@@ -521,15 +508,12 @@ metadata_parse_err:
err = use_all_metadata(ife);
if (err) {
if (ret == ACT_P_CREATED)
-   _tcf_ife_cleanup(a, bind);
+   tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);

Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Cong Wang
On Thu, Jun 16, 2016 at 7:14 PM, Cong Wang  wrote:
>
> I think we can just remove that tcf_lock, I am testing a patch now.

Please try the attached patch, I will do more tests tomorrow.

Thanks!
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 658046d..859fb02 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -240,8 +240,7 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, 
void *val, int len)
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
-*/
+ */
 static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
void *val, int len)
 {
@@ -251,11 +250,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
if (!ops) {
ret = -ENOENT;
 #ifdef CONFIG_MODULES
-   spin_unlock_bh(>tcf_lock);
rtnl_unlock();
request_module("ifemeta%u", metaid);
rtnl_lock();
-   spin_lock_bh(>tcf_lock);
ops = find_ife_oplist(metaid);
 #endif
}
@@ -272,8 +269,8 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, 
u32 metaid,
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
-*/
+ * under RTNL lock
+ */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
int len)
 {
@@ -284,7 +281,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
if (!ops)
return -ENOENT;
 
-   mi = kzalloc(sizeof(*mi), GFP_KERNEL);
+   mi = kzalloc(sizeof(*mi), GFP_ATOMIC);
if (!mi) {
/*put back what find_ife_oplist took */
module_put(ops->owner);
@@ -302,8 +299,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
}
}
 
-   list_add_tail(>metalist, >metalist);
-
+   list_add_tail_rcu(>metalist, >metalist);
return ret;
 }
 
@@ -313,11 +309,13 @@ static int use_all_metadata(struct tcf_ife_info *ife)
int rc = 0;
int installed = 0;
 
+   read_lock(_mod_lock);
list_for_each_entry(o, , list) {
rc = add_metainfo(ife, o->metaid, NULL, 0);
if (rc == 0)
installed += 1;
}
+   read_unlock(_mod_lock);
 
if (installed)
return 0;
@@ -340,10 +338,12 @@ static int dump_metalist(struct sk_buff *skb, struct 
tcf_ife_info *ife)
if (!nest)
goto out_nlmsg_trim;
 
-   list_for_each_entry(e, >metalist, metalist) {
+   rcu_read_lock();
+   list_for_each_entry_rcu(e, >metalist, metalist) {
if (!e->ops->get(skb, e))
total_encoded += 1;
}
+   rcu_read_unlock();
 
if (!total_encoded)
goto out_nlmsg_trim;
@@ -357,15 +357,14 @@ out_nlmsg_trim:
return -1;
 }
 
-/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a, int bind)
+static void tcf_ife_cleanup(struct tc_action *a, int bind)
 {
struct tcf_ife_info *ife = a->priv;
struct tcf_meta_info *e, *n;
 
list_for_each_entry_safe(e, n, >metalist, metalist) {
module_put(e->ops->owner);
-   list_del(>metalist);
+   list_del_rcu(>metalist);
if (e->metaval) {
if (e->ops->release)
e->ops->release(e);
@@ -376,16 +375,6 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
}
 }
 
-static void tcf_ife_cleanup(struct tc_action *a, int bind)
-{
-   struct tcf_ife_info *ife = a->priv;
-
-   spin_lock_bh(>tcf_lock);
-   _tcf_ife_cleanup(a, bind);
-   spin_unlock_bh(>tcf_lock);
-}
-
-/* under ife->tcf_lock */
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
 {
int len = 0;
@@ -474,7 +463,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
 
-   spin_lock_bh(>tcf_lock);
ife->tcf_action = parm->action;
 
if (parm->flags & IFE_ENCODE) {
@@ -502,9 +490,8 @@ metadata_parse_err:
if (exists)
tcf_hash_release(a, bind);
if (ret == ACT_P_CREATED)
-   _tcf_ife_cleanup(a, bind);
+   tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
 
@@ -521,15 +508,12 @@ metadata_parse_err:
err = use_all_metadata(ife);
if (err) {
if (ret == ACT_P_CREATED)
-   _tcf_ife_cleanup(a, bind);
+   tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
 

Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Cong Wang
On Thu, Jun 16, 2016 at 5:38 PM, Jamal Hadi Salim  wrote:
> On 16-06-16 05:43 PM, Cong Wang wrote:
>>
>> On Thu, Jun 16, 2016 at 1:50 PM, Alexey Khoroshilov
>>  wrote:
>>>
>>> tcf_ife_init() contains a big chunk of code executed with
>>> ife->tcf_lock spinlock held. But that code contains several calls
>>> to sleeping functions:
>>>populate_metalist() and use_all_metadata()
>>>  -> add_metainfo()
>>>-> find_ife_oplist(metaid)
>>>  -> read_lock()
>>>  -> try_module_get(o->owner)
>>>-> kzalloc(sizeof(*mi), GFP_KERNEL);
>>
>>
>> Hmm, we don't need to hold that spinlock when we create a new ife action,
>> since we haven't inserted it yet. We do need it when we modify an existing
>> one. So I am thinking if we can refactor that code to avoid spinlock
>> whenever possible.
>>
>
> Does attached (compile tested) patch help?

You at least miss the unlock in load_metaops_and_vet()?

I think we can just remove that tcf_lock, I am testing a patch now.


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Cong Wang
On Thu, Jun 16, 2016 at 5:38 PM, Jamal Hadi Salim  wrote:
> On 16-06-16 05:43 PM, Cong Wang wrote:
>>
>> On Thu, Jun 16, 2016 at 1:50 PM, Alexey Khoroshilov
>>  wrote:
>>>
>>> tcf_ife_init() contains a big chunk of code executed with
>>> ife->tcf_lock spinlock held. But that code contains several calls
>>> to sleeping functions:
>>>populate_metalist() and use_all_metadata()
>>>  -> add_metainfo()
>>>-> find_ife_oplist(metaid)
>>>  -> read_lock()
>>>  -> try_module_get(o->owner)
>>>-> kzalloc(sizeof(*mi), GFP_KERNEL);
>>
>>
>> Hmm, we don't need to hold that spinlock when we create a new ife action,
>> since we haven't inserted it yet. We do need it when we modify an existing
>> one. So I am thinking if we can refactor that code to avoid spinlock
>> whenever possible.
>>
>
> Does attached (compile tested) patch help?

You at least miss the unlock in load_metaops_and_vet()?

I think we can just remove that tcf_lock, I am testing a patch now.


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Jamal Hadi Salim

On 16-06-16 05:43 PM, Cong Wang wrote:

On Thu, Jun 16, 2016 at 1:50 PM, Alexey Khoroshilov
 wrote:

tcf_ife_init() contains a big chunk of code executed with
ife->tcf_lock spinlock held. But that code contains several calls
to sleeping functions:
   populate_metalist() and use_all_metadata()
 -> add_metainfo()
   -> find_ife_oplist(metaid)
 -> read_lock()
 -> try_module_get(o->owner)
   -> kzalloc(sizeof(*mi), GFP_KERNEL);


Hmm, we don't need to hold that spinlock when we create a new ife action,
since we haven't inserted it yet. We do need it when we modify an existing
one. So I am thinking if we can refactor that code to avoid spinlock
whenever possible.



Does attached (compile tested) patch help?


   -> ops->alloc(mi, metaval);
   -> module_put(ops->owner);
   _tcf_ife_cleanup()
 -> module_put()

The same problem is actual for tcf_ife_cleanup() as well.



Huh? Both module_put() and kfree() should not sleep, right?



I dont think there is any sleeping there ;->

cheers,
jamal
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 6bbc518..e341bef 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -302,7 +302,9 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
}
}
 
+   spin_lock_bh(>tcf_lock);
list_add_tail(>metalist, >metalist);
+   spin_unlock_bh(>tcf_lock);
 
return ret;
 }
@@ -474,7 +476,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
 
-   spin_lock_bh(>tcf_lock);
ife->tcf_action = parm->action;
 
if (parm->flags & IFE_ENCODE) {
@@ -504,7 +505,6 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
 
@@ -523,13 +523,10 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
}
 
-   spin_unlock_bh(>tcf_lock);
-
if (ret == ACT_P_CREATED)
tcf_hash_insert(tn, a);
 


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Jamal Hadi Salim

On 16-06-16 05:43 PM, Cong Wang wrote:

On Thu, Jun 16, 2016 at 1:50 PM, Alexey Khoroshilov
 wrote:

tcf_ife_init() contains a big chunk of code executed with
ife->tcf_lock spinlock held. But that code contains several calls
to sleeping functions:
   populate_metalist() and use_all_metadata()
 -> add_metainfo()
   -> find_ife_oplist(metaid)
 -> read_lock()
 -> try_module_get(o->owner)
   -> kzalloc(sizeof(*mi), GFP_KERNEL);


Hmm, we don't need to hold that spinlock when we create a new ife action,
since we haven't inserted it yet. We do need it when we modify an existing
one. So I am thinking if we can refactor that code to avoid spinlock
whenever possible.



Does attached (compile tested) patch help?


   -> ops->alloc(mi, metaval);
   -> module_put(ops->owner);
   _tcf_ife_cleanup()
 -> module_put()

The same problem is actual for tcf_ife_cleanup() as well.



Huh? Both module_put() and kfree() should not sleep, right?



I dont think there is any sleeping there ;->

cheers,
jamal
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 6bbc518..e341bef 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -302,7 +302,9 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 
metaid, void *metaval,
}
}
 
+   spin_lock_bh(>tcf_lock);
list_add_tail(>metalist, >metalist);
+   spin_unlock_bh(>tcf_lock);
 
return ret;
 }
@@ -474,7 +476,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
 
-   spin_lock_bh(>tcf_lock);
ife->tcf_action = parm->action;
 
if (parm->flags & IFE_ENCODE) {
@@ -504,7 +505,6 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
 
@@ -523,13 +523,10 @@ metadata_parse_err:
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(a, bind);
 
-   spin_unlock_bh(>tcf_lock);
return err;
}
}
 
-   spin_unlock_bh(>tcf_lock);
-
if (ret == ACT_P_CREATED)
tcf_hash_insert(tn, a);
 


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Cong Wang
On Thu, Jun 16, 2016 at 1:50 PM, Alexey Khoroshilov
 wrote:
> tcf_ife_init() contains a big chunk of code executed with
> ife->tcf_lock spinlock held. But that code contains several calls
> to sleeping functions:
>   populate_metalist() and use_all_metadata()
> -> add_metainfo()
>   -> find_ife_oplist(metaid)
> -> read_lock()
> -> try_module_get(o->owner)
>   -> kzalloc(sizeof(*mi), GFP_KERNEL);

Hmm, we don't need to hold that spinlock when we create a new ife action,
since we haven't inserted it yet. We do need it when we modify an existing
one. So I am thinking if we can refactor that code to avoid spinlock
whenever possible.

>   -> ops->alloc(mi, metaval);
>   -> module_put(ops->owner);
>   _tcf_ife_cleanup()
> -> module_put()
>
> The same problem is actual for tcf_ife_cleanup() as well.
>

Huh? Both module_put() and kfree() should not sleep, right?


Re: [BUG] act_ife: sleeping functions called in atomic context

2016-06-16 Thread Cong Wang
On Thu, Jun 16, 2016 at 1:50 PM, Alexey Khoroshilov
 wrote:
> tcf_ife_init() contains a big chunk of code executed with
> ife->tcf_lock spinlock held. But that code contains several calls
> to sleeping functions:
>   populate_metalist() and use_all_metadata()
> -> add_metainfo()
>   -> find_ife_oplist(metaid)
> -> read_lock()
> -> try_module_get(o->owner)
>   -> kzalloc(sizeof(*mi), GFP_KERNEL);

Hmm, we don't need to hold that spinlock when we create a new ife action,
since we haven't inserted it yet. We do need it when we modify an existing
one. So I am thinking if we can refactor that code to avoid spinlock
whenever possible.

>   -> ops->alloc(mi, metaval);
>   -> module_put(ops->owner);
>   _tcf_ife_cleanup()
> -> module_put()
>
> The same problem is actual for tcf_ife_cleanup() as well.
>

Huh? Both module_put() and kfree() should not sleep, right?