Re: [PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-04 Thread Heiko Carstens
On Thu, Oct 03, 2013 at 07:24:06PM -0700, Alexei Starovoitov wrote:
 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
 index 7092392..a5df511 100644
 --- a/arch/s390/net/bpf_jit_comp.c
 +++ b/arch/s390/net/bpf_jit_comp.c
 @@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp)
   struct bpf_binary_header *header = (void *)addr;
 
   if (fp-bpf_func == sk_run_filter)
 - return;
 + goto free_filter;
   set_memory_rw(addr, header-pages);
   module_free(NULL, header);
 +free_filter:
 + kfree(fp);
  }

For the s390 part:

Acked-by: Heiko Carstens heiko.carst...@de.ibm.com

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-04 Thread Ingo Molnar

* Alexei Starovoitov a...@plumgrid.com wrote:

 on x86 system with net.core.bpf_jit_enable = 1
 
 sudo tcpdump -i eth1 'tcp port 22'
 
 causes the warning:
 [   56.766097]  Possible unsafe locking scenario:
 [   56.766097]
 [   56.780146]CPU0
 [   56.786807]
 [   56.793188]   lock((vb-lock)-rlock);
 [   56.799593]   Interrupt
 [   56.805889] lock((vb-lock)-rlock);
 [   56.812266]
 [   56.812266]  *** DEADLOCK ***
 [   56.812266]
 [   56.830670] 1 lock held by ksoftirqd/1/13:
 [   56.836838]  #0:  (rcu_read_lock){.+.+..}, at: [8118f44c] 
 vm_unmap_aliases+0x8c/0x380
 [   56.849757]
 [   56.849757] stack backtrace:
 [   56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45
 [   56.868721] Hardware name: System manufacturer System Product Name/P8Z77 
 WS, BIOS 3007 07/26/2012
 [   56.882004]  821944c0 88080bbdb8c8 8175a145 
 0007
 [   56.895630]  88080bbd5f40 88080bbdb928 81755b14 
 0001
 [   56.909313]  88080001 8808 8101178f 
 0001
 [   56.923006] Call Trace:
 [   56.929532]  [8175a145] dump_stack+0x55/0x76
 [   56.936067]  [81755b14] print_usage_bug+0x1f7/0x208
 [   56.942445]  [8101178f] ? save_stack_trace+0x2f/0x50
 [   56.948932]  [810cc0a0] ? check_usage_backwards+0x150/0x150
 [   56.955470]  [810ccb52] mark_lock+0x282/0x2c0
 [   56.961945]  [810ccfed] __lock_acquire+0x45d/0x1d50
 [   56.968474]  [810cce6e] ? __lock_acquire+0x2de/0x1d50
 [   56.975140]  [81393bf5] ? cpumask_next_and+0x55/0x90
 [   56.981942]  [810cef72] lock_acquire+0x92/0x1d0
 [   56.988745]  [8118f52a] ? vm_unmap_aliases+0x16a/0x380
 [   56.995619]  [817628f1] _raw_spin_lock+0x41/0x50
 [   57.002493]  [8118f52a] ? vm_unmap_aliases+0x16a/0x380
 [   57.009447]  [8118f52a] vm_unmap_aliases+0x16a/0x380
 [   57.016477]  [8118f44c] ? vm_unmap_aliases+0x8c/0x380
 [   57.023607]  [810436b0] change_page_attr_set_clr+0xc0/0x460
 [   57.030818]  [810cfb8d] ? trace_hardirqs_on+0xd/0x10
 [   57.037896]  [811a8330] ? kmem_cache_free+0xb0/0x2b0
 [   57.044789]  [811b59c3] ? free_object_rcu+0x93/0xa0
 [   57.051720]  [81043d9f] set_memory_rw+0x2f/0x40
 [   57.058727]  [8104e17c] bpf_jit_free+0x2c/0x40
 [   57.065577]  [81642cba] sk_filter_release_rcu+0x1a/0x30
 [   57.072338]  [811108e2] rcu_process_callbacks+0x202/0x7c0
 [   57.078962]  [81057f17] __do_softirq+0xf7/0x3f0
 [   57.085373]  [81058245] run_ksoftirqd+0x35/0x70
 
 cannot reuse jited filter memory, since it's readonly,
 so use original bpf insns memory to hold work_struct
 
 defer kfree of sk_filter until jit completed freeing
 
 tested on x86_64 and i386
 
 Signed-off-by: Alexei Starovoitov a...@plumgrid.com
 ---
  arch/arm/net/bpf_jit_32.c   |1 +
  arch/powerpc/net/bpf_jit_comp.c |1 +
  arch/s390/net/bpf_jit_comp.c|4 +++-
  arch/sparc/net/bpf_jit_comp.c   |1 +
  arch/x86/net/bpf_jit_comp.c |   20 +++-
  include/linux/filter.h  |   11 +--
  net/core/filter.c   |   11 +++
  7 files changed, 37 insertions(+), 12 deletions(-)
 
 diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
 index f50d223..99b44e0 100644
 --- a/arch/arm/net/bpf_jit_32.c
 +++ b/arch/arm/net/bpf_jit_32.c
 @@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp)
  {
   if (fp-bpf_func != sk_run_filter)
   module_free(NULL, fp-bpf_func);
 + kfree(fp);
  }
 diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
 index bf56e33..2345bdb 100644
 --- a/arch/powerpc/net/bpf_jit_comp.c
 +++ b/arch/powerpc/net/bpf_jit_comp.c
 @@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp)
  {
   if (fp-bpf_func != sk_run_filter)
   module_free(NULL, fp-bpf_func);
 + kfree(fp);
  }
 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
 index 7092392..a5df511 100644
 --- a/arch/s390/net/bpf_jit_comp.c
 +++ b/arch/s390/net/bpf_jit_comp.c
 @@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp)
   struct bpf_binary_header *header = (void *)addr;
  
   if (fp-bpf_func == sk_run_filter)
 - return;
 + goto free_filter;
   set_memory_rw(addr, header-pages);
   module_free(NULL, header);
 +free_filter:
 + kfree(fp);
  }
 diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
 index 9c7be59..218b6b2 100644
 --- a/arch/sparc/net/bpf_jit_comp.c
 +++ b/arch/sparc/net/bpf_jit_comp.c
 @@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp)
  {
   if (fp-bpf_func != sk_run_filter)
   module_free(NULL, fp-bpf_func);
 + kfree(fp);
  }
 diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
 index 79c216a..1396a0a 100644
 --- 

Re: [PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-04 Thread Ingo Molnar

* Eric Dumazet eduma...@google.com wrote:

 1)
 
  I took a brief look at arch/x86/net/bpf_jit_comp.c while reviewing this
  patch.
 
  You need to split up bpf_jit_compile(), it's an obscenely large, ~600
  lines long function. We don't do that in modern, maintainable kernel code.
 
  2)
 
  This 128 bytes extra padding:
 
  /* Most of BPF filters are really small,
   * but if some of them fill a page, allow at least
   * 128 extra bytes to insert a random section of int3
   */
  sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE);
 
  why is it done? It's not clear to me from the comment.
 
 
 commit 314beb9bcabfd6b4542ccbced2402af2c6f6142a
 Author: Eric Dumazet eduma...@google.com
 Date:   Fri May 17 16:37:03 2013 +
 
 x86: bpf_jit_comp: secure bpf jit against spraying attacks
 
 hpa bringed into my attention some security related issues
 with BPF JIT on x86.
 
 This patch makes sure the bpf generated code is marked read only,
 as other kernel text sections.
 
 It also splits the unused space (we vmalloc() and only use a fraction of
 the page) in two parts, so that the generated bpf code not starts at a
 known offset in the page, but a pseudo random one.

Thanks for the explanation - that makes sense.

Ingo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-04 Thread Ingo Molnar

* Alexei Starovoitov a...@plumgrid.com wrote:

   #else
  +#include linux/slab.h
 
  Inlines in the middle of header files are generally frowned upon.
 
  The standard pattern is to put them at the top, that way it's easier to
  see the dependencies and there's also less .config dependent inclusion,
  which makes header hell cleanup work easier.
 
 Agree. I only followed the style that is already in filter.h 20 lines above.

 #ifdef CONFIG_BPF_JIT
 #include stdarg.h
 #include linux/linkage.h
 #include linux/printk.h
 
 as part of the cleanup can move all of them to the top. In the separate 
 commit?

Yeah, sure, that's fine.

struct sk_filter *fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog-len;
  + unsigned int sk_fsize = max_t(u32, fsize, sizeof(struct work_struct))
  + + sizeof(*fp);
 
  Using the structure definition I suggested, this could be replaced with
  the more obvious:
 
  unsigned int sk_fsize = max(fsize, sizeof(*fp));
 
 with helper function it's even cleaner. Fixed in V4

So my thought was that the helper function is perhaps too trivial and 
somewhat obscures the allocation pattern, but yeah. Either way is fine 
with me.

  A couple of questions/suggestions:
 
  1)
 
  I took a brief look at arch/x86/net/bpf_jit_comp.c while reviewing this
  patch.
 
  You need to split up bpf_jit_compile(), it's an obscenely large, ~600
  lines long function. We don't do that in modern, maintainable kernel code.
 
 I had the same thought, therefore in my proposed generalization of bpf:
 http://patchwork.ozlabs.org/patch/279280/
 It is split into two. do_jit() is still a bit large at 400 lines. Can
 split it further.

Yeah, I think as long as you split out the loop iterator into a separate 
function it gets much better.

The actual instruction generation code within the iterator looks good in a 
single chunk - splitting it up further than that might in fact make it 
less readable.

  3)
 
  It's nice code altogether! Are there any plans to generalize its 
  interfaces, to allow arbitrary bytecode to be used by other kernel 
  subsystems as well? In particular tracing filters could make use of 
  it, but it would also allow safe probe points.
 
 That was exactly the reasons to generalize bpf as I proposed.

Ok, cool :-)

For the x86 bits:

Acked-by: Ingo Molnar mi...@kernel.org

Thanks,

Ingo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-04 Thread Alexei Starovoitov
On Thu, Oct 3, 2013 at 10:16 PM, Eric Dumazet eric.duma...@gmail.com wrote:
 On Thu, 2013-10-03 at 21:11 -0700, Alexei Starovoitov wrote:

 -static inline unsigned int sk_filter_len(const struct sk_filter *fp)
 +static inline unsigned int sk_filter_size(const struct sk_filter *fp,
 + unsigned int proglen)
  {
 -   return fp-len * sizeof(struct sock_filter) + sizeof(*fp);
 +   return max(sizeof(*fp),
 +  offsetof(struct sk_filter, insns[proglen]));
  }

indeed that's cleaner.
Like this then:
-static inline unsigned int sk_filter_len(const struct sk_filter *fp)
+static inline unsigned int sk_filter_size(unsigned int proglen)
 {
-   return fp-len * sizeof(struct sock_filter) + sizeof(*fp);
+   return max(sizeof(struct sk_filter),
+  offsetof(struct sk_filter, insns[proglen]));
 }

testing it... will send v4 shortly
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-04 Thread Alexei Starovoitov
on x86 system with net.core.bpf_jit_enable = 1

sudo tcpdump -i eth1 'tcp port 22'

causes the warning:
[   56.766097]  Possible unsafe locking scenario:
[   56.766097]
[   56.780146]CPU0
[   56.786807]
[   56.793188]   lock((vb-lock)-rlock);
[   56.799593]   Interrupt
[   56.805889] lock((vb-lock)-rlock);
[   56.812266]
[   56.812266]  *** DEADLOCK ***
[   56.812266]
[   56.830670] 1 lock held by ksoftirqd/1/13:
[   56.836838]  #0:  (rcu_read_lock){.+.+..}, at: [8118f44c] 
vm_unmap_aliases+0x8c/0x380
[   56.849757]
[   56.849757] stack backtrace:
[   56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45
[   56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, 
BIOS 3007 07/26/2012
[   56.882004]  821944c0 88080bbdb8c8 8175a145 
0007
[   56.895630]  88080bbd5f40 88080bbdb928 81755b14 
0001
[   56.909313]  88080001 8808 8101178f 
0001
[   56.923006] Call Trace:
[   56.929532]  [8175a145] dump_stack+0x55/0x76
[   56.936067]  [81755b14] print_usage_bug+0x1f7/0x208
[   56.942445]  [8101178f] ? save_stack_trace+0x2f/0x50
[   56.948932]  [810cc0a0] ? check_usage_backwards+0x150/0x150
[   56.955470]  [810ccb52] mark_lock+0x282/0x2c0
[   56.961945]  [810ccfed] __lock_acquire+0x45d/0x1d50
[   56.968474]  [810cce6e] ? __lock_acquire+0x2de/0x1d50
[   56.975140]  [81393bf5] ? cpumask_next_and+0x55/0x90
[   56.981942]  [810cef72] lock_acquire+0x92/0x1d0
[   56.988745]  [8118f52a] ? vm_unmap_aliases+0x16a/0x380
[   56.995619]  [817628f1] _raw_spin_lock+0x41/0x50
[   57.002493]  [8118f52a] ? vm_unmap_aliases+0x16a/0x380
[   57.009447]  [8118f52a] vm_unmap_aliases+0x16a/0x380
[   57.016477]  [8118f44c] ? vm_unmap_aliases+0x8c/0x380
[   57.023607]  [810436b0] change_page_attr_set_clr+0xc0/0x460
[   57.030818]  [810cfb8d] ? trace_hardirqs_on+0xd/0x10
[   57.037896]  [811a8330] ? kmem_cache_free+0xb0/0x2b0
[   57.044789]  [811b59c3] ? free_object_rcu+0x93/0xa0
[   57.051720]  [81043d9f] set_memory_rw+0x2f/0x40
[   57.058727]  [8104e17c] bpf_jit_free+0x2c/0x40
[   57.065577]  [81642cba] sk_filter_release_rcu+0x1a/0x30
[   57.072338]  [811108e2] rcu_process_callbacks+0x202/0x7c0
[   57.078962]  [81057f17] __do_softirq+0xf7/0x3f0
[   57.085373]  [81058245] run_ksoftirqd+0x35/0x70

cannot reuse jited filter memory, since it's readonly,
so use original bpf insns memory to hold work_struct

defer kfree of sk_filter until jit completed freeing

tested on x86_64 and i386

Signed-off-by: Alexei Starovoitov a...@plumgrid.com
---
 arch/arm/net/bpf_jit_32.c   |1 +
 arch/powerpc/net/bpf_jit_comp.c |1 +
 arch/s390/net/bpf_jit_comp.c|4 +++-
 arch/sparc/net/bpf_jit_comp.c   |1 +
 arch/x86/net/bpf_jit_comp.c |   20 +++-
 include/linux/filter.h  |   11 +--
 net/core/filter.c   |   11 +++
 7 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index f50d223..99b44e0 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
if (fp-bpf_func != sk_run_filter)
module_free(NULL, fp-bpf_func);
+   kfree(fp);
 }
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index bf56e33..2345bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
if (fp-bpf_func != sk_run_filter)
module_free(NULL, fp-bpf_func);
+   kfree(fp);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 7092392..a5df511 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp)
struct bpf_binary_header *header = (void *)addr;
 
if (fp-bpf_func == sk_run_filter)
-   return;
+   goto free_filter;
set_memory_rw(addr, header-pages);
module_free(NULL, header);
+free_filter:
+   kfree(fp);
 }
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 9c7be59..218b6b2 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
if (fp-bpf_func != sk_run_filter)
module_free(NULL, fp-bpf_func);
+   kfree(fp);
 }
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 79c216a..1396a0a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -772,13 +772,23 @@ out:
return;
 }
 
+static void 

[PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-04 Thread Alexei Starovoitov
on x86 system with net.core.bpf_jit_enable = 1

sudo tcpdump -i eth1 'tcp port 22'

causes the warning:
[   56.766097]  Possible unsafe locking scenario:
[   56.766097]
[   56.780146]CPU0
[   56.786807]
[   56.793188]   lock((vb-lock)-rlock);
[   56.799593]   Interrupt
[   56.805889] lock((vb-lock)-rlock);
[   56.812266]
[   56.812266]  *** DEADLOCK ***
[   56.812266]
[   56.830670] 1 lock held by ksoftirqd/1/13:
[   56.836838]  #0:  (rcu_read_lock){.+.+..}, at: [8118f44c] 
vm_unmap_aliases+0x8c/0x380
[   56.849757]
[   56.849757] stack backtrace:
[   56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45
[   56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, 
BIOS 3007 07/26/2012
[   56.882004]  821944c0 88080bbdb8c8 8175a145 
0007
[   56.895630]  88080bbd5f40 88080bbdb928 81755b14 
0001
[   56.909313]  88080001 8808 8101178f 
0001
[   56.923006] Call Trace:
[   56.929532]  [8175a145] dump_stack+0x55/0x76
[   56.936067]  [81755b14] print_usage_bug+0x1f7/0x208
[   56.942445]  [8101178f] ? save_stack_trace+0x2f/0x50
[   56.948932]  [810cc0a0] ? check_usage_backwards+0x150/0x150
[   56.955470]  [810ccb52] mark_lock+0x282/0x2c0
[   56.961945]  [810ccfed] __lock_acquire+0x45d/0x1d50
[   56.968474]  [810cce6e] ? __lock_acquire+0x2de/0x1d50
[   56.975140]  [81393bf5] ? cpumask_next_and+0x55/0x90
[   56.981942]  [810cef72] lock_acquire+0x92/0x1d0
[   56.988745]  [8118f52a] ? vm_unmap_aliases+0x16a/0x380
[   56.995619]  [817628f1] _raw_spin_lock+0x41/0x50
[   57.002493]  [8118f52a] ? vm_unmap_aliases+0x16a/0x380
[   57.009447]  [8118f52a] vm_unmap_aliases+0x16a/0x380
[   57.016477]  [8118f44c] ? vm_unmap_aliases+0x8c/0x380
[   57.023607]  [810436b0] change_page_attr_set_clr+0xc0/0x460
[   57.030818]  [810cfb8d] ? trace_hardirqs_on+0xd/0x10
[   57.037896]  [811a8330] ? kmem_cache_free+0xb0/0x2b0
[   57.044789]  [811b59c3] ? free_object_rcu+0x93/0xa0
[   57.051720]  [81043d9f] set_memory_rw+0x2f/0x40
[   57.058727]  [8104e17c] bpf_jit_free+0x2c/0x40
[   57.065577]  [81642cba] sk_filter_release_rcu+0x1a/0x30
[   57.072338]  [811108e2] rcu_process_callbacks+0x202/0x7c0
[   57.078962]  [81057f17] __do_softirq+0xf7/0x3f0
[   57.085373]  [81058245] run_ksoftirqd+0x35/0x70

cannot reuse jited filter memory, since it's readonly,
so use original bpf insns memory to hold work_struct

defer kfree of sk_filter until jit completed freeing

tested on x86_64 and i386

Signed-off-by: Alexei Starovoitov a...@plumgrid.com
---
 arch/arm/net/bpf_jit_32.c   |1 +
 arch/powerpc/net/bpf_jit_comp.c |1 +
 arch/s390/net/bpf_jit_comp.c|4 +++-
 arch/sparc/net/bpf_jit_comp.c   |1 +
 arch/x86/net/bpf_jit_comp.c |   20 +++-
 include/linux/filter.h  |   11 +--
 net/core/filter.c   |   11 +++
 7 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index f50d223..99b44e0 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
if (fp-bpf_func != sk_run_filter)
module_free(NULL, fp-bpf_func);
+   kfree(fp);
 }
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index bf56e33..2345bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
if (fp-bpf_func != sk_run_filter)
module_free(NULL, fp-bpf_func);
+   kfree(fp);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 7092392..a5df511 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp)
struct bpf_binary_header *header = (void *)addr;
 
if (fp-bpf_func == sk_run_filter)
-   return;
+   goto free_filter;
set_memory_rw(addr, header-pages);
module_free(NULL, header);
+free_filter:
+   kfree(fp);
 }
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 9c7be59..218b6b2 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp)
 {
if (fp-bpf_func != sk_run_filter)
module_free(NULL, fp-bpf_func);
+   kfree(fp);
 }
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 79c216a..1396a0a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -772,13 +772,23 @@ out:
return;
 }
 
+static void 

Re: [PATCH v3 net-next] fix unsafe set_memory_rw from softirq

2013-10-03 Thread Eric Dumazet
On Thu, 2013-10-03 at 21:11 -0700, Alexei Starovoitov wrote:

 diff --git a/include/linux/filter.h b/include/linux/filter.h
 index a6ac848..5d66cd9 100644
 --- a/include/linux/filter.h
 +++ b/include/linux/filter.h
 @@ -25,15 +25,20 @@ struct sk_filter
  {
   atomic_trefcnt;
   unsigned intlen;/* Number of filter blocks */
 + struct rcu_head rcu;
   unsigned int(*bpf_func)(const struct sk_buff *skb,
   const struct sock_filter *filter);
 - struct rcu_head rcu;
 + /* insns start right after bpf_func, so that sk_run_filter() fetches
 +  * first insn from the same cache line that was used to call into
 +  * sk_run_filter()
 +  */
   struct sock_filter  insns[0];
  };
  
  static inline unsigned int sk_filter_len(const struct sk_filter *fp)
  {
 - return fp-len * sizeof(struct sock_filter) + sizeof(*fp);
 + return max(fp-len * sizeof(struct sock_filter),
 +sizeof(struct work_struct)) + sizeof(*fp);
  }

I would use for include/linux/filter.h this (untested) diff :

(Note the include linux/workqueue.h)

I also remove your comment about cache lines, since there is nothing
to align stuff on a cache line boundary.

diff --git a/include/linux/filter.h b/include/linux/filter.h
index a6ac848..281b05c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -6,6 +6,7 @@
 
 #include linux/atomic.h
 #include linux/compat.h
+#include linux/workqueue.h
 #include uapi/linux/filter.h
 
 #ifdef CONFIG_COMPAT
@@ -25,15 +26,20 @@ struct sk_filter
 {
atomic_trefcnt;
unsigned intlen;/* Number of filter blocks */
+   struct rcu_head rcu;
unsigned int(*bpf_func)(const struct sk_buff *skb,
const struct sock_filter *filter);
-   struct rcu_head rcu;
-   struct sock_filter  insns[0];
+   union {
+   struct work_struct  work;
+   struct sock_filter  insns[0];
+   };
 };
 
-static inline unsigned int sk_filter_len(const struct sk_filter *fp)
+static inline unsigned int sk_filter_size(const struct sk_filter *fp,
+ unsigned int proglen)
 {
-   return fp-len * sizeof(struct sock_filter) + sizeof(*fp);
+   return max(sizeof(*fp),
+  offsetof(struct sk_filter, insns[proglen]));
 }
 
 extern int sk_filter(struct sock *sk, struct sk_buff *skb);



This way, you can use sk_filter_size(fp, fprog-len)
instead of doing the max() games in sk_attach_filter() and
sk_unattached_filter_create()

Other than that, I think your patch is fine.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev