Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-04 Thread Masami Hiramatsu
On Wed, 4 Nov 2020 09:47:22 -0500
Steven Rostedt  wrote:

> On Wed, 4 Nov 2020 11:08:52 +0900
> Masami Hiramatsu  wrote:
> 
> > kretprobe_hash_lock() and kretprobe_table_lock() will be called from
> > outside of the kprobe pre_handler context. So, please keep the in_nmi()
> > check in those functions.
> > For pre_handler_kretprobe(), this looks good to me.
> > 
> 
> Final version, before sending to Linus.

This looks good to me :)

Thank you!

> 
> -- Steve
> 
> From: "Steven Rostedt (VMware)" 
> Subject: [PATCH] kprobes: Tell lockdep about kprobe nesting
> 
> The kprobe handlers have protection that prohibits other handlers from
> executing in other contexts (like if an NMI comes in while processing a
> kprobe and executes the same kprobe, it will fail with a "busy" return),
> but lockdep is unaware of this protection. Use lockdep's nesting API to
> differentiate between locks taken in INT3 context and other contexts to
> suppress the false warnings.
> 
> Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08...@kernel.org
> 
> Cc: Peter Zijlstra 
> Acked-by: Masami Hiramatsu 
> Signed-off-by: Steven Rostedt (VMware) 
> ---
>  kernel/kprobes.c | 25 +++++++++++++++++++++----
>  1 file changed, 21 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 8a12a25fa40d..41fdbb7953c6 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1249,7 +1249,13 @@ __acquires(hlist_lock)
>  
>   *head = &kretprobe_inst_table[hash];
>   hlist_lock = kretprobe_table_lock_ptr(hash);
> - raw_spin_lock_irqsave(hlist_lock, *flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  * Differentiate when it is taken in NMI context.
> +  */
> + raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
>  }
>  NOKPROBE_SYMBOL(kretprobe_hash_lock);
>  
> @@ -1258,7 +1264,13 @@ static void kretprobe_table_lock(unsigned long hash,
>  __acquires(hlist_lock)
>  {
>   raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
> - raw_spin_lock_irqsave(hlist_lock, *flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  * Differentiate when it is taken in NMI context.
> +  */
> + raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
>  }
>  NOKPROBE_SYMBOL(kretprobe_table_lock);
>  
> @@ -2028,7 +2040,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
>  
>   /* TODO: consider to only swap the RA after the last pre_handler fired */
>   hash = hash_ptr(current, KPROBE_HASH_BITS);
> - raw_spin_lock_irqsave(&rp->lock, flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  */
> + raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
>   if (!hlist_empty(&rp->free_instances)) {
>   ri = hlist_entry(rp->free_instances.first,
>   struct kretprobe_instance, hlist);
> @@ -2039,7 +2056,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
>   ri->task = current;
>  
>   if (rp->entry_handler && rp->entry_handler(ri, regs)) {
> - raw_spin_lock_irqsave(&rp->lock, flags);
> + raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
>   hlist_add_head(&ri->hlist, &rp->free_instances);
>   raw_spin_unlock_irqrestore(&rp->lock, flags);
>   return 0;
> -- 
> 2.25.4
> 


-- 
Masami Hiramatsu 


Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-04 Thread Steven Rostedt
On Wed, 4 Nov 2020 11:08:52 +0900
Masami Hiramatsu  wrote:

> kretprobe_hash_lock() and kretprobe_table_lock() will be called from
> outside of the kprobe pre_handler context. So, please keep the in_nmi()
> check in those functions.
> For pre_handler_kretprobe(), this looks good to me.
> 

Final version, before sending to Linus.

-- Steve

From: "Steven Rostedt (VMware)" 
Subject: [PATCH] kprobes: Tell lockdep about kprobe nesting

The kprobe handlers have protection that prohibits other handlers from
executing in other contexts (like if an NMI comes in while processing a
kprobe and executes the same kprobe, it will fail with a "busy" return),
but lockdep is unaware of this protection. Use lockdep's nesting API to
differentiate between locks taken in INT3 context and other contexts to
suppress the false warnings.

Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08...@kernel.org

Cc: Peter Zijlstra 
Acked-by: Masami Hiramatsu 
Signed-off-by: Steven Rostedt (VMware) 
---
 kernel/kprobes.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8a12a25fa40d..41fdbb7953c6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1249,7 +1249,13 @@ __acquires(hlist_lock)
 
*head = &kretprobe_inst_table[hash];
hlist_lock = kretprobe_table_lock_ptr(hash);
-   raw_spin_lock_irqsave(hlist_lock, *flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+* Differentiate when it is taken in NMI context.
+*/
+   raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
 }
 NOKPROBE_SYMBOL(kretprobe_hash_lock);
 
@@ -1258,7 +1264,13 @@ static void kretprobe_table_lock(unsigned long hash,
 __acquires(hlist_lock)
 {
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
-   raw_spin_lock_irqsave(hlist_lock, *flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+* Differentiate when it is taken in NMI context.
+*/
+   raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
 }
 NOKPROBE_SYMBOL(kretprobe_table_lock);
 
@@ -2028,7 +2040,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
-   raw_spin_lock_irqsave(&rp->lock, flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+*/
+   raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
if (!hlist_empty(&rp->free_instances)) {
ri = hlist_entry(rp->free_instances.first,
struct kretprobe_instance, hlist);
@@ -2039,7 +2056,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
ri->task = current;
 
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
-   raw_spin_lock_irqsave(&rp->lock, flags);
+   raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
hlist_add_head(&ri->hlist, &rp->free_instances);
raw_spin_unlock_irqrestore(&rp->lock, flags);
return 0;
-- 
2.25.4
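
For reference, the nesting API the patch relies on, in a minimal
self-contained form. This is a sketch, not code from the thread, and
the names demo_lock and demo_take are made up:

#include <linux/preempt.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

static void demo_take(void)
{
	unsigned long flags;

	/*
	 * lockdep keys its checks on (lock class, subclass), so
	 * subclass 0 (process/irq context) and subclass 1 (NMI/INT3
	 * context) are tracked as if they were different locks, even
	 * though both acquisitions take the same demo_lock.  This only
	 * silences lockdep; correctness still depends on an external
	 * guarantee -- here, the kprobe "busy" recursion check -- that
	 * the two contexts never contend on the same CPU.
	 */
	raw_spin_lock_irqsave_nested(&demo_lock, flags, !!in_nmi());
	/* ... critical section ... */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}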



Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-03 Thread Masami Hiramatsu
On Tue, 3 Nov 2020 11:09:13 -0500
Steven Rostedt  wrote:

> On Tue, 3 Nov 2020 14:39:38 +0900
> Masami Hiramatsu  wrote:
> 
> > Ah, OK. This looks good to me.
> > 
> > BTW, in_nmi() in pre_handler_kretprobe() always be true because
> > now int3 is treated as an NMI. So you can always pass 1 there.
> 
> What about the below patch then?

kretprobe_hash_lock() and kretprobe_table_lock() will be called from
outside of the kprobe pre_handler context. So, please keep the in_nmi()
check in those functions.
For pre_handler_kretprobe(), this looks good to me.

Thank you,

> > Acked-by: Masami Hiramatsu 
> 
> Thanks!
> 
> From 29ac1a5c9068df06f3196173d4325c8076759551 Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (VMware)" 
> Date: Mon, 2 Nov 2020 09:17:49 -0500
> Subject: [PATCH] kprobes: Tell lockdep about kprobe nesting
> 
> The kprobe handlers have protection that prohibits other handlers from
> executing in other contexts (like if an NMI comes in while processing a
> kprobe and executes the same kprobe, it will fail with a "busy" return),
> but lockdep is unaware of this protection. Use lockdep's nesting API to
> differentiate between locks taken in INT3 context and other contexts to
> suppress the false warnings.
> 
> Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08...@kernel.org
> 
> Cc: Peter Zijlstra 
> Signed-off-by: Steven Rostedt (VMware) 
> ---
>  kernel/kprobes.c | 23 +++++++++++++++++++----
>  1 file changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 8a12a25fa40d..30889ea5514f 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1249,7 +1249,12 @@ __acquires(hlist_lock)
>  
>   *head = &kretprobe_inst_table[hash];
>   hlist_lock = kretprobe_table_lock_ptr(hash);
> - raw_spin_lock_irqsave(hlist_lock, *flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  */
> + raw_spin_lock_irqsave_nested(hlist_lock, *flags, 1);
>  }
>  NOKPROBE_SYMBOL(kretprobe_hash_lock);
>  
> @@ -1258,7 +1263,12 @@ static void kretprobe_table_lock(unsigned long hash,
>  __acquires(hlist_lock)
>  {
>   raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
> - raw_spin_lock_irqsave(hlist_lock, *flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  */
> + raw_spin_lock_irqsave_nested(hlist_lock, *flags, 1);
>  }
>  NOKPROBE_SYMBOL(kretprobe_table_lock);
>  
> @@ -2028,7 +2038,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
>  
>   /* TODO: consider to only swap the RA after the last pre_handler fired */
>   hash = hash_ptr(current, KPROBE_HASH_BITS);
> - raw_spin_lock_irqsave(&rp->lock, flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  */
> + raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
>   if (!hlist_empty(&rp->free_instances)) {
>   ri = hlist_entry(rp->free_instances.first,
>   struct kretprobe_instance, hlist);
> @@ -2039,7 +2054,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
>   ri->task = current;
>  
>   if (rp->entry_handler && rp->entry_handler(ri, regs)) {
> - raw_spin_lock_irqsave(&rp->lock, flags);
> + raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
>   hlist_add_head(&ri->hlist, &rp->free_instances);
>   raw_spin_unlock_irqrestore(&rp->lock, flags);
>   return 0;
> -- 
> 2.25.4
> 


-- 
Masami Hiramatsu 
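
A note on the call-site distinction behind keeping in_nmi() in the two
helpers: the sketch below is abridged from kernel/kprobes.c of this era
and partly assumed, so treat the caller list as illustrative rather
than authoritative.

/*
 * pre_handler_kretprobe()   reached only via the INT3 kprobe path,
 *                           where in_nmi() is always true, so a
 *                           constant subclass 1 is correct.
 *
 * kretprobe_hash_lock()     also reached from the return trampoline
 *                           (__kretprobe_trampoline_handler()).
 *
 * kretprobe_table_lock()    also reached from process context
 *                           (cleanup_rp_inst(), kprobe_flush_task()).
 *
 * Hence the two helpers keep the runtime !!in_nmi() subclass while
 * pre_handler_kretprobe() can hard-code 1.
 */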


Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-03 Thread Steven Rostedt
On Tue, 3 Nov 2020 14:39:38 +0900
Masami Hiramatsu  wrote:

> Ah, OK. This looks good to me.
> 
> BTW, in_nmi() in pre_handler_kretprobe() will always be true because
> int3 is now treated as an NMI. So you can always pass 1 there.

What about the below patch then?

> 
> Acked-by: Masami Hiramatsu 

Thanks!

From 29ac1a5c9068df06f3196173d4325c8076759551 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" 
Date: Mon, 2 Nov 2020 09:17:49 -0500
Subject: [PATCH] kprobes: Tell lockdep about kprobe nesting

The kprobe handlers have protection that prohibits other handlers from
executing in other contexts (like if an NMI comes in while processing a
kprobe and executes the same kprobe, it will fail with a "busy" return),
but lockdep is unaware of this protection. Use lockdep's nesting API to
differentiate between locks taken in INT3 context and other contexts to
suppress the false warnings.

Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08...@kernel.org

Cc: Peter Zijlstra 
Signed-off-by: Steven Rostedt (VMware) 
---
 kernel/kprobes.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8a12a25fa40d..30889ea5514f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1249,7 +1249,12 @@ __acquires(hlist_lock)
 
*head = &kretprobe_inst_table[hash];
hlist_lock = kretprobe_table_lock_ptr(hash);
-   raw_spin_lock_irqsave(hlist_lock, *flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+*/
+   raw_spin_lock_irqsave_nested(hlist_lock, *flags, 1);
 }
 NOKPROBE_SYMBOL(kretprobe_hash_lock);
 
@@ -1258,7 +1263,12 @@ static void kretprobe_table_lock(unsigned long hash,
 __acquires(hlist_lock)
 {
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
-   raw_spin_lock_irqsave(hlist_lock, *flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+*/
+   raw_spin_lock_irqsave_nested(hlist_lock, *flags, 1);
 }
 NOKPROBE_SYMBOL(kretprobe_table_lock);
 
@@ -2028,7 +2038,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
-   raw_spin_lock_irqsave(&rp->lock, flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+*/
+   raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
if (!hlist_empty(&rp->free_instances)) {
ri = hlist_entry(rp->free_instances.first,
struct kretprobe_instance, hlist);
@@ -2039,7 +2054,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
ri->task = current;
 
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
-   raw_spin_lock_irqsave(&rp->lock, flags);
+   raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
hlist_add_head(&ri->hlist, &rp->free_instances);
raw_spin_unlock_irqrestore(&rp->lock, flags);
return 0;
-- 
2.25.4



Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-02 Thread Masami Hiramatsu
On Mon, 2 Nov 2020 09:27:26 -0500
Steven Rostedt  wrote:

> 
> [ Peter Z, please take a look at this ]
> 
> On Mon, 2 Nov 2020 16:02:34 +0900
> Masami Hiramatsu  wrote:
> 
> > From 509b27efef8c7dbf56cab2e812916d6cd778c745 Mon Sep 17 00:00:00 2001
> > From: Masami Hiramatsu 
> > Date: Mon, 2 Nov 2020 15:37:28 +0900
> > Subject: [PATCH] kprobes: Disable lockdep for kprobe busy area
> > 
> > Since the code area between kprobe_busy_begin()/end() prohibits
> > other kprobes from calling probe handlers, we cannot get inconsistent
> > locks there. But lockdep doesn't know that, so it warns about rp->lock
> > and kretprobe_table_lock.
> > 
> > To suppress those false-positive errors, disable lockdep while
> > kprobe_busy is set.
> > 
> > Signed-off-by: Masami Hiramatsu 
> > ---
> >  kernel/kprobes.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 8a12a25fa40d..c7196e583600 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -1295,10 +1295,12 @@ void kprobe_busy_begin(void)
> > __this_cpu_write(current_kprobe, &kprobe_busy);
> > kcb = get_kprobe_ctlblk();
> > kcb->kprobe_status = KPROBE_HIT_ACTIVE;
> > +   lockdep_off();
> >  }
> >  
> >  void kprobe_busy_end(void)
> >  {
> > +   lockdep_on();
> > __this_cpu_write(current_kprobe, NULL);
> > preempt_enable();
> >  }
> > -- 
> 
> No, this is not the correct workaround (too big of a hammer). You could do
> the following:
> 
> From 4139d9c8437b0bd2262e989ca4eb0a83b7e7bb72 Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (VMware)" 
> Date: Mon, 2 Nov 2020 09:17:49 -0500
> Subject: [PATCH] kprobes: Tell lockdep about kprobe nesting
> 
> The kprobe handlers have protection that prohibits other handlers from
> executing in other contexts (like if an NMI comes in while processing a
> kprobe and executes the same kprobe, it will fail with a "busy" return),
> but lockdep is unaware of this protection. Use lockdep's nesting API to
> differentiate between locks taken in NMI context and other contexts to
> suppress the false warnings.

Ah, OK. This looks good to me.

BTW, in_nmi() in pre_handler_kretprobe() will always be true because
int3 is now treated as an NMI. So you can always pass 1 there.
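
As background: in_nmi() tests the NMI bits of preempt_count(), and
since the x86 entry rework the INT3 exception path runs inside an
NMI-like section, which is why it always reads true here. A rough
sketch, assuming the 5.10-era definition (see include/linux/preempt.h
for the authoritative one):

#define in_nmi()	(preempt_count() & NMI_MASK)

The INT3 handler brackets its work with an NMI-style entry/exit, so
these bits are set while kprobe pre-handlers run.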

Acked-by: Masami Hiramatsu 

Thank you,

> 
> Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08...@kernel.org
> 
> Cc: Peter Zijlstra 
> Signed-off-by: Steven Rostedt (VMware) 
> ---
>  kernel/kprobes.c | 24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 8a12a25fa40d..ccb285867059 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1249,7 +1249,12 @@ __acquires(hlist_lock)
>  
>   *head = &kretprobe_inst_table[hash];
>   hlist_lock = kretprobe_table_lock_ptr(hash);
> - raw_spin_lock_irqsave(hlist_lock, *flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  */
> + raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
>  }
>  NOKPROBE_SYMBOL(kretprobe_hash_lock);
>  
> @@ -1258,7 +1263,12 @@ static void kretprobe_table_lock(unsigned long hash,
>  __acquires(hlist_lock)
>  {
>   raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
> - raw_spin_lock_irqsave(hlist_lock, *flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  */
> + raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
>  }
>  NOKPROBE_SYMBOL(kretprobe_table_lock);
>  
> @@ -2025,10 +2035,16 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
>   struct kretprobe *rp = container_of(p, struct kretprobe, kp);
>   unsigned long hash, flags = 0;
>   struct kretprobe_instance *ri;
> + int nmi = !!in_nmi();
>  
>   /* TODO: consider to only swap the RA after the last pre_handler fired */
>   hash = hash_ptr(current, KPROBE_HASH_BITS);
> - raw_spin_lock_irqsave(&rp->lock, flags);
> + /*
> +  * Nested is a workaround that will soon not be needed.
> +  * There's other protections that make sure the same lock
> +  * is not taken on the same CPU that lockdep is unaware of.
> +  */
> + raw_spin_lock_irqsave_nested(&rp->lock, flags, nmi);
>   if (!hlist_empty(&rp->free_instances)) {
>   ri = hlist_entry(rp->free_instances.first,
>   struct kretprobe_instance, hlist);
> @@ -2039,7 +2055,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
>   ri->task = current;
>  
>   if (rp->entry_handler && rp->entry_handler(ri, regs)) {
> - raw_spin_lock_irqsave(&rp->lock, flags);
> + raw_spin_lock_irqsave_nested(&rp->lock, flags, nmi);

Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-02 Thread Steven Rostedt


[ Peter Z, please take a look at this ]

On Mon, 2 Nov 2020 16:02:34 +0900
Masami Hiramatsu  wrote:

> From 509b27efef8c7dbf56cab2e812916d6cd778c745 Mon Sep 17 00:00:00 2001
> From: Masami Hiramatsu 
> Date: Mon, 2 Nov 2020 15:37:28 +0900
> Subject: [PATCH] kprobes: Disable lockdep for kprobe busy area
> 
> Since the code area between kprobe_busy_begin()/end() prohibits
> other kprobes from calling probe handlers, we cannot get inconsistent
> locks there. But lockdep doesn't know that, so it warns about rp->lock
> and kretprobe_table_lock.
> 
> To suppress those false-positive errors, disable lockdep while
> kprobe_busy is set.
> 
> Signed-off-by: Masami Hiramatsu 
> ---
>  kernel/kprobes.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 8a12a25fa40d..c7196e583600 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1295,10 +1295,12 @@ void kprobe_busy_begin(void)
>   __this_cpu_write(current_kprobe, &kprobe_busy);
>   kcb = get_kprobe_ctlblk();
>   kcb->kprobe_status = KPROBE_HIT_ACTIVE;
> + lockdep_off();
>  }
>  
>  void kprobe_busy_end(void)
>  {
> + lockdep_on();
>   __this_cpu_write(current_kprobe, NULL);
>   preempt_enable();
>  }
> -- 

No, this is not the correct workaround (too big of a hammer). You could do
the following:

From 4139d9c8437b0bd2262e989ca4eb0a83b7e7bb72 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" 
Date: Mon, 2 Nov 2020 09:17:49 -0500
Subject: [PATCH] kprobes: Tell lockdep about kprobe nesting

The kprobe handlers have protection that prohibits other handlers from
executing in other contexts (like if an NMI comes in while processing a
kprobe and executes the same kprobe, it will fail with a "busy" return),
but lockdep is unaware of this protection. Use lockdep's nesting API to
differentiate between locks taken in NMI context and other contexts to
suppress the false warnings.

Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08...@kernel.org

Cc: Peter Zijlstra 
Signed-off-by: Steven Rostedt (VMware) 
---
 kernel/kprobes.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8a12a25fa40d..ccb285867059 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1249,7 +1249,12 @@ __acquires(hlist_lock)
 
*head = &kretprobe_inst_table[hash];
hlist_lock = kretprobe_table_lock_ptr(hash);
-   raw_spin_lock_irqsave(hlist_lock, *flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+*/
+   raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
 }
 NOKPROBE_SYMBOL(kretprobe_hash_lock);
 
@@ -1258,7 +1263,12 @@ static void kretprobe_table_lock(unsigned long hash,
 __acquires(hlist_lock)
 {
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
-   raw_spin_lock_irqsave(hlist_lock, *flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+*/
+   raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
 }
 NOKPROBE_SYMBOL(kretprobe_table_lock);
 
@@ -2025,10 +2035,16 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
unsigned long hash, flags = 0;
struct kretprobe_instance *ri;
+   int nmi = !!in_nmi();
 
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
-   raw_spin_lock_irqsave(&rp->lock, flags);
+   /*
+* Nested is a workaround that will soon not be needed.
+* There's other protections that make sure the same lock
+* is not taken on the same CPU that lockdep is unaware of.
+*/
+   raw_spin_lock_irqsave_nested(&rp->lock, flags, nmi);
if (!hlist_empty(&rp->free_instances)) {
ri = hlist_entry(rp->free_instances.first,
struct kretprobe_instance, hlist);
@@ -2039,7 +2055,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
ri->task = current;
 
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
-   raw_spin_lock_irqsave(&rp->lock, flags);
+   raw_spin_lock_irqsave_nested(&rp->lock, flags, nmi);
hlist_add_head(&ri->hlist, &rp->free_instances);
raw_spin_unlock_irqrestore(&rp->lock, flags);
return 0;
-- 
2.25.4
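
On "too big of a hammer": lockdep_off() disables lock tracking for the
current task entirely, so while kprobe_busy is set every lock taken in
the region -- not just rp->lock and the kretprobe table locks -- would
go unchecked, hiding real deadlocks in probe handlers. A sketch of why,
abridged from kernel/locking/lockdep.c of this era (the exact body
varies by kernel version):

void lockdep_off(void)
{
	/* Any nonzero lockdep_recursion makes lockdep ignore this task. */
	current->lockdep_recursion += LOCKDEP_OFF;
}

The subclass annotation in the patch above keeps lockdep armed and only
separates the two acquisition contexts of the same lock.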



Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-01 Thread Masami Hiramatsu
On Mon, 2 Nov 2020 14:53:34 +0900
Masami Hiramatsu  wrote:

> On Mon, 2 Nov 2020 14:11:38 +0900
> Masami Hiramatsu  wrote:
> 
> > On Fri, 30 Oct 2020 21:38:31 -0400
> > Steven Rostedt  wrote:
> > 
> > > On Sat, 29 Aug 2020 22:02:36 +0900
> > > Masami Hiramatsu  wrote:
> > > 
> > > > Since the commit 9b38cc704e84 ("kretprobe: Prevent triggering
> > > > kretprobe from within kprobe_flush_task") sets a dummy current
> > > > kprobe in the trampoline handler via kprobe_busy_begin/end(),
> > > > it is not possible to run a kretprobe pre-handler in the kretprobe
> > > > trampoline handler context, even from an NMI. If an NMI interrupts
> > > > kretprobe_trampoline_handler() and hits a kretprobe, the
> > > > 2nd kretprobe will correctly detect the recursion and will be
> > > > skipped.
> > > > This means an NMI can almost never cause a double-lock on kretprobes.
> > > > 
> > > > The last remaining point is cleanup_rp_inst(), which also takes
> > > > kretprobe_table_lock without setting up a current kprobe.
> > > > Adding kprobe_busy_begin/end() there allows us to remove the
> > > > in_nmi() check.
> > > > 
> > > > The above commit applied kprobe_busy_begin/end() on x86 only, but
> > > > now that all arch implementations are unified into the generic one,
> > > > we can safely remove the in_nmi() check from arch-independent code.
> > > >
> > > 
> > > So are you saying that lockdep is lying?
> > > 
> > > Kprobe smoke test: started
> > > 
> > > 
> > > WARNING: inconsistent lock state
> > > 5.10.0-rc1-test+ #29 Not tainted
> > > 
> > > inconsistent {INITIAL USE} -> {IN-NMI} usage.
> > > swapper/0/1 [HC1[1]:SC0[0]:HE0:SE1] takes:
> > > 82b07118 (&rp->lock){}-{2:2}, at: pre_handler_kretprobe+0x4b/0x193
> > > {INITIAL USE} state was registered at:
> > >   lock_acquire+0x280/0x325
> > >   _raw_spin_lock+0x30/0x3f
> > >   recycle_rp_inst+0x3f/0x86
> > >   __kretprobe_trampoline_handler+0x13a/0x177
> > >   trampoline_handler+0x48/0x57
> > >   kretprobe_trampoline+0x2a/0x4f
> > >   kretprobe_trampoline+0x0/0x4f
> > >   init_kprobes+0x193/0x19d
> > >   do_one_initcall+0xf9/0x27e
> > >   kernel_init_freeable+0x16e/0x2b6
> > >   kernel_init+0xe/0x109
> > >   ret_from_fork+0x22/0x30
> > > irq event stamp: 1670
> > > hardirqs last  enabled at (1669): [] 
> > > slab_free_freelist_hook+0xb4/0xfd
> > > hardirqs last disabled at (1670): [] exc_int3+0xae/0x10a
> > > softirqs last  enabled at (1484): [] 
> > > __do_softirq+0x352/0x38d
> > > softirqs last disabled at (1471): [] 
> > > asm_call_irq_on_stack+0x12/0x20
> > > 
> > > other info that might help us debug this:
> > >  Possible unsafe locking scenario:
> > > 
> > >        CPU0
> > >        ----
> > >   lock(&rp->lock);
> > >   <Interrupt>
> > >     lock(&rp->lock);
> > > 
> > >  *** DEADLOCK ***
> > > 
> > > no locks held by swapper/0/1.
> > > 
> > > stack backtrace:
> > > CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.0-rc1-test+ #29
> > > Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
> > > Call Trace:
> > >  dump_stack+0x7d/0x9f
> > >  print_usage_bug+0x1c0/0x1d3
> > >  lock_acquire+0x302/0x325
> > >  ? pre_handler_kretprobe+0x4b/0x193
> > >  ? stop_machine_from_inactive_cpu+0x120/0x120
> > >  _raw_spin_lock_irqsave+0x43/0x58
> > >  ? pre_handler_kretprobe+0x4b/0x193
> > >  pre_handler_kretprobe+0x4b/0x193
> > >  ? stop_machine_from_inactive_cpu+0x120/0x120
> > >  ? kprobe_target+0x1/0x16
> > >  kprobe_int3_handler+0xd0/0x109
> > >  exc_int3+0xb8/0x10a
> > >  asm_exc_int3+0x31/0x40
> > > RIP: 0010:kprobe_target+0x1/0x16
> > >  5d c3 cc
> > > RSP: :c9033e00 EFLAGS: 0246
> > > RAX: 8110ea77 RBX: 0001 RCX: c9033cb4
> > > RDX: 0231 RSI:  RDI: 3ca57c35
> > > RBP: c9033e20 R08:  R09: 8111d207
> > > R10: 8881002ab480 R11: 8881002ab480 R12: 
> > > R13: 82a52af0 R14: 0200 R15: 888100331130
> > >  ? register_kprobe+0x43c/0x492
> > >  ? stop_machine_from_inactive_cpu+0x120/0x120
> > >  ? kprobe_target+0x1/0x16
> > >  ? init_test_probes+0x2c6/0x38a
> > >  init_kprobes+0x193/0x19d
> > >  ? debugfs_kprobe_init+0xb8/0xb8
> > >  do_one_initcall+0xf9/0x27e
> > >  ? rcu_read_lock_sched_held+0x3e/0x75
> > >  ? init_mm_internals+0x27b/0x284
> > >  kernel_init_freeable+0x16e/0x2b6
> > >  ? rest_init+0x152/0x152
> > >  kernel_init+0xe/0x109
> > >  ret_from_fork+0x22/0x30
> > > Kprobe smoke test: passed successfully
> > > 
> > > Config attached.
> > 
> > Thanks for the report! Let me check what happens.
> 
> OK, confirmed. But this is actually a false-positive report.
> 
> Lockdep reports the rp->lock case between pre_handler_kretprobe()
> and recycle_rp_inst() called from __kretprobe_trampoline_handler().
> Since kretprobe_trampoline_handler() sets current_kprobe,
> any other kprobe that hits on the same CPU is skipped. This means
> pre_handler_kretprobe() is not called while executing
> __kretprobe_trampoline_handler().
> 

Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-01 Thread Masami Hiramatsu
On Mon, 2 Nov 2020 14:11:38 +0900
Masami Hiramatsu  wrote:

> On Fri, 30 Oct 2020 21:38:31 -0400
> Steven Rostedt  wrote:
> 
> > On Sat, 29 Aug 2020 22:02:36 +0900
> > Masami Hiramatsu  wrote:
> > 
> > > Since the commit 9b38cc704e84 ("kretprobe: Prevent triggering
> > > kretprobe from within kprobe_flush_task") sets a dummy current
> > > kprobe in the trampoline handler via kprobe_busy_begin/end(),
> > > it is not possible to run a kretprobe pre-handler in the kretprobe
> > > trampoline handler context, even from an NMI. If an NMI interrupts
> > > kretprobe_trampoline_handler() and hits a kretprobe, the
> > > 2nd kretprobe will correctly detect the recursion and will be
> > > skipped.
> > > This means an NMI can almost never cause a double-lock on kretprobes.
> > > 
> > > The last remaining point is cleanup_rp_inst(), which also takes
> > > kretprobe_table_lock without setting up a current kprobe.
> > > Adding kprobe_busy_begin/end() there allows us to remove the
> > > in_nmi() check.
> > > 
> > > The above commit applied kprobe_busy_begin/end() on x86 only, but
> > > now that all arch implementations are unified into the generic one,
> > > we can safely remove the in_nmi() check from arch-independent code.
> > >
> > 
> > So are you saying that lockdep is lying?
> > 
> > Kprobe smoke test: started
> > 
> > 
> > WARNING: inconsistent lock state
> > 5.10.0-rc1-test+ #29 Not tainted
> > 
> > inconsistent {INITIAL USE} -> {IN-NMI} usage.
> > swapper/0/1 [HC1[1]:SC0[0]:HE0:SE1] takes:
> > 82b07118 (&rp->lock){}-{2:2}, at: pre_handler_kretprobe+0x4b/0x193
> > {INITIAL USE} state was registered at:
> >   lock_acquire+0x280/0x325
> >   _raw_spin_lock+0x30/0x3f
> >   recycle_rp_inst+0x3f/0x86
> >   __kretprobe_trampoline_handler+0x13a/0x177
> >   trampoline_handler+0x48/0x57
> >   kretprobe_trampoline+0x2a/0x4f
> >   kretprobe_trampoline+0x0/0x4f
> >   init_kprobes+0x193/0x19d
> >   do_one_initcall+0xf9/0x27e
> >   kernel_init_freeable+0x16e/0x2b6
> >   kernel_init+0xe/0x109
> >   ret_from_fork+0x22/0x30
> > irq event stamp: 1670
> > hardirqs last  enabled at (1669): [] 
> > slab_free_freelist_hook+0xb4/0xfd
> > hardirqs last disabled at (1670): [] exc_int3+0xae/0x10a
> > softirqs last  enabled at (1484): [] 
> > __do_softirq+0x352/0x38d
> > softirqs last disabled at (1471): [] 
> > asm_call_irq_on_stack+0x12/0x20
> > 
> > other info that might help us debug this:
> >  Possible unsafe locking scenario:
> > 
> >        CPU0
> >        ----
> >   lock(&rp->lock);
> >   <Interrupt>
> >     lock(&rp->lock);
> > 
> >  *** DEADLOCK ***
> > 
> > no locks held by swapper/0/1.
> > 
> > stack backtrace:
> > CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.0-rc1-test+ #29
> > Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
> > Call Trace:
> >  dump_stack+0x7d/0x9f
> >  print_usage_bug+0x1c0/0x1d3
> >  lock_acquire+0x302/0x325
> >  ? pre_handler_kretprobe+0x4b/0x193
> >  ? stop_machine_from_inactive_cpu+0x120/0x120
> >  _raw_spin_lock_irqsave+0x43/0x58
> >  ? pre_handler_kretprobe+0x4b/0x193
> >  pre_handler_kretprobe+0x4b/0x193
> >  ? stop_machine_from_inactive_cpu+0x120/0x120
> >  ? kprobe_target+0x1/0x16
> >  kprobe_int3_handler+0xd0/0x109
> >  exc_int3+0xb8/0x10a
> >  asm_exc_int3+0x31/0x40
> > RIP: 0010:kprobe_target+0x1/0x16
> >  5d c3 cc
> > RSP: :c9033e00 EFLAGS: 0246
> > RAX: 8110ea77 RBX: 0001 RCX: c9033cb4
> > RDX: 0231 RSI:  RDI: 3ca57c35
> > RBP: c9033e20 R08:  R09: 8111d207
> > R10: 8881002ab480 R11: 8881002ab480 R12: 
> > R13: 82a52af0 R14: 0200 R15: 888100331130
> >  ? register_kprobe+0x43c/0x492
> >  ? stop_machine_from_inactive_cpu+0x120/0x120
> >  ? kprobe_target+0x1/0x16
> >  ? init_test_probes+0x2c6/0x38a
> >  init_kprobes+0x193/0x19d
> >  ? debugfs_kprobe_init+0xb8/0xb8
> >  do_one_initcall+0xf9/0x27e
> >  ? rcu_read_lock_sched_held+0x3e/0x75
> >  ? init_mm_internals+0x27b/0x284
> >  kernel_init_freeable+0x16e/0x2b6
> >  ? rest_init+0x152/0x152
> >  kernel_init+0xe/0x109
> >  ret_from_fork+0x22/0x30
> > Kprobe smoke test: passed successfully
> > 
> > Config attached.
> 
> Thanks for the report! Let me check what happens.

OK, confirmed. But this is actually a false-positive report.

Lockdep reports the rp->lock case between pre_handler_kretprobe()
and recycle_rp_inst() called from __kretprobe_trampoline_handler().
Since kretprobe_trampoline_handler() sets current_kprobe,
any other kprobe that hits on the same CPU is skipped. This means
pre_handler_kretprobe() is not called while executing
__kretprobe_trampoline_handler().

Actually, since this rp->lock is expected to be removed by the last
patch in this series ([21/21]), I left this as is, but we had better
handle this case because the latter half of this series will not be
merged until 5.11.

Hmm, is there any way to tell lockdep this is safe?

Thank you,
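
For reference, the guard that makes this a false positive is the dummy
"busy" kprobe. The function below is copied from the patch context
quoted earlier in this thread; the comment describing the consumer side
is a paraphrase, not verbatim kernel code:

void kprobe_busy_begin(void)
{
	struct kprobe_ctlblk *kcb;

	preempt_disable();
	__this_cpu_write(current_kprobe, &kprobe_busy);
	kcb = get_kprobe_ctlblk();
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
}

/*
 * Consumer side, paraphrased: the arch int3 handler checks whether a
 * kprobe is already running on this CPU (current_kprobe != NULL) and,
 * on a nested hit, only bumps the probe's nmissed counter instead of
 * calling its handlers -- so pre_handler_kretprobe() can never retake
 * rp->lock on the same CPU.
 */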

Re: [PATCH v5 14/21] kprobes: Remove NMI context check

2020-11-01 Thread Masami Hiramatsu
On Fri, 30 Oct 2020 21:38:31 -0400
Steven Rostedt  wrote:

> On Sat, 29 Aug 2020 22:02:36 +0900
> Masami Hiramatsu  wrote:
> 
> > Since the commit 9b38cc704e84 ("kretprobe: Prevent triggering
> > kretprobe from within kprobe_flush_task") sets a dummy current
> > kprobe in the trampoline handler via kprobe_busy_begin/end(),
> > it is not possible to run a kretprobe pre-handler in the kretprobe
> > trampoline handler context, even from an NMI. If an NMI interrupts
> > kretprobe_trampoline_handler() and hits a kretprobe, the
> > 2nd kretprobe will correctly detect the recursion and will be
> > skipped.
> > This means an NMI can almost never cause a double-lock on kretprobes.
> > 
> > The last remaining point is cleanup_rp_inst(), which also takes
> > kretprobe_table_lock without setting up a current kprobe.
> > Adding kprobe_busy_begin/end() there allows us to remove the
> > in_nmi() check.
> > 
> > The above commit applied kprobe_busy_begin/end() on x86 only, but
> > now that all arch implementations are unified into the generic one,
> > we can safely remove the in_nmi() check from arch-independent code.
> >
> 
> So are you saying that lockdep is lying?
> 
> Kprobe smoke test: started
> 
> 
> WARNING: inconsistent lock state
> 5.10.0-rc1-test+ #29 Not tainted
> 
> inconsistent {INITIAL USE} -> {IN-NMI} usage.
> swapper/0/1 [HC1[1]:SC0[0]:HE0:SE1] takes:
> 82b07118 (&rp->lock){}-{2:2}, at: pre_handler_kretprobe+0x4b/0x193
> {INITIAL USE} state was registered at:
>   lock_acquire+0x280/0x325
>   _raw_spin_lock+0x30/0x3f
>   recycle_rp_inst+0x3f/0x86
>   __kretprobe_trampoline_handler+0x13a/0x177
>   trampoline_handler+0x48/0x57
>   kretprobe_trampoline+0x2a/0x4f
>   kretprobe_trampoline+0x0/0x4f
>   init_kprobes+0x193/0x19d
>   do_one_initcall+0xf9/0x27e
>   kernel_init_freeable+0x16e/0x2b6
>   kernel_init+0xe/0x109
>   ret_from_fork+0x22/0x30
> irq event stamp: 1670
> hardirqs last  enabled at (1669): [] 
> slab_free_freelist_hook+0xb4/0xfd
> hardirqs last disabled at (1670): [] exc_int3+0xae/0x10a
> softirqs last  enabled at (1484): [] 
> __do_softirq+0x352/0x38d
> softirqs last disabled at (1471): [] 
> asm_call_irq_on_stack+0x12/0x20
> 
> other info that might help us debug this:
>  Possible unsafe locking scenario:
> 
>        CPU0
>        ----
>   lock(&rp->lock);
>   <Interrupt>
>     lock(&rp->lock);
> 
>  *** DEADLOCK ***
> 
> no locks held by swapper/0/1.
> 
> stack backtrace:
> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.0-rc1-test+ #29
> Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
> Call Trace:
>  dump_stack+0x7d/0x9f
>  print_usage_bug+0x1c0/0x1d3
>  lock_acquire+0x302/0x325
>  ? pre_handler_kretprobe+0x4b/0x193
>  ? stop_machine_from_inactive_cpu+0x120/0x120
>  _raw_spin_lock_irqsave+0x43/0x58
>  ? pre_handler_kretprobe+0x4b/0x193
>  pre_handler_kretprobe+0x4b/0x193
>  ? stop_machine_from_inactive_cpu+0x120/0x120
>  ? kprobe_target+0x1/0x16
>  kprobe_int3_handler+0xd0/0x109
>  exc_int3+0xb8/0x10a
>  asm_exc_int3+0x31/0x40
> RIP: 0010:kprobe_target+0x1/0x16
>  5d c3 cc
> RSP: :c9033e00 EFLAGS: 0246
> RAX: 8110ea77 RBX: 0001 RCX: c9033cb4
> RDX: 0231 RSI:  RDI: 3ca57c35
> RBP: c9033e20 R08:  R09: 8111d207
> R10: 8881002ab480 R11: 8881002ab480 R12: 
> R13: 82a52af0 R14: 0200 R15: 888100331130
>  ? register_kprobe+0x43c/0x492
>  ? stop_machine_from_inactive_cpu+0x120/0x120
>  ? kprobe_target+0x1/0x16
>  ? init_test_probes+0x2c6/0x38a
>  init_kprobes+0x193/0x19d
>  ? debugfs_kprobe_init+0xb8/0xb8
>  do_one_initcall+0xf9/0x27e
>  ? rcu_read_lock_sched_held+0x3e/0x75
>  ? init_mm_internals+0x27b/0x284
>  kernel_init_freeable+0x16e/0x2b6
>  ? rest_init+0x152/0x152
>  kernel_init+0xe/0x109
>  ret_from_fork+0x22/0x30
> Kprobe smoke test: passed successfully
> 
> Config attached.

Thanks for the report! Let me check what happens.

Thank you,

> 
> -- Steve


-- 
Masami Hiramatsu 


[PATCH v5 14/21] kprobes: Remove NMI context check

2020-08-29 Thread Masami Hiramatsu
Since the commit 9b38cc704e84 ("kretprobe: Prevent triggering
kretprobe from within kprobe_flush_task") sets a dummy current
kprobe in the trampoline handler via kprobe_busy_begin/end(),
it is not possible to run a kretprobe pre-handler in the kretprobe
trampoline handler context, even from an NMI. If an NMI interrupts
kretprobe_trampoline_handler() and hits a kretprobe, the
2nd kretprobe will correctly detect the recursion and will be
skipped.
This means an NMI can almost never cause a double-lock on kretprobes.

The last remaining point is cleanup_rp_inst(), which also takes
kretprobe_table_lock without setting up a current kprobe.
Adding kprobe_busy_begin/end() there allows us to remove the
in_nmi() check.

The above commit applied kprobe_busy_begin/end() on x86 only, but
now that all arch implementations are unified into the generic one,
we can safely remove the in_nmi() check from arch-independent code.

Signed-off-by: Masami Hiramatsu 
---
 kernel/kprobes.c |   16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index a0afaa79024e..211138225fa5 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1359,7 +1359,8 @@ static void cleanup_rp_inst(struct kretprobe *rp)
struct hlist_node *next;
struct hlist_head *head;
 
-   /* No race here */
+   /* To avoid recursive kretprobe by NMI, set kprobe busy here */
+   kprobe_busy_begin();
for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
kretprobe_table_lock(hash, &flags);
head = &kretprobe_inst_table[hash];
@@ -1369,6 +1370,8 @@ static void cleanup_rp_inst(struct kretprobe *rp)
}
kretprobe_table_unlock(hash, &flags);
}
+   kprobe_busy_end();
+
free_rp_inst(rp);
 }
 NOKPROBE_SYMBOL(cleanup_rp_inst);
@@ -2035,17 +2038,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
unsigned long hash, flags = 0;
struct kretprobe_instance *ri;
 
-   /*
-* To avoid deadlocks, prohibit return probing in NMI contexts,
-* just skip the probe and increase the (inexact) 'nmissed'
-* statistical counter, so that the user is informed that
-* something happened:
-*/
-   if (unlikely(in_nmi())) {
-   rp->nmissed++;
-   return 0;
-   }
-
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
raw_spin_lock_irqsave(&rp->lock, flags);