Re: [PATCH v1 1/1] binder: fix freeze race

2021-09-10 Thread Li Li
On Thu, Sep 9, 2021 at 11:03 PM Dan Carpenter  wrote:
>
> On Thu, Sep 09, 2021 at 04:21:41PM -0700, Li Li wrote:
> > @@ -4648,6 +4647,22 @@ static int binder_ioctl_get_node_debug_info(struct 
> > binder_proc *proc,
> >   return 0;
> >  }
> >
> > +static int binder_txns_pending(struct binder_proc *proc)
> > +{
> > + struct rb_node *n;
> > + struct binder_thread *thread;
> > +
> > + if (proc->outstanding_txns > 0)
> > + return 1;
>
> Make this function bool.

Will include the change (as well as the extra ->outstanding_txns
check) in the next revision.
>
> > +
> > + for (n = rb_first(&proc->threads); n; n = rb_next(n)) {
> > + thread = rb_entry(n, struct binder_thread, rb_node);
> > + if (thread->transaction_stack)
> > + return 1;
> > + }
> > + return 0;
> > +}
> > +
> >  static int binder_ioctl_freeze(struct binder_freeze_info *info,
> >  struct binder_proc *target_proc)
> >  {
> > @@ -4682,6 +4697,14 @@ static int binder_ioctl_freeze(struct 
> > binder_freeze_info *info,
> >   if (!ret && target_proc->outstanding_txns)
> >   ret = -EAGAIN;
>
> These two lines can be deleted now because binder_txns_pending() checks
> ->outstanding_txns.
>

Thanks,
Li
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v1 1/1] binder: fix freeze race

2021-09-09 Thread Dan Carpenter
On Thu, Sep 09, 2021 at 04:21:41PM -0700, Li Li wrote:
> @@ -4648,6 +4647,22 @@ static int binder_ioctl_get_node_debug_info(struct 
> binder_proc *proc,
>   return 0;
>  }
>  
> +static int binder_txns_pending(struct binder_proc *proc)
> +{
> + struct rb_node *n;
> + struct binder_thread *thread;
> +
> + if (proc->outstanding_txns > 0)
> + return 1;

Make this function bool.

> +
> + for (n = rb_first(&proc->threads); n; n = rb_next(n)) {
> + thread = rb_entry(n, struct binder_thread, rb_node);
> + if (thread->transaction_stack)
> + return 1;
> + }
> + return 0;
> +}
> +
>  static int binder_ioctl_freeze(struct binder_freeze_info *info,
>  struct binder_proc *target_proc)
>  {
> @@ -4682,6 +4697,14 @@ static int binder_ioctl_freeze(struct 
> binder_freeze_info *info,
>   if (!ret && target_proc->outstanding_txns)
>   ret = -EAGAIN;

These two lines can be deleted now because binder_txns_pending() checks
->outstanding_txns.

>  
> + /* Also check pending transactions that wait for reply */
> + if (ret >= 0) {
> + binder_inner_proc_lock(target_proc);
> + if (binder_txns_pending(target_proc))
> + ret = -EAGAIN;
> + binder_inner_proc_unlock(target_proc);
> + }
> +
>   if (ret < 0) {
>   binder_inner_proc_lock(target_proc);
>   target_proc->is_frozen = false;

regards,
dan carpenter

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v1 1/1] binder: fix freeze race

2021-09-09 Thread Todd Kjos
On Thu, Sep 9, 2021 at 4:21 PM Li Li  wrote:
>
> From: Li Li 
>
> Currently cgroup freezer is used to freeze the application threads, and
> BINDER_FREEZE is used to freeze binder interface. There's already a
> mechanism for BINDER_FREEZE to wait for any existing transactions to
> drain out before actually freezing the binder interface.
>
> But freezing an app requires 2 steps, freezing the binder interface with
> BINDER_FREEZEwhich and then freezing the application main threads with
> cgroupfs. This is not an atomic operation. The following race issue
> might happen.
>
> 1) Binder interface is frozen by ioctl(BINDER_FREEZE);
> 2) Main thread initiates a new sync binder transaction;
> 3) Main thread is frozen by "echo 1 > cgroup.freeze";
> 4) The response reaches the frozen thread, which will unexpectedly fail.
>
> This patch provides a mechanism for user space freezer manager to check
> if there's any new pending transaction happening between BINDER_FREEZE
> and freezing main thread. If there's any, the main thread freezing
> operation can be rolled back.
>
> Furthermore, the response might reach the binder driver before the
> rollback actually happens. That will also cause failed transaction.
>
> As the other process doesn't wait for another response of the response,
> the failed response transaction can be fixed by treating the response
> transaction like an oneway/async one, allowing it to reach the frozen
> thread. And it will be consumed when the thread gets unfrozen later.
>
> Bug: 198493121

Please remove "Bug: " tag. Replace it with a "Fixes:" tag that cites
the patch that introduced the race.

> Signed-off-by: Li Li 
> ---
>  drivers/android/binder.c  | 32 +++
>  drivers/android/binder_internal.h |  2 ++
>  2 files changed, 30 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index d9030cb6b1e4..f9713a97578c 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -3038,9 +3038,8 @@ static void binder_transaction(struct binder_proc *proc,
> if (reply) {
> binder_enqueue_thread_work(thread, tcomplete);
> binder_inner_proc_lock(target_proc);
> -   if (target_thread->is_dead || target_proc->is_frozen) {
> -   return_error = target_thread->is_dead ?
> -   BR_DEAD_REPLY : BR_FROZEN_REPLY;
> +   if (target_thread->is_dead) {
> +   return_error = BR_DEAD_REPLY;
> binder_inner_proc_unlock(target_proc);
> goto err_dead_proc_or_thread;
> }
> @@ -4648,6 +4647,22 @@ static int binder_ioctl_get_node_debug_info(struct 
> binder_proc *proc,
> return 0;
>  }
>
> +static int binder_txns_pending(struct binder_proc *proc)
> +{
> +   struct rb_node *n;
> +   struct binder_thread *thread;
> +
> +   if (proc->outstanding_txns > 0)
> +   return 1;
> +
> +   for (n = rb_first(&proc->threads); n; n = rb_next(n)) {
> +   thread = rb_entry(n, struct binder_thread, rb_node);
> +   if (thread->transaction_stack)
> +   return 1;
> +   }
> +   return 0;
> +}
> +
>  static int binder_ioctl_freeze(struct binder_freeze_info *info,
>struct binder_proc *target_proc)
>  {
> @@ -4682,6 +4697,14 @@ static int binder_ioctl_freeze(struct 
> binder_freeze_info *info,
> if (!ret && target_proc->outstanding_txns)
> ret = -EAGAIN;
>
> +   /* Also check pending transactions that wait for reply */
> +   if (ret >= 0) {
> +   binder_inner_proc_lock(target_proc);
> +   if (binder_txns_pending(target_proc))

The convention in the binder driver is to append "_ilocked" to the
function name if the function expects the inner proc lock to be held
(so "binder_txns_pending_ilocked(target_proc) in this case)".

> +   ret = -EAGAIN;
> +   binder_inner_proc_unlock(target_proc);
> +   }
> +
> if (ret < 0) {
> binder_inner_proc_lock(target_proc);
> target_proc->is_frozen = false;
> @@ -4705,7 +4728,8 @@ static int binder_ioctl_get_freezer_info(
> if (target_proc->pid == info->pid) {
> found = true;
> binder_inner_proc_lock(target_proc);
> -   info->sync_recv |= target_proc->sync_recv;
> +   info->sync_recv |= target_proc->sync_recv |
> +   (binder_txns_pending(target_proc) << 
> 1);
> info->async_recv |= target_proc->async_recv;
> binder_inner_proc_unlock(target_proc);
> }
> diff --git a/drivers/android/binder_internal.h 
> b/drivers/android/binder_internal.h
> index 810c0b84d3f8..402c4d4362a8 100644
> --- a/driv

Re: [PATCH v1 1/1] binder: fix freeze race

2021-09-09 Thread Li Li
Hi Todd,

Thanks for reviewing the patch! Please see my reply below.

And I'll send out v2 soon addressing your concerns.

On Thu, Sep 9, 2021 at 4:54 PM Todd Kjos  wrote:
>
> On Thu, Sep 9, 2021 at 4:21 PM Li Li  wrote:
> >
> > From: Li Li 
> >
> > Currently cgroup freezer is used to freeze the application threads, and
> > BINDER_FREEZE is used to freeze binder interface. There's already a
> > mechanism for BINDER_FREEZE to wait for any existing transactions to
> > drain out before actually freezing the binder interface.
> >
> > But freezing an app requires 2 steps, freezing the binder interface with
> > BINDER_FREEZEwhich and then freezing the application main threads with
> > cgroupfs. This is not an atomic operation. The following race issue
> > might happen.
> >
> > 1) Binder interface is frozen by ioctl(BINDER_FREEZE);
> > 2) Main thread initiates a new sync binder transaction;
> > 3) Main thread is frozen by "echo 1 > cgroup.freeze";
> > 4) The response reaches the frozen thread, which will unexpectedly fail.
> >
> > This patch provides a mechanism for user space freezer manager to check
> > if there's any new pending transaction happening between BINDER_FREEZE
> > and freezing main thread. If there's any, the main thread freezing
> > operation can be rolled back.
> >
> > Furthermore, the response might reach the binder driver before the
> > rollback actually happens. That will also cause failed transaction.
> >
> > As the other process doesn't wait for another response of the response,
> > the failed response transaction can be fixed by treating the response
> > transaction like an oneway/async one, allowing it to reach the frozen
> > thread. And it will be consumed when the thread gets unfrozen later.
> >
> > Bug: 198493121
>
> Please remove "Bug: " tag. Replace it with a "Fixes:" tag that cites
> the patch that introduced the race.
>
Done in v2.

> > Signed-off-by: Li Li 
> > ---
> >  drivers/android/binder.c  | 32 +++
> >  drivers/android/binder_internal.h |  2 ++
> >  2 files changed, 30 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > index d9030cb6b1e4..f9713a97578c 100644
> > --- a/drivers/android/binder.c
> > +++ b/drivers/android/binder.c
> > @@ -3038,9 +3038,8 @@ static void binder_transaction(struct binder_proc 
> > *proc,
> > if (reply) {
> > binder_enqueue_thread_work(thread, tcomplete);
> > binder_inner_proc_lock(target_proc);
> > -   if (target_thread->is_dead || target_proc->is_frozen) {
> > -   return_error = target_thread->is_dead ?
> > -   BR_DEAD_REPLY : BR_FROZEN_REPLY;
> > +   if (target_thread->is_dead) {
> > +   return_error = BR_DEAD_REPLY;
> > binder_inner_proc_unlock(target_proc);
> > goto err_dead_proc_or_thread;
> > }
> > @@ -4648,6 +4647,22 @@ static int binder_ioctl_get_node_debug_info(struct 
> > binder_proc *proc,
> > return 0;
> >  }
> >
> > +static int binder_txns_pending(struct binder_proc *proc)
> > +{
> > +   struct rb_node *n;
> > +   struct binder_thread *thread;
> > +
> > +   if (proc->outstanding_txns > 0)
> > +   return 1;
> > +
> > +   for (n = rb_first(&proc->threads); n; n = rb_next(n)) {
> > +   thread = rb_entry(n, struct binder_thread, rb_node);
> > +   if (thread->transaction_stack)
> > +   return 1;
> > +   }
> > +   return 0;
> > +}
> > +
> >  static int binder_ioctl_freeze(struct binder_freeze_info *info,
> >struct binder_proc *target_proc)
> >  {
> > @@ -4682,6 +4697,14 @@ static int binder_ioctl_freeze(struct 
> > binder_freeze_info *info,
> > if (!ret && target_proc->outstanding_txns)
> > ret = -EAGAIN;
> >
> > +   /* Also check pending transactions that wait for reply */
> > +   if (ret >= 0) {
> > +   binder_inner_proc_lock(target_proc);
> > +   if (binder_txns_pending(target_proc))
>
> The convention in the binder driver is to append "_ilocked" to the
> function name if the function expects the inner proc lock to be held
> (so "binder_txns_pending_ilocked(target_proc) in this case)".
>
Done in v2.

> > +   ret = -EAGAIN;
> > +   binder_inner_proc_unlock(target_proc);
> > +   }
> > +
> > if (ret < 0) {
> > binder_inner_proc_lock(target_proc);
> > target_proc->is_frozen = false;
> > @@ -4705,7 +4728,8 @@ static int binder_ioctl_get_freezer_info(
> > if (target_proc->pid == info->pid) {
> > found = true;
> > binder_inner_proc_lock(target_proc);
> > -   info->sync_recv |= target_proc->sync_recv;
> > +   info->s

[PATCH v1 1/1] binder: fix freeze race

2021-09-09 Thread Li Li
From: Li Li 

Currently cgroup freezer is used to freeze the application threads, and
BINDER_FREEZE is used to freeze binder interface. There's already a
mechanism for BINDER_FREEZE to wait for any existing transactions to
drain out before actually freezing the binder interface.

But freezing an app requires 2 steps, freezing the binder interface with
BINDER_FREEZEwhich and then freezing the application main threads with
cgroupfs. This is not an atomic operation. The following race issue
might happen.

1) Binder interface is frozen by ioctl(BINDER_FREEZE);
2) Main thread initiates a new sync binder transaction;
3) Main thread is frozen by "echo 1 > cgroup.freeze";
4) The response reaches the frozen thread, which will unexpectedly fail.

This patch provides a mechanism for user space freezer manager to check
if there's any new pending transaction happening between BINDER_FREEZE
and freezing main thread. If there's any, the main thread freezing
operation can be rolled back.

Furthermore, the response might reach the binder driver before the
rollback actually happens. That will also cause failed transaction.

As the other process doesn't wait for another response of the response,
the failed response transaction can be fixed by treating the response
transaction like an oneway/async one, allowing it to reach the frozen
thread. And it will be consumed when the thread gets unfrozen later.

Bug: 198493121
Signed-off-by: Li Li 
---
 drivers/android/binder.c  | 32 +++
 drivers/android/binder_internal.h |  2 ++
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d9030cb6b1e4..f9713a97578c 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3038,9 +3038,8 @@ static void binder_transaction(struct binder_proc *proc,
if (reply) {
binder_enqueue_thread_work(thread, tcomplete);
binder_inner_proc_lock(target_proc);
-   if (target_thread->is_dead || target_proc->is_frozen) {
-   return_error = target_thread->is_dead ?
-   BR_DEAD_REPLY : BR_FROZEN_REPLY;
+   if (target_thread->is_dead) {
+   return_error = BR_DEAD_REPLY;
binder_inner_proc_unlock(target_proc);
goto err_dead_proc_or_thread;
}
@@ -4648,6 +4647,22 @@ static int binder_ioctl_get_node_debug_info(struct 
binder_proc *proc,
return 0;
 }
 
+static int binder_txns_pending(struct binder_proc *proc)
+{
+   struct rb_node *n;
+   struct binder_thread *thread;
+
+   if (proc->outstanding_txns > 0)
+   return 1;
+
+   for (n = rb_first(&proc->threads); n; n = rb_next(n)) {
+   thread = rb_entry(n, struct binder_thread, rb_node);
+   if (thread->transaction_stack)
+   return 1;
+   }
+   return 0;
+}
+
 static int binder_ioctl_freeze(struct binder_freeze_info *info,
   struct binder_proc *target_proc)
 {
@@ -4682,6 +4697,14 @@ static int binder_ioctl_freeze(struct binder_freeze_info 
*info,
if (!ret && target_proc->outstanding_txns)
ret = -EAGAIN;
 
+   /* Also check pending transactions that wait for reply */
+   if (ret >= 0) {
+   binder_inner_proc_lock(target_proc);
+   if (binder_txns_pending(target_proc))
+   ret = -EAGAIN;
+   binder_inner_proc_unlock(target_proc);
+   }
+
if (ret < 0) {
binder_inner_proc_lock(target_proc);
target_proc->is_frozen = false;
@@ -4705,7 +4728,8 @@ static int binder_ioctl_get_freezer_info(
if (target_proc->pid == info->pid) {
found = true;
binder_inner_proc_lock(target_proc);
-   info->sync_recv |= target_proc->sync_recv;
+   info->sync_recv |= target_proc->sync_recv |
+   (binder_txns_pending(target_proc) << 1);
info->async_recv |= target_proc->async_recv;
binder_inner_proc_unlock(target_proc);
}
diff --git a/drivers/android/binder_internal.h 
b/drivers/android/binder_internal.h
index 810c0b84d3f8..402c4d4362a8 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -378,6 +378,8 @@ struct binder_ref {
  *binder transactions
  *(protected by @inner_lock)
  * @sync_recv:process received sync transactions since last frozen
+ *bit 0: received sync transaction after being frozen
+ *bit 1: new pending sync transaction during freezing
  *(protected by @inner_lock)
  * @async_recv:   process received async transa