Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-16 Thread Todd Kjos
Thanks Peter. We'll give that patch a try as part of our refactoring.
We're looking at finer-grained locking, and we'll try going back to
rt_mutex plus this patch.

On Wed, Sep 14, 2016 at 9:55 AM, Peter Zijlstra  wrote:
> On Wed, Sep 14, 2016 at 06:13:40PM +0200, Peter Zijlstra wrote:
>> On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote:
>> > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
>> > > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra  
>> > > wrote:
>> > > > cgroups should be irrelevant, PI is unaware of them.
>> > >
>> > > I don't think cgroups are irrelevant. PI being unaware of them
>> > > explains the problem I described. If the task that holds the lock is
>> > > in a cgroup that has a low cpu.shares value, then boosting the task's
>> > > priority within that group does not necessarily make it any more likely to
>> > > run.
>> >
>> > Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
>> > are not cgroup dependent.
>> >
>> > For CFS you're right, and as per usual, cgroups will be a royal pain.
>> > While we can compute the total weight in the block chain, getting that
>> > back to a task which is stuck in a cgroup is just not going to work
>> > well.
>>
>> Not to mention the fact that the weight depends on the current running
>> state. Having those tasks block insta changes the actual weight.
>>
>> > /me curses @ cgroups.. bloody stupid things.
>>
>> More of that.
>
>
> Something a little like so... completely untested, and has a fair number
> of corner cases still left open I think..
>
>
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1691,8 +1691,8 @@ struct task_struct {
> struct seccomp seccomp;
>
>  /* Thread group tracking */
> -   u32 parent_exec_id;
> -   u32 self_exec_id;
> +   u32 parent_exec_id;
> +   u32 self_exec_id;
>  /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
>   * mempolicy */
> spinlock_t alloc_lock;
> @@ -1710,6 +1710,11 @@ struct task_struct {
> struct task_struct *pi_top_task;
> /* Deadlock detection and priority inheritance handling */
> struct rt_mutex_waiter *pi_blocked_on;
> +   unsigned long pi_weight;
> +#ifdef CONFIG_CGROUP_SCHED
> +   struct task_group *orig_task_group;
> +   unsigned long normal_weight;
> +#endif
>  #endif
>
>  #ifdef CONFIG_DEBUG_MUTEXES
> @@ -1977,6 +1982,8 @@ static inline int tsk_nr_cpus_allowed(st
> return p->nr_cpus_allowed;
>  }
>
> +extern unsigned long task_pi_weight(struct task_struct *p);
> +
>  #define TNF_MIGRATED   0x01
>  #define TNF_NO_GROUP   0x02
>  #define TNF_SHARED 0x04
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -20,6 +20,19 @@
>  #include "rtmutex_common.h"
>
>  /*
> + * !(rt_prio || dl_prio)
> + */
> +static inline bool other_prio(int prio)
> +{
> +   return prio >= MAX_RT_PRIO;
> +}
> +
> +static inline bool other_task(struct task_struct *task)
> +{
> +   return other_prio(task->prio);
> +}
> +
> +/*
>   * lock->owner state tracking:
>   *
>   * lock->owner holds the task_struct pointer of the owner. Bit 0
> @@ -226,6 +239,11 @@ rt_mutex_enqueue(struct rt_mutex *lock,
>
> rb_link_node(&waiter->tree_entry, parent, link);
> rb_insert_color(&waiter->tree_entry, &lock->waiters);
> +   /*
> +* We want the lock->waiters_weight to reflect the sum of all queued
> +* waiters.
> +*/
> +   lock->waiters_weight += waiter->weight;
>  }
>
>  static void
> @@ -239,6 +257,7 @@ rt_mutex_dequeue(struct rt_mutex *lock,
>
> rb_erase(&waiter->tree_entry, &lock->waiters);
> RB_CLEAR_NODE(&waiter->tree_entry);
> +   lock->waiters_weight -= waiter->weight;
>  }
>
>  static void
> @@ -265,6 +284,18 @@ rt_mutex_enqueue_pi(struct task_struct *
>
> rb_link_node(&waiter->pi_tree_entry, parent, link);
> rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
> +
> +   /*
> +* Since a task can own multiple locks, and we have one pi_waiter
> +* for every lock, propagate the lock->waiters_weight, which is the sum
> +* of all weights queued on that lock, into the top waiter, and add
> +* that to the owning task's pi_weight.
> +*
> +* This results in pi_weight being the sum of all blocked waiters
> +* on this task.
> +*/
> +   waiter->top_weight = waiter->lock->waiters_weight;
> +   task->pi_weight += waiter->top_weight;
>  }
>
>  static void
> @@ -278,6 +309,7 @@ rt_mutex_dequeue_pi(struct task_struct *
>
> rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
> RB_CLEAR_NODE(&waiter->pi_tree_entry);
> +   task->pi_weight -= waiter->top_weight;
>  }
>
>  static void rt_mutex_adjust_prio(struct task_struct *p)
> @@ -497,7 +529,7 @@ static int rt_mutex_adjust_prio_chain(st
>  * detection is enabled we continue, but stop the
>  * requeueing in the chain walk.

Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-14 Thread Peter Zijlstra
On Wed, Sep 14, 2016 at 06:13:40PM +0200, Peter Zijlstra wrote:
> On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote:
> > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> > > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra  
> > > wrote:
> > > > cgroups should be irrelevant, PI is unaware of them.
> > > 
> > > I don't think cgroups are irrelevant. PI being unaware of them
> > > explains the problem I described. If the task that holds the lock is
> > > in a cgroup that has a low cpu.shares value, then boosting the task's
> > > priority within that group does not necessarily make it any more likely to
> > > run.
> > 
> > Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
> > are not cgroup dependent.
> > 
> > For CFS you're right, and as per usual, cgroups will be a royal pain.
> > While we can compute the total weight in the block chain, getting that
> > back to a task which is stuck in a cgroup is just not going to work
> > well.
> 
> Not to mention the fact that the weight depends on the current running
> state. Having those tasks block insta changes the actual weight.
> 
> > /me curses @ cgroups.. bloody stupid things.
> 
> More of that.


Something a little like so... completely untested, and has a fair number
of corner cases still left open I think..


--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1691,8 +1691,8 @@ struct task_struct {
struct seccomp seccomp;
 
 /* Thread group tracking */
-   u32 parent_exec_id;
-   u32 self_exec_id;
+   u32 parent_exec_id;
+   u32 self_exec_id;
 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
  * mempolicy */
spinlock_t alloc_lock;
@@ -1710,6 +1710,11 @@ struct task_struct {
struct task_struct *pi_top_task;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
+   unsigned long pi_weight;
+#ifdef CONFIG_CGROUP_SCHED
+   struct task_group *orig_task_group;
+   unsigned long normal_weight;
+#endif
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -1977,6 +1982,8 @@ static inline int tsk_nr_cpus_allowed(st
return p->nr_cpus_allowed;
 }
 
+extern unsigned long task_pi_weight(struct task_struct *p);
+
 #define TNF_MIGRATED   0x01
 #define TNF_NO_GROUP   0x02
 #define TNF_SHARED 0x04
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -20,6 +20,19 @@
 #include "rtmutex_common.h"
 
 /*
+ * !(rt_prio || dl_prio)
+ */
+static inline bool other_prio(int prio)
+{
+   return prio >= MAX_RT_PRIO;
+}
+
+static inline bool other_task(struct task_struct *task)
+{
+   return other_prio(task->prio);
+}
+
+/*
  * lock->owner state tracking:
  *
  * lock->owner holds the task_struct pointer of the owner. Bit 0
@@ -226,6 +239,11 @@ rt_mutex_enqueue(struct rt_mutex *lock,
 
rb_link_node(&waiter->tree_entry, parent, link);
rb_insert_color(&waiter->tree_entry, &lock->waiters);
+   /*
+* We want the lock->waiters_weight to reflect the sum of all queued
+* waiters.
+*/
+   lock->waiters_weight += waiter->weight;
 }
 
 static void
@@ -239,6 +257,7 @@ rt_mutex_dequeue(struct rt_mutex *lock,
 
rb_erase(&waiter->tree_entry, &lock->waiters);
RB_CLEAR_NODE(&waiter->tree_entry);
+   lock->waiters_weight -= waiter->weight;
 }
 
 static void
@@ -265,6 +284,18 @@ rt_mutex_enqueue_pi(struct task_struct *
 
rb_link_node(&waiter->pi_tree_entry, parent, link);
rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
+
+   /*
+* Since a task can own multiple locks, and we have one pi_waiter
+* for every lock, propagate the lock->waiters_weight, which is the sum
+* of all weights queued on that lock, into the top waiter, and add
+* that to the owning task's pi_weight.
+*
+* This results in pi_weight being the sum of all blocked waiters
+* on this task.
+*/
+   waiter->top_weight = waiter->lock->waiters_weight;
+   task->pi_weight += waiter->top_weight;
 }
 
 static void
@@ -278,6 +309,7 @@ rt_mutex_dequeue_pi(struct task_struct *
 
rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
RB_CLEAR_NODE(&waiter->pi_tree_entry);
+   task->pi_weight -= waiter->top_weight;
 }
 
 static void rt_mutex_adjust_prio(struct task_struct *p)
@@ -497,7 +529,7 @@ static int rt_mutex_adjust_prio_chain(st
 * detection is enabled we continue, but stop the
 * requeueing in the chain walk.
 */
-   if (top_waiter != task_top_pi_waiter(task)) {
+   if (top_waiter != task_top_pi_waiter(task) && !other_task(task)) {
if (!detect_deadlock)
goto out_unlock_pi;
else
@@ -512,7 +544,7 @@ static int rt_mutex_adjust_prio_chain(st
 * enabled we continue, but stop the requeueing in the chain walk.

Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-14 Thread Peter Zijlstra
On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote:
> On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra  
> > wrote:
> > > cgroups should be irrelevant, PI is unaware of them.
> > 
> > I don't think cgroups are irrelevant. PI being unaware of them
> > explains the problem I described. If the task that holds the lock is
> > in a cgroup that has a low cpu.shares value, then boosting the task's
> > priority within that group does not necessarily make it any more likely to
> > run.
> 
> Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
> are not cgroup dependent.
> 
> For CFS you're right, and as per usual, cgroups will be a royal pain.
> While we can compute the total weight in the block chain, getting that
> back to a task which is stuck in a cgroup is just not going to work
> well.

Not to mention the fact that the weight depends on the current running
state. Having those tasks block insta changes the actual weight.

> /me curses @ cgroups.. bloody stupid things.

More of that.


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-14 Thread Peter Zijlstra
On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra  wrote:
> > cgroups should be irrelevant, PI is unaware of them.
> 
> I don't think cgroups are irrelevant. PI being unaware of them
> explains the problem I described. If the task that holds the lock is
> in a cgroup that has a low cpu.shares value, then boosting the task's
> priority within that group does not necessarily make it any more likely to
> run.

Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
are not cgroup dependent.

For CFS you're right, and as per usual, cgroups will be a royal pain.
While we can compute the total weight in the block chain, getting that
back to a task which is stuck in a cgroup is just not going to work
well.

The only 'solution' I can come up with in a hurry is, when the task is
boosted, move it to the root cgroup. That of course has a whole heap of
problems all on its own.

/me curses @ cgroups.. bloody stupid things.


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-14 Thread Peter Zijlstra
On Wed, Sep 14, 2016 at 09:10:01AM +0200, Peter Zijlstra wrote:
> On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:

> > Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything
> > in the rtmutex code or documentation that indicates that they don't
> > work for normal tasks. From what I can tell the priority gets boosted
> > in every case. This may not work as well for CFS tasks as for realtime
> > tasks, but it should at least help when there is a large priority
> > difference.
> 
> It does something (it used to explicitly ignore OTHER) but its not
> something well defined or usable.

I looked again, and while it updates the ->prio field for OTHER tasks,
that does not seem to cause a change to the actual weight field (which
is derived from ->static_prio).

So it really should not do anything.. as I remembered it.
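
For reference, a paraphrased sketch of the mainline logic in question (the
shape of set_load_weight() in kernel/sched/core.c, simplified here and not
verbatim): the CFS load weight is computed from ->static_prio alone, so an
rt_mutex boost that rewrites ->prio never reaches the weight, and therefore
never changes the share a SCHED_OTHER task receives.

/* Paraphrased sketch, not verbatim kernel code. */
static void set_load_weight(struct task_struct *p)
{
	int prio = p->static_prio - MAX_RT_PRIO;	/* ->prio is never consulted */

	p->se.load.weight = sched_prio_to_weight[prio];
	p->se.load.inv_weight = sched_prio_to_wmult[prio];
}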


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-14 Thread Peter Zijlstra
On Wed, Sep 14, 2016 at 09:10:01AM +0200, Peter Zijlstra wrote:
> We could define a meaningful something for CFS and implement that, but
> it isn't currently done.

So the generalization of the Priority Inheritance Protocol is Proxy
Execution Protocol, which basically lets the boosted task run _as_ the
task on the block chain as picked by the schedule function (treating all
of them as runnable). Where 'as' means it really consumes scheduling
resources of the (blocked) donor task.

Since the scheduling function for FIFO is: pick the highest prio one and
go for it, this trivially reduces to PI for FIFO.

Now, Proxy Execution Protocol is 'trivial' to implement on UP, but has a
few wobbles on SMP.

But we can use it to define a sensible definition for a WFQ class
scheduler (like CFS). For these the scheduling function basically runs
the boosted task as every donor task on the block chain gets their slice.
Alternatively, since it treats all 'blocked' tasks as runnable, the
total weight of the boosted task is its own weight plus the sum of
weight on the block chain.

Which is something that shouldn't be too hard to implement, but very
much isn't what happens today.
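
The task_pi_weight() helper declared in the sched.h hunk of the patch earlier
in this thread has no body in the quoted portion; under the definition above
it would plausibly reduce to something like this (a sketch only, not Peter's
actual code):

/* Sketch: effective CFS weight under proxy execution -- the task's own
 * weight plus the accumulated weight of everything blocked on locks it
 * owns (the pi_weight the patch maintains). */
unsigned long task_pi_weight(struct task_struct *p)
{
	return p->se.load.weight + p->pi_weight;
}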


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-14 Thread Peter Zijlstra
On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra  wrote:
> > On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
> >
> >> A previous attempt to fix this problem, changed the lock to use
> >> rt_mutex instead of mutex, but this apparently did not work as well as
> >> this patch. I believe the added overhead was noticeable, and it did
> >> not work when the preempted thread was in a different cgroup (I don't
> >> know if this is still the case).
> >
> > Do you actually have RR/FIFO/DL tasks? Currently PI isn't
> > defined/implemented for OTHER.
> >
> 
> Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything
> in the rtmutex code or documentation that indicates that they don't
> work for normal tasks. From what I can tell the priority gets boosted
> in every case. This may not work as well for CFS tasks as for realtime
> tasks, but it should at least help when there is a large priority
> difference.

It does something (it used to explicitly ignore OTHER) but its not
something well defined or usable.

> > cgroups should be irrelevant, PI is unaware of them.
> 
> I don't think cgroups are irrelevant. PI being unaware of them
> explains the problem I described. If the task that holds the lock is
> in a cgroup that has a low cpu.shares value, then boosting the task's
> priority within that group does not necessarily make it any more likely to
> run.

See, the problem is that 'priority' is a meaningless concept for
OTHER/CFS.

In any case, typically only RT tasks care about PI, and the traditional
Priority Inheritance algorithm only really works correctly for FIFO. As
is, RR has issues and DL is a crude hack; CFS is really just an accident
of not explicitly exempting it.

We could define a meaningful something for CFS and implement that, but
it isn't currently done.


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-13 Thread Arve Hjønnevåg
On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra  wrote:
> On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
>
>> A previous attempt to fix this problem, changed the lock to use
>> rt_mutex instead of mutex, but this apparently did not work as well as
>> this patch. I believe the added overhead was noticeable, and it did
>> not work when the preempted thread was in a different cgroup (I don't
>> know if this is still the case).
>
> Do you actually have RR/FIFO/DL tasks? Currently PI isn't
> defined/implemented for OTHER.
>

Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything
in the rtmutex code or documentation that indicates that they don't
work for normal tasks. From what I can tell the priority gets boosted
in every case. This may not work as well for CFS tasks as for realtime
tasks, but it should at least help when there is a large priority
difference.

> cgroups should be irrelevant, PI is unaware of them.

I don't think cgroups are irrelevant. PI being unaware of them
explains the problem I described. If the task that holds the lock is
in a cgroup that has a low cpu.shares value, then boosting the task's
priority within that group does not necessarily make it any more likely to
run.
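
A worked example of the cpu.shares point, with plausible numbers: if a
foreground group sits at the default cpu.shares of 1024 and the lock holder's
group at, say, 52, the holder's group as a whole is entitled to roughly
52 / (1024 + 52), about 5% of the CPU. Boosting a task's priority inside that
group only redistributes the ~5% among the group's members; it cannot raise
the group's share.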

-- 
Arve Hjønnevåg


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-13 Thread Arve Hjønnevåg
On Mon, Sep 12, 2016 at 11:42 PM, Greg Kroah-Hartman
 wrote:
> On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
>> On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
>>  wrote:
>> > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
>> >> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
>> >>
>> >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
>> >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
>> >> > > > In Android systems, the display pipeline relies on low
>> >> > > > latency binder transactions and is therefore sensitive to
>> >> > > > delays caused by contention for the global binder lock.
>> >> > > > Jank is significantly reduced by disabling preemption
>> >> > > > while the global binder lock is held.
>> >> > >
>> >> > > That's now how preempt_disable is supposed to use.  It is for critical
>> >> >
>> >> > not, that's supposed to be _not_. Just to be absolutely clear, this is
>> >> > NOT how you're supposed to use preempt_disable().
>> >> >
>> >> > > sections that use per-cpu or similar resources.
>> >> > >
>> >> > > >
>> >> > > > Originally-from: Riley Andrews 
>> >> > > > Signed-off-by: Todd Kjos 
>> >> >
>> >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
>> >> > > > binder_proc *proc, int flags)
>> >> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>> >> > > >   unlock_task_sighand(proc->tsk, &irqs);
>> >> > > >
>> >> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
>> >> > > > + preempt_enable_no_resched();
>> >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
>> >> > > > + preempt_disable();
>> >> >
>> >> > And the fact that people want to use preempt_enable_no_resched() shows
>> >> > that they're absolutely clueless.
>> >> >
>> >> > This is so broken it's not funny.
>> >> >
>> >> > NAK NAK NAK
>> >>
>> >> Indeed. Sprinkling random preempt_enable/disable() pairs all over the place
>> >> documents clearly that this is tinkering and not proper software
>> >> engineering.
>> >
>> > I have pointed out in the other thread for this patch (the one that had
>> > a patch that could be applied) that the single lock in the binder code
>> > is the main problem here; it should be solved instead of this messing
>> > around with priorities.
>> >
>>
>> While removing the single lock in the binder driver would help reduce
>> the problem that this patch tries to work around, it would not fix it.
>> The largest problems occur when a very low priority thread gets
>> preempted while holding the lock. When a high priority thread then
>> needs the same lock it can't get it. Changing the driver to use more
>> fine-grained locking would reduce the set of threads that can trigger
>> this problem, but there are processes that receive work from both high
>> and low priority threads and could still end up in the same situation.
>
> Ok, but how would this patch solve the problem?  It only reduces the
> window of when the preemption could occur, it doesn't stop it from
> happening entirely.
>

I agree, this patch does not _solve_ the problem either. It only
reduces it, as there are many places where it re-enables preemption
for significant work.

>> A previous attempt to fix this problem, changed the lock to use
>> rt_mutex instead of mutex, but this apparently did not work as well as
>> this patch. I believe the added overhead was noticeable, and it did
>> not work when the preempted thread was in a different cgroup (I don't
>> know if this is still the case).
>
> Why would rt_mutex solve this?

If the task that holds the lock would run when you try to get the
lock, there would be no long delay to get the lock. rt_mutex seems to
be designed to solve this problem.
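
For reference, the earlier rt_mutex attempt presumably amounted to something
like this (a hypothetical reconstruction using the driver's binder_lock() /
binder_unlock() helpers quoted elsewhere in this thread, not the actual
patch):

/* Hypothetical sketch: rt_mutex_lock() priority-boosts the current
 * owner via PI, so a preempted low-priority holder inherits the top
 * waiter's priority instead of staying preempted. */
static DEFINE_RT_MUTEX(binder_main_lock);

static inline void binder_lock(const char *tag)
{
	trace_binder_lock(tag);
	rt_mutex_lock(&binder_main_lock);
	trace_binder_locked(tag);
}

static inline void binder_unlock(const char *tag)
{
	trace_binder_unlock(tag);
	rt_mutex_unlock(&binder_main_lock);
}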

>
> I worry that this is only going to get worse as more portions of the
> Android userspace/HAL get rewritten to use binder more and more.  What
> about trying to get rid of the lock entirely?  That way the task
> priority levels would work "properly" here.
>

I think a good first step would be to switch to locks with a smaller
scope. I'm not sure how useful it would be to eliminate locks in this
driver, since it calls into other code that uses locks.
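
As a sketch of that direction (illustrative names only; nothing here is from
the patch under discussion), "locks with a smaller scope" could mean a
per-process lock:

struct binder_proc {
	/* ... existing fields ... */
	struct mutex proc_lock;	/* would cover only this process's
				 * todo list, threads and refs */
};

static inline void binder_proc_lock(struct binder_proc *proc)
{
	mutex_lock(&proc->proc_lock);
}

static inline void binder_proc_unlock(struct binder_proc *proc)
{
	mutex_unlock(&proc->proc_lock);
}

A high-priority sender would then only contend with threads touching the same
binder_proc, not with every binder user in the system.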

> thanks,
>
> greg k-h

-- 
Arve Hjønnevåg


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-13 Thread Peter Zijlstra
On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:

> A previous attempt to fix this problem, changed the lock to use
> rt_mutex instead of mutex, but this apparently did not work as well as
> this patch. I believe the added overhead was noticeable, and it did
> not work when the preempted thread was in a different cgroup (I don't
> know if this is still the case).

Do you actually have RR/FIFO/DL tasks? Currently PI isn't
defined/implemented for OTHER.

cgroups should be irrelevant, PI is unaware of them.


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-12 Thread Greg Kroah-Hartman
On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
> On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
>  wrote:
> > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
> >> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
> >>
> >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> >> > > > In Android systems, the display pipeline relies on low
> >> > > > latency binder transactions and is therefore sensitive to
> >> > > > delays caused by contention for the global binder lock.
> >> > > > Jank is significantly reduced by disabling preemption
> >> > > > while the global binder lock is held.
> >> > >
> >> > > That's now how preempt_disable is supposed to use.  It is for critical
> >> >
> >> > not, that's supposed to be _not_. Just to be absolutely clear, this is
> >> > NOT how you're supposed to use preempt_disable().
> >> >
> >> > > sections that use per-cpu or similar resources.
> >> > >
> >> > > >
> >> > > > Originally-from: Riley Andrews 
> >> > > > Signed-off-by: Todd Kjos 
> >> >
> >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> >> > > > binder_proc *proc, int flags)
> >> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> >> > > >   unlock_task_sighand(proc->tsk, &irqs);
> >> > > >
> >> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
> >> > > > + preempt_enable_no_resched();
> >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> >> > > > + preempt_disable();
> >> >
> >> > And the fact that people want to use preempt_enable_no_resched() shows
> >> > that they're absolutely clueless.
> >> >
> >> > This is so broken it's not funny.
> >> >
> >> > NAK NAK NAK
> >>
> >> Indeed. Sprinkling random preempt_enable/disable() pairs all over the place
> >> documents clearly that this is tinkering and not proper software
> >> engineering.
> >
> > I have pointed out in the other thread for this patch (the one that had
> > a patch that could be applied) that the single lock in the binder code
> > is the main problem here; it should be solved instead of this messing
> > around with priorities.
> >
> 
> While removing the single lock in the binder driver would help reduce
> the problem that this patch tries to work around, it would not fix it.
> The largest problems occur when a very low priority thread gets
> preempted while holding the lock. When a high priority thread then
> needs the same lock it can't get it. Changing the driver to use more
> fine-grained locking would reduce the set of threads that can trigger
> this problem, but there are processes that receive work from both high
> and low priority threads and could still end up in the same situation.

Ok, but how would this patch solve the problem?  It only reduces the
window of when the preemption could occur, it doesn't stop it from
happening entirely.

> A previous attempt to fix this problem, changed the lock to use
> rt_mutex instead of mutex, but this apparently did not work as well as
> this patch. I believe the added overhead was noticeable, and it did
> not work when the preempted thread was in a different cgroup (I don't
> know if this is still the case).

Why would rt_mutex solve this?

I worry that this is only going to get worse as more portions of the
Android userspace/HAL get rewritten to use binder more and more.  What
about trying to get rid of the lock entirely?  That way the task
priority levels would work "properly" here.

thanks,

greg k-h


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-12 Thread Arve Hjønnevåg
On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
 wrote:
> On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
>> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
>>
>> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
>> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
>> > > > In Android systems, the display pipeline relies on low
>> > > > latency binder transactions and is therefore sensitive to
>> > > > delays caused by contention for the global binder lock.
>> > > > Jank is significantly reduced by disabling preemption
>> > > > while the global binder lock is held.
>> > >
>> > > That's now how preempt_disable is supposed to use.  It is for critical
>> >
>> > not, that's supposed to be _not_. Just to be absolutely clear, this is
>> > NOT how you're supposed to use preempt_disable().
>> >
>> > > sections that use per-cpu or similar resources.
>> > >
>> > > >
>> > > > Originally-from: Riley Andrews 
>> > > > Signed-off-by: Todd Kjos 
>> >
>> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
>> > > > binder_proc *proc, int flags)
>> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>> > > >   unlock_task_sighand(proc->tsk, &irqs);
>> > > >
>> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_enable_no_resched();
>> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_disable();
>> >
>> > And the fact that people want to use preempt_enable_no_resched() shows
>> > that they're absolutely clueless.
>> >
>> > This is so broken it's not funny.
>> >
>> > NAK NAK NAK
>>
>> Indeed. Sprinkling random preempt_enable/disable() pairs all over the place
>> documents clearly that this is tinkering and not proper software
>> engineering.
>
> I have pointed out in the other thread for this patch (the one that had
> a patch that could be applied) that the single lock in the binder code
> is the main problem here; it should be solved instead of this messing
> around with priorities.
>

While removing the single lock in the binder driver would help reduce
the problem that this patch tries to work around, it would not fix it.
The largest problems occur when a very low priority thread gets
preempted while holding the lock. When a high priority thread then
needs the same lock it can't get it. Changing the driver to use more
fine-grained locking would reduce the set of threads that can trigger
this problem, but there are processes that receive work from both high
and low priority threads and could still end up in the same situation.

A previous attempt to fix this problem, changed the lock to use
rt_mutex instead of mutex, but this apparently did not work as well as
this patch. I believe the added overhead was noticeable, and it did
not work when the preempted thread was in a different cgroup (I don't
know if this is still the case).

It would be useful to have a generic solution to this problem.

> So don't worry, I'm not taking this change :)
>
> thanks,
>
> greg k-h



-- 
Arve Hjønnevåg


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-12 Thread Todd Kjos
Thanks for the reviews. We'll come up with a different solution.

On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
 wrote:
> On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
>> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
>>
>> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
>> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
>> > > > In Android systems, the display pipeline relies on low
>> > > > latency binder transactions and is therefore sensitive to
>> > > > delays caused by contention for the global binder lock.
>> > > > Jank is significantly reduced by disabling preemption
>> > > > while the global binder lock is held.
>> > >
>> > > That's now how preempt_disable is supposed to use.  It is for critical
>> >
>> > not, that's supposed to be _not_. Just to be absolutely clear, this is
>> > NOT how you're supposed to use preempt_disable().
>> >
>> > > sections that use per-cpu or similar resources.
>> > >
>> > > >
>> > > > Originally-from: Riley Andrews 
>> > > > Signed-off-by: Todd Kjos 
>> >
>> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
>> > > > binder_proc *proc, int flags)
>> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>> > > >   unlock_task_sighand(proc->tsk, &irqs);
>> > > >
>> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_enable_no_resched();
>> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_disable();
>> >
>> > And the fact that people want to use preempt_enable_no_resched() shows
>> > that they're absolutely clueless.
>> >
>> > This is so broken it's not funny.
>> >
>> > NAK NAK NAK
>>
>> Indeed. Sprinkling random preempt_enable/disable() pairs all over the place
>> documents clearly that this is tinkering and not proper software
>> engineering.
>
> I have pointed out in the other thread for this patch (the one that had
> a patch that could be applied) that the single lock in the binder code
> is the main problem here; it should be solved instead of this messing
> around with priorities.
>
> So don't worry, I'm not taking this change :)
>
> thanks,
>
> greg k-h


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-10 Thread Greg Kroah-Hartman
On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
> 
> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> > > > In Android systems, the display pipeline relies on low
> > > > latency binder transactions and is therefore sensitive to
> > > > delays caused by contention for the global binder lock.
> > > > Jank is significantly reduced by disabling preemption
> > > > while the global binder lock is held.
> > > 
> > > That's now how preempt_disable is supposed to use.  It is for critical
> > 
> > not, that's supposed to be _not_. Just to be absolutely clear, this is
> > NOT how you're supposed to use preempt_disable().
> > 
> > > sections that use per-cpu or similar resources.
> > > 
> > > > 
> > > > Originally-from: Riley Andrews 
> > > > Signed-off-by: Todd Kjos 
> > 
> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> > > > binder_proc *proc, int flags)
> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> > > >   unlock_task_sighand(proc->tsk, &irqs);
> > > > 
> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
> > > > + preempt_enable_no_resched();
> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> > > > + preempt_disable();
> > 
> > And the fact that people want to use preempt_enable_no_resched() shows
> > that they're absolutely clueless.
> > 
> > This is so broken it's not funny.
> > 
> > NAK NAK NAK
> 
> Indeed. Sprinkling random preempt_enable/disable() pairs all over the place
> documents clearly that this is tinkering and not proper software
> engineering.

I have pointed out in the other thread for this patch (the one that had
a patch that could be applied) that the single lock in the binder code
is the main problem here; it should be solved instead of this messing
around with priorities.

So don't worry, I'm not taking this change :)

thanks,

greg k-h


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-10 Thread Thomas Gleixner
On Sat, 10 Sep 2016, Peter Zijlstra wrote:

> On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> > > In Android systems, the display pipeline relies on low
> > > latency binder transactions and is therefore sensitive to
> > > delays caused by contention for the global binder lock.
> > > Jank is significantly reduced by disabling preemption
> > > while the global binder lock is held.
> > 
> > That's now how preempt_disable is supposed to use.  It is for critical
> 
> not, that's supposed to be _not_. Just to be absolutely clear, this is
> NOT how you're supposed to use preempt_disable().
> 
> > sections that use per-cpu or similar resources.
> > 
> > > 
> > > Originally-from: Riley Andrews 
> > > Signed-off-by: Todd Kjos 
> 
> > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> > > binder_proc *proc, int flags)
> > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> > >   unlock_task_sighand(proc->tsk, &irqs);
> > > 
> > > - return __alloc_fd(files, 0, rlim_cur, flags);
> > > + preempt_enable_no_resched();
> > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> > > + preempt_disable();
> 
> And the fact that people want to use preempt_enable_no_resched() shows
> that they're absolutely clueless.
> 
> This is so broken it's not funny.
> 
> NAK NAK NAK

Indeed. Sprinkling random preempt_enable/disable() pairs all over the place
documents clearly that this is tinkering and not proper software
engineering.

NAK from my side as well.

Thanks,

Thomas




Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-10 Thread Peter Zijlstra
On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> > In Android systems, the display pipeline relies on low
> > latency binder transactions and is therefore sensitive to
> > delays caused by contention for the global binder lock.
> > Jank is significantly reduced by disabling preemption
> > while the global binder lock is held.
> 
> That's now how preempt_disable is supposed to use.  It is for critical

not, that's supposed to be _not_. Just to be absolutely clear, this is
NOT how you're supposed to use preempt_disable().

> sections that use per-cpu or similar resources.
> 
> > 
> > Originally-from: Riley Andrews 
> > Signed-off-by: Todd Kjos 

> > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> > binder_proc *proc, int flags)
> >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> >   unlock_task_sighand(proc->tsk, &irqs);
> > 
> > - return __alloc_fd(files, 0, rlim_cur, flags);
> > + preempt_enable_no_resched();
> > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> > + preempt_disable();

And the fact that people want to use preempt_enable_no_resched() shows
that they're absolutely clueless.

This is so broken it's not funny.

NAK NAK NAK




Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-10 Thread Christoph Hellwig
On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.

That's now how preempt_disable is supposed to use.  It is for critical
sections that use per-cpu or similar resources.
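
For contrast, the legitimate pattern being referred to looks roughly like
this (illustrative per-cpu counter, nothing to do with binder):

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, my_counter);	/* hypothetical */

static void my_counter_inc(void)
{
	preempt_disable();		/* must not migrate CPUs mid-update */
	__this_cpu_inc(my_counter);
	preempt_enable();
}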

> 
> Originally-from: Riley Andrews 
> Signed-off-by: Todd Kjos 
> ---
>  drivers/android/binder.c | 194 
> +++
>  1 file changed, 146 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 16288e7..c36e420 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   struct files_struct *files = proc->files;
>   unsigned long rlim_cur;
>   unsigned long irqs;
> + int ret;
> 
>   if (files == NULL)
>   return -ESRCH;
> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>   unlock_task_sighand(proc->tsk, &irqs);
> 
> - return __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_enable_no_resched();
> + ret = __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_disable();
> +
> + return ret;
>  }
> 
>  /*
> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>  static void task_fd_install(
>   struct binder_proc *proc, unsigned int fd, struct file *file)
>  {
> - if (proc->files)
> + if (proc->files) {
> + preempt_enable_no_resched();
>   __fd_install(proc->files, fd, file);
> + preempt_disable();
> + }
>  }
> 
>  /*
> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
>  {
>   trace_binder_lock(tag);
>   mutex_lock(&binder_main_lock);
> + preempt_disable();
>   trace_binder_locked(tag);
>  }
> 
> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
>  {
>   trace_binder_unlock(tag);
>   mutex_unlock(&binder_main_lock);
> + preempt_enable();
> +}
> +
> +static inline void *kzalloc_nopreempt(size_t size)
> +{
> + void *ptr;
> +
> + ptr = kzalloc(size, GFP_NOWAIT);
> + if (ptr)
> + return ptr;
> +
> + preempt_enable_no_resched();
> + ptr = kzalloc(size, GFP_KERNEL);
> + preempt_disable();
> +
> + return ptr;
> +}
> +
> +static inline long copy_to_user_nopreempt(void __user *to,
> +  const void *from, long n)
> +{
> + long ret;
> +
> + preempt_enable_no_resched();
> + ret = copy_to_user(to, from, n);
> + preempt_disable();
> + return ret;
> +}
> +
> +static inline long copy_from_user_nopreempt(void *to,
> +const void __user *from,
> +long n)
> +{
> + long ret;
> +
> + preempt_enable_no_resched();
> + ret = copy_from_user(to, from, n);
> + preempt_disable();
> + return ret;
>  }
> 
> +#define get_user_nopreempt(x, ptr) \
> +({ \
> + int __ret; \
> + preempt_enable_no_resched(); \
> + __ret = get_user(x, ptr); \
> + preempt_disable(); \
> + __ret; \
> +})
> +
> +#define put_user_nopreempt(x, ptr) \
> +({ \
> + int __ret; \
> + preempt_enable_no_resched(); \
> + __ret = put_user(x, ptr); \
> + preempt_disable(); \
> + __ret; \
> +})
> +
>  static void binder_set_nice(long nice)
>  {
>   long min_nice;
> @@ -568,6 +634,8 @@ static int binder_update_page_range(struct
> binder_proc *proc, int allocate,
>   else
>   mm = get_task_mm(proc->tsk);
> 
> + preempt_enable_no_resched();
> +
>   if (mm) {
>   down_write(&mm->mmap_sem);
>   vma = proc->vma;
> @@ -622,6 +690,9 @@ static int binder_update_page_range(struct
> binder_proc *proc, int allocate,
>   up_write(&mm->mmap_sem);
>   mmput(mm);
>   }
> +
> + preempt_disable();
> +
>   return 0;
> 
>  free_range:
> @@ -644,6 +715,9 @@ err_no_vma:
>   up_write(&mm->mmap_sem);
>   mmput(mm);
>   }
> +
> + preempt_disable();
> +
>   return -ENOMEM;
>  }
> 
> @@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct
> binder_proc *proc,
>   return NULL;
>   }
> 
> - node = kzalloc(sizeof(*node), GFP_KERNEL);
> + node = kzalloc_nopreempt(sizeof(*node));
>   if (node == NULL)
>   return NULL;
>   binder_stats_created(BINDER_STAT_NODE);
> @@ -1040,7 +1114,7 @@ static struct binder_ref
> *binder_get_ref_for_node(struct binder_proc *proc,
>   else
>   return ref;
>   }
> - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
> + new_ref = kzalloc_nopreempt(sizeof(*ref));
>   if (new_ref == NULL)
>   return NULL;
>   binder_stats_created(BINDER_STAT_REF);
> @@ -1438,14 +1512,14 @@ static void binder_transaction(struct binder_proc 
> *proc,
>   e->to_proc = target_proc->pid;
> 
>   /* TODO: reuse incoming transaction for reply */
> - t = kzalloc(sizeof(*t), GFP_KERNEL);
> + t = kzalloc_nopreempt(sizeof(*t));
>   if (t == NULL) {
>   return_error = BR_FAILED_REPLY;
>   goto err_alloc_t_failed;
>   }
>   binder_stats_created(BINDER_STAT_TRANSACTION);
> 
>

Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-10 Thread Greg KH
On Sat, Sep 10, 2016 at 01:18:47PM +0200, Greg KH wrote:
> On Fri, Sep 09, 2016 at 10:39:32AM -0700, Todd Kjos wrote:
> > On Fri, Sep 9, 2016 at 8:44 AM, Greg KH  wrote:
> > > On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
> > >> From: Todd Kjos 
> > >>
> > >> In Android systems, the display pipeline relies on low
> > >> latency binder transactions and is therefore sensitive to
> > >> delays caused by contention for the global binder lock.
> > >> Jank is significantly reduced by disabling preemption
> > >> while the global binder lock is held.
> > >
> > > What is the technical definition of "Jank"?  :)
> > 
> > I'll rephrase in the next version to "dropped or delayed frames".
> 
> Heh, thanks :)
> 
> Also in the next version can you fix the errors found by the 0-day build
> bot?
> 
> > >> This patch was originated by Riley Andrews 
> > >> with tweaks and forward-porting by me.
> > >>
> > >> Originally-from: Riley Andrews 
> > >> Signed-off-by: Todd Kjos 
> > >> ---
> > >>  drivers/android/binder.c | 194 
> > >> +++
> > >>  1 file changed, 146 insertions(+), 48 deletions(-)
> > >>
> > >> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > >> index 16288e7..c36e420 100644
> > >> --- a/drivers/android/binder.c
> > >> +++ b/drivers/android/binder.c
> > >> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct 
> > >> binder_proc *proc, int flags)
> > >>   struct files_struct *files = proc->files;
> > >>   unsigned long rlim_cur;
> > >>   unsigned long irqs;
> > >> + int ret;
> > >>
> > >>   if (files == NULL)
> > >>   return -ESRCH;
> > >> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct 
> > >> binder_proc *proc, int flags)
> > >>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> > >>   unlock_task_sighand(proc->tsk, &irqs);
> > >>
> > >> - return __alloc_fd(files, 0, rlim_cur, flags);
> > >> + preempt_enable_no_resched();
> > >> + ret = __alloc_fd(files, 0, rlim_cur, flags);
> > >> + preempt_disable();
> > >> +
> > >> + return ret;
> > >>  }
> > >>
> > >>  /*
> > >> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct 
> > >> binder_proc *proc, int flags)
> > >>  static void task_fd_install(
> > >>   struct binder_proc *proc, unsigned int fd, struct file *file)
> > >>  {
> > >> - if (proc->files)
> > >> + if (proc->files) {
> > >> + preempt_enable_no_resched();
> > >>   __fd_install(proc->files, fd, file);
> > >> + preempt_disable();
> > >> + }
> > >>  }
> > >>
> > >>  /*
> > >> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
> > >>  {
> > >>   trace_binder_lock(tag);
> > >>   mutex_lock(&binder_main_lock);
> > >> + preempt_disable();
> > >>   trace_binder_locked(tag);
> > >>  }
> > >>
> > >> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
> > >>  {
> > >>   trace_binder_unlock(tag);
> > >>   mutex_unlock(&binder_main_lock);
> > >> + preempt_enable();
> > >> +}
> > >> +
> > >> +static inline void *kzalloc_nopreempt(size_t size)
> > >> +{
> > >> + void *ptr;
> > >> +
> > >> + ptr = kzalloc(size, GFP_NOWAIT);
> > >> + if (ptr)
> > >> + return ptr;
> > >> +
> > >> + preempt_enable_no_resched();
> > >> + ptr = kzalloc(size, GFP_KERNEL);
> > >> + preempt_disable();
> > >
> > > Doesn't the allocator retry if the first one fails anyway?  Why not
> > > GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
> > > usage?
> > 
> > I suspect we have hit the second, since we do get into cases where
> > direct reclaim is needed. I can't confirm since I haven't instrumented
> > this case. As you say, if we use GFP_ATOMIC instead, maybe we
> > wouldn't, but even then I'd be concerned that we could deplete the
> > memory reserved for atomic. The general idea of trying for a fast,
> > nowait allocation and then enabling preempt for the rare potentially
> > blocking allocation seems reasonable, doesn't it?
> 
> Yes it is, so much so that I think there's a generic kernel function for
> it already.  Adding in the linux-mm mailing list to be told that I'm
> wrong about this :)

Ok, adding the correct linux-mm list address this time...

greg k-h


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-10 Thread Greg KH
On Fri, Sep 09, 2016 at 10:39:32AM -0700, Todd Kjos wrote:
> On Fri, Sep 9, 2016 at 8:44 AM, Greg KH  wrote:
> > On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
> >> From: Todd Kjos 
> >>
> >> In Android systems, the display pipeline relies on low
> >> latency binder transactions and is therefore sensitive to
> >> delays caused by contention for the global binder lock.
> >> Jank is significantly reduced by disabling preemption
> >> while the global binder lock is held.
> >
> > What is the technical definition of "Jank"?  :)
> 
> I'll rephrase in the next version to "dropped or delayed frames".

Heh, thanks :)

Also in the next version can you fix the errors found by the 0-day build
bot?

> >> This patch was originated by Riley Andrews 
> >> with tweaks and forward-porting by me.
> >>
> >> Originally-from: Riley Andrews 
> >> Signed-off-by: Todd Kjos 
> >> ---
> >>  drivers/android/binder.c | 194 
> >> +++
> >>  1 file changed, 146 insertions(+), 48 deletions(-)
> >>
> >> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> >> index 16288e7..c36e420 100644
> >> --- a/drivers/android/binder.c
> >> +++ b/drivers/android/binder.c
> >> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc 
> >> *proc, int flags)
> >>   struct files_struct *files = proc->files;
> >>   unsigned long rlim_cur;
> >>   unsigned long irqs;
> >> + int ret;
> >>
> >>   if (files == NULL)
> >>   return -ESRCH;
> >> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct 
> >> binder_proc *proc, int flags)
> >>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> >>   unlock_task_sighand(proc->tsk, &irqs);
> >>
> >> - return __alloc_fd(files, 0, rlim_cur, flags);
> >> + preempt_enable_no_resched();
> >> + ret = __alloc_fd(files, 0, rlim_cur, flags);
> >> + preempt_disable();
> >> +
> >> + return ret;
> >>  }
> >>
> >>  /*
> >> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct 
> >> binder_proc *proc, int flags)
> >>  static void task_fd_install(
> >>   struct binder_proc *proc, unsigned int fd, struct file *file)
> >>  {
> >> - if (proc->files)
> >> + if (proc->files) {
> >> + preempt_enable_no_resched();
> >>   __fd_install(proc->files, fd, file);
> >> + preempt_disable();
> >> + }
> >>  }
> >>
> >>  /*
> >> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
> >>  {
> >>   trace_binder_lock(tag);
> >>   mutex_lock(&binder_main_lock);
> >> + preempt_disable();
> >>   trace_binder_locked(tag);
> >>  }
> >>
> >> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
> >>  {
> >>   trace_binder_unlock(tag);
> >>   mutex_unlock(&binder_main_lock);
> >> + preempt_enable();
> >> +}
> >> +
> >> +static inline void *kzalloc_nopreempt(size_t size)
> >> +{
> >> + void *ptr;
> >> +
> >> + ptr = kzalloc(size, GFP_NOWAIT);
> >> + if (ptr)
> >> + return ptr;
> >> +
> >> + preempt_enable_no_resched();
> >> + ptr = kzalloc(size, GFP_KERNEL);
> >> + preempt_disable();
> >
> > Doesn't the allocator retry if the first one fails anyway?  Why not
> > GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
> > usage?
> 
> I suspect we have hit the second, since we do get into cases where
> direct reclaim is needed. I can't confirm since I haven't instrumented
> this case. As you say, if we use GFP_ATOMIC instead, maybe we
> wouldn't, but even then I'd be concerned that we could deplete the
> memory reserved for atomic. The general idea of trying for a fast,
> nowait allocation and then enabling preempt for the rare potentially
> blocking allocation seems reasonable, doesn't it?

Yes it is, so much so that I think there's a generic kernel function for
it already.  Adding in the linux-mm mailing list to be told that I'm
wrong about this :)

> >> + return ptr;
> >> +}
> >> +
> >> +static inline long copy_to_user_nopreempt(void __user *to,
> >> +   const void *from, long n)
> >> +{
> >> + long ret;
> >> +
> >> + preempt_enable_no_resched();
> >> + ret = copy_to_user(to, from, n);
> >> + preempt_disable();
> >> + return ret;
> >> +}
> >> +
> >> +static inline long copy_from_user_nopreempt(void *to,
> >> + const void __user *from,
> >> + long n)
> >> +{
> >> + long ret;
> >> +
> >> + preempt_enable_no_resched();
> >> + ret = copy_from_user(to, from, n);
> >> + preempt_disable();
> >> + return ret;
> >>  }
> >>
> >> +#define get_user_nopreempt(x, ptr)   \
> >> +({   \
> >> + int __ret;  \
> >> + preempt_enable_no_resched();\
> >> + __ret = get_user(x, ptr);   \
> >> + preempt_disable();  \
> >> + __ret;  \
> >> +})

Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-09 Thread Todd Kjos
On Fri, Sep 9, 2016 at 8:44 AM, Greg KH  wrote:
> On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
>> From: Todd Kjos 
>>
>> In Android systems, the display pipeline relies on low
>> latency binder transactions and is therefore sensitive to
>> delays caused by contention for the global binder lock.
>> Jank is significantly reduced by disabling preemption
>> while the global binder lock is held.
>
> What is the technical definition of "Jank"?  :)

I'll rephrase in the next version to "dropped or delayed frames".

>
>>
>> This patch was originated by Riley Andrews 
>> with tweaks and forward-porting by me.
>>
>> Originally-from: Riley Andrews 
>> Signed-off-by: Todd Kjos 
>> ---
>>  drivers/android/binder.c | 194 
>> +++
>>  1 file changed, 146 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
>> index 16288e7..c36e420 100644
>> --- a/drivers/android/binder.c
>> +++ b/drivers/android/binder.c
>> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc 
>> *proc, int flags)
>>   struct files_struct *files = proc->files;
>>   unsigned long rlim_cur;
>>   unsigned long irqs;
>> + int ret;
>>
>>   if (files == NULL)
>>   return -ESRCH;
>> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc 
>> *proc, int flags)
>>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>>   unlock_task_sighand(proc->tsk, &irqs);
>>
>> - return __alloc_fd(files, 0, rlim_cur, flags);
>> + preempt_enable_no_resched();
>> + ret = __alloc_fd(files, 0, rlim_cur, flags);
>> + preempt_disable();
>> +
>> + return ret;
>>  }
>>
>>  /*
>> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc 
>> *proc, int flags)
>>  static void task_fd_install(
>>   struct binder_proc *proc, unsigned int fd, struct file *file)
>>  {
>> - if (proc->files)
>> + if (proc->files) {
>> + preempt_enable_no_resched();
>>   __fd_install(proc->files, fd, file);
>> + preempt_disable();
>> + }
>>  }
>>
>>  /*
>> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
>>  {
>>   trace_binder_lock(tag);
>>   mutex_lock(&binder_main_lock);
>> + preempt_disable();
>>   trace_binder_locked(tag);
>>  }
>>
>> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
>>  {
>>   trace_binder_unlock(tag);
>>   mutex_unlock(&binder_main_lock);
>> + preempt_enable();
>> +}
>> +
>> +static inline void *kzalloc_nopreempt(size_t size)
>> +{
>> + void *ptr;
>> +
>> + ptr = kzalloc(size, GFP_NOWAIT);
>> + if (ptr)
>> + return ptr;
>> +
>> + preempt_enable_no_resched();
>> + ptr = kzalloc(size, GFP_KERNEL);
>> + preempt_disable();
>
> Doesn't the allocator retry if the first one fails anyway?  Why not
> GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
> usage?

I suspect we have hit the second, since we do get into cases where
direct reclaim is needed. I can't confirm since I haven't instrumented
this case. As you say, if we use GFP_ATOMIC instead, maybe we
wouldn't, but even then I'd be concerned that we could deplete the
memory reserved for atomic. The general idea of trying for a fast,
nowait allocation and then enabling preempt for the rare potentially
blocking allocation seems reasonable, doesn't it?
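
For comparison, the single-call shape suggested earlier would be (a
hypothetical alternative, not part of the patch):

static inline void *kzalloc_nopreempt(size_t size)
{
	/* GFP_ATOMIC never sleeps, so it is safe with preemption
	 * disabled -- but it draws on the reserved atomic pools,
	 * which is exactly the depletion concern raised here. */
	return kzalloc(size, GFP_ATOMIC);
}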

>> +
>> + return ptr;
>> +}
>> +
>> +static inline long copy_to_user_nopreempt(void __user *to,
>> +   const void *from, long n)
>> +{
>> + long ret;
>> +
>> + preempt_enable_no_resched();
>> + ret = copy_to_user(to, from, n);
>> + preempt_disable();
>> + return ret;
>> +}
>> +
>> +static inline long copy_from_user_nopreempt(void *to,
>> + const void __user *from,
>> + long n)
>> +{
>> + long ret;
>> +
>> + preempt_enable_no_resched();
>> + ret = copy_from_user(to, from, n);
>> + preempt_disable();
>> + return ret;
>>  }
>>
>> +#define get_user_nopreempt(x, ptr)   \
>> +({   \
>> + int __ret;  \
>> + preempt_enable_no_resched();\
>> + __ret = get_user(x, ptr);   \
>> + preempt_disable();  \
>> + __ret;  \
>> +})
>> +
>> +#define put_user_nopreempt(x, ptr)   \
>> +({   \
>> + int __ret;  \
>> + preempt_enable_no_resched();\
>> + __ret = put_user(x, ptr);   \
>> + preempt_disable();  \
>> + __ret;  \
>> +})
>
> Any reason some of these are #defines and some are static inline
> functions?

Not that I know of. I'll change to static inline functions.

Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-09 Thread kbuild test robot
Hi Todd,

[auto build test WARNING on staging/staging-testing]
[also build test WARNING on v4.8-rc5 next-20160909]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base= (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:
https://github.com/0day-ci/linux/commits/Todd-Kjos/android-binder-Disable-preemption-while-holding-the-global-binder-lock/20160909-23
config: x86_64-randconfig-x008-201636 (attached as .config)
compiler: gcc-6 (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   drivers/android/binder.c: In function 'binder_thread_read':
>> drivers/android/binder.c:2432:4: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
    else
    ^~~~
   drivers/android/binder.c:2434:5: note: ...this statement, but the latter is misleadingly indented as if it is guarded by the 'else'
    if (put_user_nopreempt(cmd,
    ^~

vim +/else +2432 drivers/android/binder.c

da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2416  proc->pid, thread->pid,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2417  node->debug_id,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2418  (u64)node->ptr,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2419  (u64)node->cookie);
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2420  }
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2421  }
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2422  } break;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2423  case BINDER_WORK_DEAD_BINDER:
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2424  case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2425  case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2426  struct binder_ref_death *death;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2427  uint32_t cmd;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2428
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2429  death = container_of(w, struct binder_ref_death, work);
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2430  if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2431  cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 @2432  else
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2433  cmd = BR_DEAD_BINDER;
ddd4adb7 drivers/android/binder.c         Todd Kjos          2016-09-09  2434  if (put_user_nopreempt(cmd,
ddd4adb7 drivers/android/binder.c         Todd Kjos          2016-09-09  2435     (uint32_t __user *) ptr))
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2436  return -EFAULT;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2437  ptr += sizeof(uint32_t);
ddd4adb7 drivers/android/binder.c         Todd Kjos          2016-09-09  2438  if (put_user_nopreempt(death->cookie,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2439     (binder_uintptr_t __user *) ptr))
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2440  return -EFAULT;

:: The code at line 2432 was first introduced by commit
:: 355b0502f6efea0ff9492753888772c96972d2a3 Revert "Staging: android: delete android drivers"
:: TO: Greg Kroah-Hartman 
:: CC: Greg Kroah-Hartman 
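
For reference, the fix the warning asks for is purely indentation: the
second if is not guarded by the else, so it belongs at the same level
as the first if. A sketch of the corrected block (context trimmed, not
a patch from this thread):

		if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
			cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
		else
			cmd = BR_DEAD_BINDER;
		/* not part of the if/else above: outdent to match */
		if (put_user_nopreempt(cmd, (uint32_t __user *)ptr))
			return -EFAULT;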

---
0-DAY kernel test infrastructure            Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation




Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-09 Thread Greg KH
On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
> From: Todd Kjos 
> 
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.

What is the technical definition of "Jank"?  :)

> 
> This patch was originated by Riley Andrews 
> with tweaks and forward-porting by me.
> 
> Originally-from: Riley Andrews 
> Signed-off-by: Todd Kjos 
> ---
>  drivers/android/binder.c | 194 +++
>  1 file changed, 146 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 16288e7..c36e420 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc 
> *proc, int flags)
>   struct files_struct *files = proc->files;
>   unsigned long rlim_cur;
>   unsigned long irqs;
> + int ret;
>  
>   if (files == NULL)
>   return -ESRCH;
> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc 
> *proc, int flags)
>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>   unlock_task_sighand(proc->tsk, &irqs);
>  
> - return __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_enable_no_resched();
> + ret = __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_disable();
> +
> + return ret;
>  }
>  
>  /*
> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc 
> *proc, int flags)
>  static void task_fd_install(
>   struct binder_proc *proc, unsigned int fd, struct file *file)
>  {
> - if (proc->files)
> + if (proc->files) {
> + preempt_enable_no_resched();
>   __fd_install(proc->files, fd, file);
> + preempt_disable();
> + }
>  }
>  
>  /*
> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
>  {
>   trace_binder_lock(tag);
>   mutex_lock(&binder_main_lock);
> + preempt_disable();
>   trace_binder_locked(tag);
>  }
>  
> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
>  {
>   trace_binder_unlock(tag);
>   mutex_unlock(&binder_main_lock);
> + preempt_enable();
> +}
> +
> +static inline void *kzalloc_nopreempt(size_t size)
> +{
> + void *ptr;
> +
> + ptr = kzalloc(size, GFP_NOWAIT);
> + if (ptr)
> + return ptr;
> +
> + preempt_enable_no_resched();
> + ptr = kzalloc(size, GFP_KERNEL);
> + preempt_disable();

Doesn't the allocator retry if the first one fails anyway?  Why not
GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
usage?

> +
> + return ptr;
> +}
> +
> +static inline long copy_to_user_nopreempt(void __user *to,
> +   const void *from, long n)
> +{
> + long ret;
> +
> + preempt_enable_no_resched();
> + ret = copy_to_user(to, from, n);
> + preempt_disable();
> + return ret;
> +}
> +
> +static inline long copy_from_user_nopreempt(void *to,
> + const void __user *from,
> + long n)
> +{
> + long ret;
> +
> + preempt_enable_no_resched();
> + ret = copy_from_user(to, from, n);
> + preempt_disable();
> + return ret;
>  }
>  
> +#define get_user_nopreempt(x, ptr)   \
> +({   \
> + int __ret;  \
> + preempt_enable_no_resched();\
> + __ret = get_user(x, ptr);   \
> + preempt_disable();  \
> + __ret;  \
> +})
> +
> +#define put_user_nopreempt(x, ptr)   \
> +({   \
> + int __ret;  \
> + preempt_enable_no_resched();\
> + __ret = put_user(x, ptr);   \
> + preempt_disable();  \
> + __ret;  \
> +})

Any reason some of these are #defines and some are static inline
functions?

Anyway, these all seem a bit strange to me.  What type of latency spikes
are you seeing that these changes resolve?  Shouldn't that be an issue
with the scheduler more than just the binder driver?

I don't know of any other driver or IPC that does this type of thing
with the scheduler in order to make things "go faster", so it feels
wrong to me, and is probably why we don't have global functions like
put_user_nopreempt() :)

And is enabling and disabling preemption around single-byte copies
to/from userspace really a good idea?  That seems like a lot of overhead
you are now adding to the "fastpath" that you need to make even faster.
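
For scale, each such call now expands to roughly this (illustrative
only, going by the generic preempt_count implementation):

	preempt_enable_no_resched();	/* barrier + preempt_count decrement */
	ret = put_user(cmd, (uint32_t __user *)ptr);	/* the actual 4-byte store */
	preempt_disable();		/* preempt_count increment + barrier */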

And finally, I'm guessing this has passed the binder test suite that is
out there for testing binder?

Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-08 Thread Greg Kroah-Hartman
On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.
> 
> Originally-from: Riley Andrews 

So should the "From: " line be Riley?  Did Riley sign off on this?

> Signed-off-by: Todd Kjos 
> ---
>  drivers/android/binder.c | 194 +++
>  1 file changed, 146 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 16288e7..c36e420 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   struct files_struct *files = proc->files;
>   unsigned long rlim_cur;
>   unsigned long irqs;
> + int ret;
> 
>   if (files == NULL)
>   return -ESRCH;
> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>   unlock_task_sighand(proc->tsk, &irqs);
> 
> - return __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_enable_no_resched();
> + ret = __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_disable();
> +
> + return ret;
>  }

Your tabs are all eaten and broke the patch, so it can't be applied :(

Can you try again?

thanks,

greg k-h


Re: [PATCH] android: binder: Disable preemption while holding the global binder lock

2016-09-08 Thread Todd Kjos
This was introduced in the 2015 Nexus devices and should have been
submitted to the kernel then since we keep forward porting it to each
new device.

On Thu, Sep 8, 2016 at 9:12 AM, Todd Kjos  wrote:
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.
>
> Originally-from: Riley Andrews 
> Signed-off-by: Todd Kjos 
> ---
>  drivers/android/binder.c | 194 +++
>  1 file changed, 146 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 16288e7..c36e420 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   struct files_struct *files = proc->files;
>   unsigned long rlim_cur;
>   unsigned long irqs;
> + int ret;
>
>   if (files == NULL)
>   return -ESRCH;
> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>   unlock_task_sighand(proc->tsk, &irqs);
>
> - return __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_enable_no_resched();
> + ret = __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_disable();
> +
> + return ret;
>  }
>
>  /*
> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>  static void task_fd_install(
>   struct binder_proc *proc, unsigned int fd, struct file *file)
>  {
> - if (proc->files)
> + if (proc->files) {
> + preempt_enable_no_resched();
>   __fd_install(proc->files, fd, file);
> + preempt_disable();
> + }
>  }
>
>  /*
> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
>  {
>   trace_binder_lock(tag);
>   mutex_lock(&binder_main_lock);
> + preempt_disable();
>   trace_binder_locked(tag);
>  }
>
> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
>  {
>   trace_binder_unlock(tag);
>   mutex_unlock(&binder_main_lock);
> + preempt_enable();
> +}
> +
> +static inline void *kzalloc_nopreempt(size_t size)
> +{
> + void *ptr;
> +
> + ptr = kzalloc(size, GFP_NOWAIT);
> + if (ptr)
> + return ptr;
> +
> + preempt_enable_no_resched();
> + ptr = kzalloc(size, GFP_KERNEL);
> + preempt_disable();
> +
> + return ptr;
> +}
> +
> +static inline long copy_to_user_nopreempt(void __user *to,
> +  const void *from, long n)
> +{
> + long ret;
> +
> + preempt_enable_no_resched();
> + ret = copy_to_user(to, from, n);
> + preempt_disable();
> + return ret;
> +}
> +
> +static inline long copy_from_user_nopreempt(void *to,
> +const void __user *from,
> +long n)
> +{
> + long ret;
> +
> + preempt_enable_no_resched();
> + ret = copy_from_user(to, from, n);
> + preempt_disable();
> + return ret;
>  }
>
> +#define get_user_nopreempt(x, ptr) \
> +({ \
> + int __ret; \
> + preempt_enable_no_resched(); \
> + __ret = get_user(x, ptr); \
> + preempt_disable(); \
> + __ret; \
> +})
> +
> +#define put_user_nopreempt(x, ptr) \
> +({ \
> + int __ret; \
> + preempt_enable_no_resched(); \
> + __ret = put_user(x, ptr); \
> + preempt_disable(); \
> + __ret; \
> +})
> +
>  static void binder_set_nice(long nice)
>  {
>   long min_nice;
> @@ -568,6 +634,8 @@ static int binder_update_page_range(struct
> binder_proc *proc, int allocate,
>   else
>   mm = get_task_mm(proc->tsk);
>
> + preempt_enable_no_resched();
> +
>   if (mm) {
>   down_write(&mm->mmap_sem);
>   vma = proc->vma;
> @@ -622,6 +690,9 @@ static int binder_update_page_range(struct
> binder_proc *proc, int allocate,
>   up_write(&mm->mmap_sem);
>   mmput(mm);
>   }
> +
> + preempt_disable();
> +
>   return 0;
>
>  free_range:
> @@ -644,6 +715,9 @@ err_no_vma:
>   up_write(&mm->mmap_sem);
>   mmput(mm);
>   }
> +
> + preempt_disable();
> +
>   return -ENOMEM;
>  }
>
> @@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct
> binder_proc *proc,
>   return NULL;
>   }
>
> - node = kzalloc(sizeof(*node), GFP_KERNEL);
> + node = kzalloc_nopreempt(sizeof(*node));
>   if (node == NULL)
>   return NULL;
>   binder_stats_created(BINDER_STAT_NODE);
> @@ -1040,7 +1114,7 @@ static struct binder_ref
> *binder_get_ref_for_node(struct binder_proc *proc,
>   else
>   return ref;
>   }
> - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
> + new_ref = kzalloc_nopreempt(sizeof(*ref));
>   if (new_ref == NULL)
>   return NULL;
>   binder_stats_created(BINDER_STAT_REF);
> @@ -1438,14 +1512,14 @@ static void binder_transaction(struct binder_proc 
> *proc,
>   e->to_proc = target_proc->pid;
>
>   /* TODO: reuse incoming transaction for reply */
> - t = kzalloc(sizeof(*t), GFP_KERNEL);
> + t = kzalloc_nopreempt(sizeof(*t));
>   if (t == NULL) {
>   return_error = BR_FAILED_REPLY;
>   goto err_alloc_t_failed;
>   }
>   binder_stats_created(BINDER_STAT_TRANSACTION