Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
Thanks Peter. We'll give that patch a try as part of our refactoring. Looking at finer-grained locking and we'll try going back to rt_mutex plus this patch. On Wed, Sep 14, 2016 at 9:55 AM, Peter Zijlstra wrote: > On Wed, Sep 14, 2016 at 06:13:40PM +0200, Peter Zijlstra wrote: >> On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote: >> > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote: >> > > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra >> > > wrote: >> > > > cgroups should be irrelevant, PI is unaware of them. >> > > >> > > I don't think cgroups are irrelevant. PI being unaware of them >> > > explains the problem I described. If the task that holds the lock is >> > > in a cgroup that has a low cpu.shares value, then boosting the task's >> > > priority within that group does necessarily make it any more likely to >> > > run. >> > >> > Thing is, for FIFO/DL the important parameters (prio and deadline resp.) >> > are not cgroup dependent. >> > >> > For CFS you're right, and as per usual, cgroups will be a royal pain. >> > While we can compute the total weight in the block chain, getting that >> > back to a task which is stuck in a cgroup is just not going to work >> > well. >> >> Not to mention the fact that the weight depends on the current running >> state. Having those tasks block insta changes the actual weight. >> >> > /me curses @ cgroups.. bloody stupid things. >> >> More of that. > > > Something a little like so... completely untested, and has a fair number > of corner cases still left open I think.. > > > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1691,8 +1691,8 @@ struct task_struct { > struct seccomp seccomp; > > /* Thread group tracking */ > - u32 parent_exec_id; > - u32 self_exec_id; > + u32 parent_exec_id; > + u32 self_exec_id; > /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, > * mempolicy */ > spinlock_t alloc_lock; > @@ -1710,6 +1710,11 @@ struct task_struct { > struct task_struct *pi_top_task; > /* Deadlock detection and priority inheritance handling */ > struct rt_mutex_waiter *pi_blocked_on; > + unsigned long pi_weight; > +#ifdef CONFIG_CGROUP_SCHED > + struct task_group *orig_task_group; > + unsigned long normal_weight; > +#endif > #endif > > #ifdef CONFIG_DEBUG_MUTEXES > @@ -1977,6 +1982,8 @@ static inline int tsk_nr_cpus_allowed(st > return p->nr_cpus_allowed; > } > > +extern unsigned long task_pi_weight(struct task_struct *p); > + > #define TNF_MIGRATED 0x01 > #define TNF_NO_GROUP 0x02 > #define TNF_SHARED 0x04 > --- a/kernel/locking/rtmutex.c > +++ b/kernel/locking/rtmutex.c > @@ -20,6 +20,19 @@ > #include "rtmutex_common.h" > > /* > + * !(rt_prio || dl_prio) > + */ > +static inline bool other_prio(int prio) > +{ > + return prio >= MAX_RT_PRIO; > +} > + > +static inline bool other_task(struct task_struct *task) > +{ > + return other_prio(task->prio); > +} > + > +/* > * lock->owner state tracking: > * > * lock->owner holds the task_struct pointer of the owner. Bit 0 > @@ -226,6 +239,11 @@ rt_mutex_enqueue(struct rt_mutex *lock, > > rb_link_node(&waiter->tree_entry, parent, link); > rb_insert_color(&waiter->tree_entry, &lock->waiters); > + /* > +* We want the lock->waiter_weight to reflect the sum of all queued > +* waiters. > +*/ > + lock->waiters_weight += waiter->weight; > } > > static void > @@ -239,6 +257,7 @@ rt_mutex_dequeue(struct rt_mutex *lock, > > rb_erase(&waiter->tree_entry, &lock->waiters); > RB_CLEAR_NODE(&waiter->tree_entry); > + lock->waiters_weight += waiter->weight; > } > > static void > @@ -265,6 +284,18 @@ rt_mutex_enqueue_pi(struct task_struct * > > rb_link_node(&waiter->pi_tree_entry, parent, link); > rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters); > + > + /* > +* Since a task can own multiple locks, and we have one pi_waiter > +* for every lock, propagate the lock->waiter_weight, which is the sum > +* of all weights queued on that lock, into the top waiter, and add > +* that to the owning task's pi_weight. > +* > +* This results in pi_weight being the sum of all blocked waiters > +* on this task. > +*/ > + waiter->top_weight = waiter->lock->waiters_weight; > + task->pi_weight += waiter->top_weight; > } > > static void > @@ -278,6 +309,7 @@ rt_mutex_dequeue_pi(struct task_struct * > > rb_erase(&waiter->pi_tree_entry, &task->pi_waiters); > RB_CLEAR_NODE(&waiter->pi_tree_entry); > + task->pi_weight -= waiter->top_weight; > } > > static void rt_mutex_adjust_prio(struct task_struct *p) > @@ -497,7 +529,7 @@ static int rt_mutex_adjust_prio_chain(st > * detection is enabled we continue, but stop the > * requeueing in
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Wed, Sep 14, 2016 at 06:13:40PM +0200, Peter Zijlstra wrote: > On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote: > > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote: > > > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra > > > wrote: > > > > cgroups should be irrelevant, PI is unaware of them. > > > > > > I don't think cgroups are irrelevant. PI being unaware of them > > > explains the problem I described. If the task that holds the lock is > > > in a cgroup that has a low cpu.shares value, then boosting the task's > > > priority within that group does necessarily make it any more likely to > > > run. > > > > Thing is, for FIFO/DL the important parameters (prio and deadline resp.) > > are not cgroup dependent. > > > > For CFS you're right, and as per usual, cgroups will be a royal pain. > > While we can compute the total weight in the block chain, getting that > > back to a task which is stuck in a cgroup is just not going to work > > well. > > Not to mention the fact that the weight depends on the current running > state. Having those tasks block insta changes the actual weight. > > > /me curses @ cgroups.. bloody stupid things. > > More of that. Something a little like so... completely untested, and has a fair number of corner cases still left open I think.. --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1691,8 +1691,8 @@ struct task_struct { struct seccomp seccomp; /* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; + u32 parent_exec_id; + u32 self_exec_id; /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, * mempolicy */ spinlock_t alloc_lock; @@ -1710,6 +1710,11 @@ struct task_struct { struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; + unsigned long pi_weight; +#ifdef CONFIG_CGROUP_SCHED + struct task_group *orig_task_group; + unsigned long normal_weight; +#endif #endif #ifdef CONFIG_DEBUG_MUTEXES @@ -1977,6 +1982,8 @@ static inline int tsk_nr_cpus_allowed(st return p->nr_cpus_allowed; } +extern unsigned long task_pi_weight(struct task_struct *p); + #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -20,6 +20,19 @@ #include "rtmutex_common.h" /* + * !(rt_prio || dl_prio) + */ +static inline bool other_prio(int prio) +{ + return prio >= MAX_RT_PRIO; +} + +static inline bool other_task(struct task_struct *task) +{ + return other_prio(task->prio); +} + +/* * lock->owner state tracking: * * lock->owner holds the task_struct pointer of the owner. Bit 0 @@ -226,6 +239,11 @@ rt_mutex_enqueue(struct rt_mutex *lock, rb_link_node(&waiter->tree_entry, parent, link); rb_insert_color(&waiter->tree_entry, &lock->waiters); + /* +* We want the lock->waiter_weight to reflect the sum of all queued +* waiters. +*/ + lock->waiters_weight += waiter->weight; } static void @@ -239,6 +257,7 @@ rt_mutex_dequeue(struct rt_mutex *lock, rb_erase(&waiter->tree_entry, &lock->waiters); RB_CLEAR_NODE(&waiter->tree_entry); + lock->waiters_weight += waiter->weight; } static void @@ -265,6 +284,18 @@ rt_mutex_enqueue_pi(struct task_struct * rb_link_node(&waiter->pi_tree_entry, parent, link); rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters); + + /* +* Since a task can own multiple locks, and we have one pi_waiter +* for every lock, propagate the lock->waiter_weight, which is the sum +* of all weights queued on that lock, into the top waiter, and add +* that to the owning task's pi_weight. +* +* This results in pi_weight being the sum of all blocked waiters +* on this task. +*/ + waiter->top_weight = waiter->lock->waiters_weight; + task->pi_weight += waiter->top_weight; } static void @@ -278,6 +309,7 @@ rt_mutex_dequeue_pi(struct task_struct * rb_erase(&waiter->pi_tree_entry, &task->pi_waiters); RB_CLEAR_NODE(&waiter->pi_tree_entry); + task->pi_weight -= waiter->top_weight; } static void rt_mutex_adjust_prio(struct task_struct *p) @@ -497,7 +529,7 @@ static int rt_mutex_adjust_prio_chain(st * detection is enabled we continue, but stop the * requeueing in the chain walk. */ - if (top_waiter != task_top_pi_waiter(task)) { + if (top_waiter != task_top_pi_waiter(task) && !other_task(task)) { if (!detect_deadlock) goto out_unlock_pi; else @@ -512,7 +544,7 @@ static int rt_mutex_adjust_prio_chain(st * enabled we continue, but stop the requeueing in the
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote: > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote: > > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra > > wrote: > > > cgroups should be irrelevant, PI is unaware of them. > > > > I don't think cgroups are irrelevant. PI being unaware of them > > explains the problem I described. If the task that holds the lock is > > in a cgroup that has a low cpu.shares value, then boosting the task's > > priority within that group does necessarily make it any more likely to > > run. > > Thing is, for FIFO/DL the important parameters (prio and deadline resp.) > are not cgroup dependent. > > For CFS you're right, and as per usual, cgroups will be a royal pain. > While we can compute the total weight in the block chain, getting that > back to a task which is stuck in a cgroup is just not going to work > well. Not to mention the fact that the weight depends on the current running state. Having those tasks block insta changes the actual weight. > /me curses @ cgroups.. bloody stupid things. More of that.
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote: > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra wrote: > > cgroups should be irrelevant, PI is unaware of them. > > I don't think cgroups are irrelevant. PI being unaware of them > explains the problem I described. If the task that holds the lock is > in a cgroup that has a low cpu.shares value, then boosting the task's > priority within that group does necessarily make it any more likely to > run. Thing is, for FIFO/DL the important parameters (prio and deadline resp.) are not cgroup dependent. For CFS you're right, and as per usual, cgroups will be a royal pain. While we can compute the total weight in the block chain, getting that back to a task which is stuck in a cgroup is just not going to work well. The only 'solution' I can come up with in a hurry is, when the task is boosted, move it to the root cgroup. That of course has a whole heap of problems all on its own. /me curses @ cgroups.. bloody stupid things.
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Wed, Sep 14, 2016 at 09:10:01AM +0200, Peter Zijlstra wrote: > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote: > > Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything > > in the rtmutex code or documentation that indicates that they don't > > work for normal tasks. From what I can tell the priority gets boosted > > in every case. This may not work as well for CFS tasks as for realtime > > tasks, but it should at least help when there is a large priority > > difference. > > It does something (it used to explicitly ignore OTHER) but its not > something well defined or usable. I looked again, and while it updates the ->prio field for OTHER tasks, that does not seem to cause a change to the actual weight field (which is derived from ->static_prio). So it really should not do anything.. as I remebered it.
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Wed, Sep 14, 2016 at 09:10:01AM +0200, Peter Zijlstra wrote: > We could define a meaningful something for CFS and implement that, but > it isn't currently done. So the generalization of the Priority Inheritance Protocol is Proxy Execution Protocol, which basically lets the boosted task run _as_ the task on the block chain as picked by the schedule function (treating all of them as runnable). Where 'as' means it really consumes scheduling resources of the (blocked) donor task. Since the scheduling function for FIFO is: pick the highest prio one and go for it, this trivially reduces to PI for FIFO. Now, Proxy Execution Protocol is 'trivial' to implement on UP, but has a few wobbles on SMP. But we can use it to define a sensible definition for a WFQ class scheduler (like CFS). For these the scheduling function basically runs the boosted task as every donor task on the block chain gets their slice. Alternatively, since it treats all 'blocked' tasks as runnable, the total weight of the boosted task is its own weight plus the sum of weight on the block chain. Which is something that shouldn't be too hard to implement, but very much isn't what happens today.
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote: > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra wrote: > > On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote: > > > >> A previous attempt to fix this problem, changed the lock to use > >> rt_mutex instead of mutex, but this apparently did not work as well as > >> this patch. I believe the added overhead was noticeable, and it did > >> not work when the preempted thread was in a different cgroup (I don't > >> know if this is still the case). > > > > Do you actually have RR/FIFO/DL tasks? Currently PI isn't > > defined/implemented for OTHER. > > > > Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything > in the rtmutex code or documentation that indicates that they don't > work for normal tasks. From what I can tell the priority gets boosted > in every case. This may not work as well for CFS tasks as for realtime > tasks, but it should at least help when there is a large priority > difference. It does something (it used to explicitly ignore OTHER) but its not something well defined or usable. > > cgroups should be irrelevant, PI is unaware of them. > > I don't think cgroups are irrelevant. PI being unaware of them > explains the problem I described. If the task that holds the lock is > in a cgroup that has a low cpu.shares value, then boosting the task's > priority within that group does necessarily make it any more likely to > run. See, the problem is that 'priority' is a meaningless concept for OTHER/CFS. In any case, typically only RT tasks care about PI, and the traditional Priority Inheritance algorithm only really works correctly for FIFO. As is RR has issues and DL is a crude hack, CFS is really just an accident by not explicitly exempting it. We could define a meaningful something for CFS and implement that, but it isn't currently done.
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra wrote: > On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote: > >> A previous attempt to fix this problem, changed the lock to use >> rt_mutex instead of mutex, but this apparently did not work as well as >> this patch. I believe the added overhead was noticeable, and it did >> not work when the preempted thread was in a different cgroup (I don't >> know if this is still the case). > > Do you actually have RR/FIFO/DL tasks? Currently PI isn't > defined/implemented for OTHER. > Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything in the rtmutex code or documentation that indicates that they don't work for normal tasks. From what I can tell the priority gets boosted in every case. This may not work as well for CFS tasks as for realtime tasks, but it should at least help when there is a large priority difference. > cgroups should be irrelevant, PI is unaware of them. I don't think cgroups are irrelevant. PI being unaware of them explains the problem I described. If the task that holds the lock is in a cgroup that has a low cpu.shares value, then boosting the task's priority within that group does necessarily make it any more likely to run. -- Arve Hjønnevåg
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Mon, Sep 12, 2016 at 11:42 PM, Greg Kroah-Hartman wrote: > On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote: >> On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman >> wrote: >> > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote: >> >> On Sat, 10 Sep 2016, Peter Zijlstra wrote: >> >> >> >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote: >> >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: >> >> > > > In Android systems, the display pipeline relies on low >> >> > > > latency binder transactions and is therefore sensitive to >> >> > > > delays caused by contention for the global binder lock. >> >> > > > Jank is siginificantly reduced by disabling preemption >> >> > > > while the global binder lock is held. >> >> > > >> >> > > That's now how preempt_disable is supposed to use. It is for critical >> >> > >> >> > not, that's supposed to be _not_. Just to be absolutely clear, this is >> >> > NOT how you're supposed to use preempt_disable(). >> >> > >> >> > > sections that use per-cpu or similar resources. >> >> > > >> >> > > > >> >> > > > Originally-from: Riley Andrews >> >> > > > Signed-off-by: Todd Kjos >> >> > >> >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct >> >> > > > binder_proc *proc, int flags) >> >> > > > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); >> >> > > > unlock_task_sighand(proc->tsk, &irqs); >> >> > > > >> >> > > > - return __alloc_fd(files, 0, rlim_cur, flags); >> >> > > > + preempt_enable_no_resched(); >> >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags); >> >> > > > + preempt_disable(); >> >> > >> >> > And the fact that people want to use preempt_enable_no_resched() shows >> >> > that they're absolutely clueless. >> >> > >> >> > This is so broken its not funny. >> >> > >> >> > NAK NAK NAK >> >> >> >> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place >> >> documents clearly that this is tinkering and not proper software >> >> engineering. >> > >> > I have pointed out in the other thread for this patch (the one that had >> > a patch that could be applied) that the single lock in the binder code >> > is the main problem here, it should be solved instead of this messing >> > around with priorities. >> > >> >> While removing the single lock in the binder driver would help reduce >> the problem that this patch tries to work around, it would not fix it. >> The largest problems occur when a very low priority thread gets >> preempted while holding the lock. When a high priority thread then >> needs the same lock it can't get it. Changing the driver to use more >> fine-grained locking would reduce the set of threads that can trigger >> this problem, but there are processes that receive work from both high >> and low priority threads and could still end up in the same situation. > > Ok, but how would this patch solve the problem? It only reduces the > window of when the preemption could occur, it doesn't stop it from > happening entirely. > I agree, this patch does not _solve_ the problem either. It only reduces it, as there are many places where it re-enables preemption for significant work. >> A previous attempt to fix this problem, changed the lock to use >> rt_mutex instead of mutex, but this apparently did not work as well as >> this patch. I believe the added overhead was noticeable, and it did >> not work when the preempted thread was in a different cgroup (I don't >> know if this is still the case). > > Why would rt_mutex solve this? If the task that holds the lock would run when you try to get the lock, there would be no long delay to get the lock. rt_mutex seems to be designed to solve this problem. > > I worry that this is only going to get worse as more portions of the > Android userspace/HAL get rewritten to use binder more and more. What > about trying to get rid of the lock entirely? That way the task > priority levels would work "properly" here. > I think a good first step would be to switch to locks with a smaller scope. I'm not how useful it would be to eliminate locks in this driver since it calls into other code that uses locks. > thanks, > > greg k-h -- Arve Hjønnevåg
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote: > A previous attempt to fix this problem, changed the lock to use > rt_mutex instead of mutex, but this apparently did not work as well as > this patch. I believe the added overhead was noticeable, and it did > not work when the preempted thread was in a different cgroup (I don't > know if this is still the case). Do you actually have RR/FIFO/DL tasks? Currently PI isn't defined/implemented for OTHER. cgroups should be irrelevant, PI is unaware of them.
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote: > On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman > wrote: > > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote: > >> On Sat, 10 Sep 2016, Peter Zijlstra wrote: > >> > >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote: > >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: > >> > > > In Android systems, the display pipeline relies on low > >> > > > latency binder transactions and is therefore sensitive to > >> > > > delays caused by contention for the global binder lock. > >> > > > Jank is siginificantly reduced by disabling preemption > >> > > > while the global binder lock is held. > >> > > > >> > > That's now how preempt_disable is supposed to use. It is for critical > >> > > >> > not, that's supposed to be _not_. Just to be absolutely clear, this is > >> > NOT how you're supposed to use preempt_disable(). > >> > > >> > > sections that use per-cpu or similar resources. > >> > > > >> > > > > >> > > > Originally-from: Riley Andrews > >> > > > Signed-off-by: Todd Kjos > >> > > >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > >> > > > binder_proc *proc, int flags) > >> > > > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > >> > > > unlock_task_sighand(proc->tsk, &irqs); > >> > > > > >> > > > - return __alloc_fd(files, 0, rlim_cur, flags); > >> > > > + preempt_enable_no_resched(); > >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags); > >> > > > + preempt_disable(); > >> > > >> > And the fact that people want to use preempt_enable_no_resched() shows > >> > that they're absolutely clueless. > >> > > >> > This is so broken its not funny. > >> > > >> > NAK NAK NAK > >> > >> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place > >> documents clearly that this is tinkering and not proper software > >> engineering. > > > > I have pointed out in the other thread for this patch (the one that had > > a patch that could be applied) that the single lock in the binder code > > is the main problem here, it should be solved instead of this messing > > around with priorities. > > > > While removing the single lock in the binder driver would help reduce > the problem that this patch tries to work around, it would not fix it. > The largest problems occur when a very low priority thread gets > preempted while holding the lock. When a high priority thread then > needs the same lock it can't get it. Changing the driver to use more > fine-grained locking would reduce the set of threads that can trigger > this problem, but there are processes that receive work from both high > and low priority threads and could still end up in the same situation. Ok, but how would this patch solve the problem? It only reduces the window of when the preemption could occur, it doesn't stop it from happening entirely. > A previous attempt to fix this problem, changed the lock to use > rt_mutex instead of mutex, but this apparently did not work as well as > this patch. I believe the added overhead was noticeable, and it did > not work when the preempted thread was in a different cgroup (I don't > know if this is still the case). Why would rt_mutex solve this? I worry that this is only going to get worse as more portions of the Android userspace/HAL get rewritten to use binder more and more. What about trying to get rid of the lock entirely? That way the task priority levels would work "properly" here. thanks, greg k-h
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman wrote: > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote: >> On Sat, 10 Sep 2016, Peter Zijlstra wrote: >> >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote: >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: >> > > > In Android systems, the display pipeline relies on low >> > > > latency binder transactions and is therefore sensitive to >> > > > delays caused by contention for the global binder lock. >> > > > Jank is siginificantly reduced by disabling preemption >> > > > while the global binder lock is held. >> > > >> > > That's now how preempt_disable is supposed to use. It is for critical >> > >> > not, that's supposed to be _not_. Just to be absolutely clear, this is >> > NOT how you're supposed to use preempt_disable(). >> > >> > > sections that use per-cpu or similar resources. >> > > >> > > > >> > > > Originally-from: Riley Andrews >> > > > Signed-off-by: Todd Kjos >> > >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct >> > > > binder_proc *proc, int flags) >> > > > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); >> > > > unlock_task_sighand(proc->tsk, &irqs); >> > > > >> > > > - return __alloc_fd(files, 0, rlim_cur, flags); >> > > > + preempt_enable_no_resched(); >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags); >> > > > + preempt_disable(); >> > >> > And the fact that people want to use preempt_enable_no_resched() shows >> > that they're absolutely clueless. >> > >> > This is so broken its not funny. >> > >> > NAK NAK NAK >> >> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place >> documents clearly that this is tinkering and not proper software >> engineering. > > I have pointed out in the other thread for this patch (the one that had > a patch that could be applied) that the single lock in the binder code > is the main problem here, it should be solved instead of this messing > around with priorities. > While removing the single lock in the binder driver would help reduce the problem that this patch tries to work around, it would not fix it. The largest problems occur when a very low priority thread gets preempted while holding the lock. When a high priority thread then needs the same lock it can't get it. Changing the driver to use more fine-grained locking would reduce the set of threads that can trigger this problem, but there are processes that receive work from both high and low priority threads and could still end up in the same situation. A previous attempt to fix this problem, changed the lock to use rt_mutex instead of mutex, but this apparently did not work as well as this patch. I believe the added overhead was noticeable, and it did not work when the preempted thread was in a different cgroup (I don't know if this is still the case). It would be useful to generic solution to this problem. > So don't worry, I'm not taking this change :) > > thanks, > > greg k-h -- Arve Hjønnevåg
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
Thanks for the reviews. We'll come up with a different solution. On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman wrote: > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote: >> On Sat, 10 Sep 2016, Peter Zijlstra wrote: >> >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote: >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: >> > > > In Android systems, the display pipeline relies on low >> > > > latency binder transactions and is therefore sensitive to >> > > > delays caused by contention for the global binder lock. >> > > > Jank is siginificantly reduced by disabling preemption >> > > > while the global binder lock is held. >> > > >> > > That's now how preempt_disable is supposed to use. It is for critical >> > >> > not, that's supposed to be _not_. Just to be absolutely clear, this is >> > NOT how you're supposed to use preempt_disable(). >> > >> > > sections that use per-cpu or similar resources. >> > > >> > > > >> > > > Originally-from: Riley Andrews >> > > > Signed-off-by: Todd Kjos >> > >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct >> > > > binder_proc *proc, int flags) >> > > > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); >> > > > unlock_task_sighand(proc->tsk, &irqs); >> > > > >> > > > - return __alloc_fd(files, 0, rlim_cur, flags); >> > > > + preempt_enable_no_resched(); >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags); >> > > > + preempt_disable(); >> > >> > And the fact that people want to use preempt_enable_no_resched() shows >> > that they're absolutely clueless. >> > >> > This is so broken its not funny. >> > >> > NAK NAK NAK >> >> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place >> documents clearly that this is tinkering and not proper software >> engineering. > > I have pointed out in the other thread for this patch (the one that had > a patch that could be applied) that the single lock in the binder code > is the main problem here, it should be solved instead of this messing > around with priorities. > > So don't worry, I'm not taking this change :) > > thanks, > > greg k-h
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote: > On Sat, 10 Sep 2016, Peter Zijlstra wrote: > > > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote: > > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: > > > > In Android systems, the display pipeline relies on low > > > > latency binder transactions and is therefore sensitive to > > > > delays caused by contention for the global binder lock. > > > > Jank is siginificantly reduced by disabling preemption > > > > while the global binder lock is held. > > > > > > That's now how preempt_disable is supposed to use. It is for critical > > > > not, that's supposed to be _not_. Just to be absolutely clear, this is > > NOT how you're supposed to use preempt_disable(). > > > > > sections that use per-cpu or similar resources. > > > > > > > > > > > Originally-from: Riley Andrews > > > > Signed-off-by: Todd Kjos > > > > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > > > > binder_proc *proc, int flags) > > > > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > > > > unlock_task_sighand(proc->tsk, &irqs); > > > > > > > > - return __alloc_fd(files, 0, rlim_cur, flags); > > > > + preempt_enable_no_resched(); > > > > + ret = __alloc_fd(files, 0, rlim_cur, flags); > > > > + preempt_disable(); > > > > And the fact that people want to use preempt_enable_no_resched() shows > > that they're absolutely clueless. > > > > This is so broken its not funny. > > > > NAK NAK NAK > > Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place > documents clearly that this is tinkering and not proper software > engineering. I have pointed out in the other thread for this patch (the one that had a patch that could be applied) that the single lock in the binder code is the main problem here, it should be solved instead of this messing around with priorities. So don't worry, I'm not taking this change :) thanks, greg k-h
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Sat, 10 Sep 2016, Peter Zijlstra wrote: > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote: > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: > > > In Android systems, the display pipeline relies on low > > > latency binder transactions and is therefore sensitive to > > > delays caused by contention for the global binder lock. > > > Jank is siginificantly reduced by disabling preemption > > > while the global binder lock is held. > > > > That's now how preempt_disable is supposed to use. It is for critical > > not, that's supposed to be _not_. Just to be absolutely clear, this is > NOT how you're supposed to use preempt_disable(). > > > sections that use per-cpu or similar resources. > > > > > > > > Originally-from: Riley Andrews > > > Signed-off-by: Todd Kjos > > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > > > binder_proc *proc, int flags) > > > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > > > unlock_task_sighand(proc->tsk, &irqs); > > > > > > - return __alloc_fd(files, 0, rlim_cur, flags); > > > + preempt_enable_no_resched(); > > > + ret = __alloc_fd(files, 0, rlim_cur, flags); > > > + preempt_disable(); > > And the fact that people want to use preempt_enable_no_resched() shows > that they're absolutely clueless. > > This is so broken its not funny. > > NAK NAK NAK Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place documents clearly that this is tinkering and not proper software engineering. NAK from my side as well. Thanks, Thomas
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote: > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: > > In Android systems, the display pipeline relies on low > > latency binder transactions and is therefore sensitive to > > delays caused by contention for the global binder lock. > > Jank is siginificantly reduced by disabling preemption > > while the global binder lock is held. > > That's now how preempt_disable is supposed to use. It is for critical not, that's supposed to be _not_. Just to be absolutely clear, this is NOT how you're supposed to use preempt_disable(). > sections that use per-cpu or similar resources. > > > > > Originally-from: Riley Andrews > > Signed-off-by: Todd Kjos > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > > binder_proc *proc, int flags) > > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > > unlock_task_sighand(proc->tsk, &irqs); > > > > - return __alloc_fd(files, 0, rlim_cur, flags); > > + preempt_enable_no_resched(); > > + ret = __alloc_fd(files, 0, rlim_cur, flags); > > + preempt_disable(); And the fact that people want to use preempt_enable_no_resched() shows that they're absolutely clueless. This is so broken its not funny. NAK NAK NAK
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: > In Android systems, the display pipeline relies on low > latency binder transactions and is therefore sensitive to > delays caused by contention for the global binder lock. > Jank is siginificantly reduced by disabling preemption > while the global binder lock is held. That's now how preempt_disable is supposed to use. It is for critical sections that use per-cpu or similar resources. > > Originally-from: Riley Andrews > Signed-off-by: Todd Kjos > --- > drivers/android/binder.c | 194 > +++ > 1 file changed, 146 insertions(+), 48 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 16288e7..c36e420 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > struct files_struct *files = proc->files; > unsigned long rlim_cur; > unsigned long irqs; > + int ret; > > if (files == NULL) > return -ESRCH; > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > unlock_task_sighand(proc->tsk, &irqs); > > - return __alloc_fd(files, 0, rlim_cur, flags); > + preempt_enable_no_resched(); > + ret = __alloc_fd(files, 0, rlim_cur, flags); > + preempt_disable(); > + > + return ret; > } > > /* > @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > static void task_fd_install( > struct binder_proc *proc, unsigned int fd, struct file *file) > { > - if (proc->files) > + if (proc->files) { > + preempt_enable_no_resched(); > __fd_install(proc->files, fd, file); > + preempt_disable(); > + } > } > > /* > @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) > { > trace_binder_lock(tag); > mutex_lock(&binder_main_lock); > + preempt_disable(); > trace_binder_locked(tag); > } > > @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) > { > trace_binder_unlock(tag); > mutex_unlock(&binder_main_lock); > + preempt_enable(); > +} > + > +static inline void *kzalloc_nopreempt(size_t size) > +{ > + void *ptr; > + > + ptr = kzalloc(size, GFP_NOWAIT); > + if (ptr) > + return ptr; > + > + preempt_enable_no_resched(); > + ptr = kzalloc(size, GFP_KERNEL); > + preempt_disable(); > + > + return ptr; > +} > + > +static inline long copy_to_user_nopreempt(void __user *to, > + const void *from, long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_to_user(to, from, n); > + preempt_disable(); > + return ret; > +} > + > +static inline long copy_from_user_nopreempt(void *to, > +const void __user *from, > +long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_from_user(to, from, n); > + preempt_disable(); > + return ret; > } > > +#define get_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched(); \ > + __ret = get_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) > + > +#define put_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched(); \ > + __ret = put_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) > + > static void binder_set_nice(long nice) > { > long min_nice; > @@ -568,6 +634,8 @@ static int binder_update_page_range(struct > binder_proc *proc, int allocate, > else > mm = get_task_mm(proc->tsk); > > + preempt_enable_no_resched(); > + > if (mm) { > down_write(&mm->mmap_sem); > vma = proc->vma; > @@ -622,6 +690,9 @@ static int binder_update_page_range(struct > binder_proc *proc, int allocate, > up_write(&mm->mmap_sem); > mmput(mm); > } > + > + preempt_disable(); > + > return 0; > > free_range: > @@ -644,6 +715,9 @@ err_no_vma: > up_write(&mm->mmap_sem); > mmput(mm); > } > + > + preempt_disable(); > + > return -ENOMEM; > } > > @@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct > binder_proc *proc, > return NULL; > } > > - node = kzalloc(sizeof(*node), GFP_KERNEL); > + node = kzalloc_nopreempt(sizeof(*node)); > if (node == NULL) > return NULL; > binder_stats_created(BINDER_STAT_NODE); > @@ -1040,7 +1114,7 @@ static struct binder_ref > *binder_get_ref_for_node(struct binder_proc *proc, > else > return ref; > } > - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); > + new_ref = kzalloc_nopreempt(sizeof(*ref)); > if (new_ref == NULL) > return NULL; > binder_stats_created(BINDER_STAT_REF); > @@ -1438,14 +1512,14 @@ static void binder_transaction(struct binder_proc > *proc, > e->to_proc = target_proc->pid; > > /* TODO: reuse incoming transaction for reply */ > - t = kzalloc(sizeof(*t), GFP_KERNEL); > + t = kzalloc_nopreempt(sizeof(*t)); > if (t == NULL) { > return_error = BR_FAILED_REPLY; > goto err_alloc_t_failed; > } > binder_stats_created(BINDER_STAT_TRANSACTION); > >
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Sat, Sep 10, 2016 at 01:18:47PM +0200, Greg KH wrote: > On Fri, Sep 09, 2016 at 10:39:32AM -0700, Todd Kjos wrote: > > On Fri, Sep 9, 2016 at 8:44 AM, Greg KH wrote: > > > On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote: > > >> From: Todd Kjos > > >> > > >> In Android systems, the display pipeline relies on low > > >> latency binder transactions and is therefore sensitive to > > >> delays caused by contention for the global binder lock. > > >> Jank is significantly reduced by disabling preemption > > >> while the global binder lock is held. > > > > > > What is the technical definition of "Jank"? :) > > > > I'll rephrase in the next version to "dropped or delayed frames". > > Heh, thanks :) > > Also in the next version can you fix the errors found by the 0-day build > bot? > > > >> This patch was originated by Riley Andrews > > >> with tweaks and forward-porting by me. > > >> > > >> Originally-from: Riley Andrews > > >> Signed-off-by: Todd Kjos > > >> --- > > >> drivers/android/binder.c | 194 > > >> +++ > > >> 1 file changed, 146 insertions(+), 48 deletions(-) > > >> > > >> diff --git a/drivers/android/binder.c b/drivers/android/binder.c > > >> index 16288e7..c36e420 100644 > > >> --- a/drivers/android/binder.c > > >> +++ b/drivers/android/binder.c > > >> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct > > >> binder_proc *proc, int flags) > > >> struct files_struct *files = proc->files; > > >> unsigned long rlim_cur; > > >> unsigned long irqs; > > >> + int ret; > > >> > > >> if (files == NULL) > > >> return -ESRCH; > > >> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > > >> binder_proc *proc, int flags) > > >> rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > > >> unlock_task_sighand(proc->tsk, &irqs); > > >> > > >> - return __alloc_fd(files, 0, rlim_cur, flags); > > >> + preempt_enable_no_resched(); > > >> + ret = __alloc_fd(files, 0, rlim_cur, flags); > > >> + preempt_disable(); > > >> + > > >> + return ret; > > >> } > > >> > > >> /* > > >> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct > > >> binder_proc *proc, int flags) > > >> static void task_fd_install( > > >> struct binder_proc *proc, unsigned int fd, struct file *file) > > >> { > > >> - if (proc->files) > > >> + if (proc->files) { > > >> + preempt_enable_no_resched(); > > >> __fd_install(proc->files, fd, file); > > >> + preempt_disable(); > > >> + } > > >> } > > >> > > >> /* > > >> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) > > >> { > > >> trace_binder_lock(tag); > > >> mutex_lock(&binder_main_lock); > > >> + preempt_disable(); > > >> trace_binder_locked(tag); > > >> } > > >> > > >> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) > > >> { > > >> trace_binder_unlock(tag); > > >> mutex_unlock(&binder_main_lock); > > >> + preempt_enable(); > > >> +} > > >> + > > >> +static inline void *kzalloc_nopreempt(size_t size) > > >> +{ > > >> + void *ptr; > > >> + > > >> + ptr = kzalloc(size, GFP_NOWAIT); > > >> + if (ptr) > > >> + return ptr; > > >> + > > >> + preempt_enable_no_resched(); > > >> + ptr = kzalloc(size, GFP_KERNEL); > > >> + preempt_disable(); > > > > > > Doesn't the allocator retry if the first one fails anyway? Why not > > > GFP_NOIO or GFP_ATOMIC? Have you really hit the second GFP_KERNEL > > > usage? > > > > I suspect we have hit the second, since we do get into cases where > > direct reclaim is needed. I can't confirm since I haven't instrumented > > this case. As you say, if we use GFP_ATOMIC instead, maybe we > > wouldn't, but even then I'd be concerned that we could deplete the > > memory reserved for atomic. The general idea of trying for a fast, > > nowait allocation and then enabling preempt for the rare potentially > > blocking allocation seems reasonable, doesn't it? > > Yes it is, so much so that I think there's a generic kernel function for > it already. Adding in the linux-mm mailing list to be told that I'm > wrong about this :) Ok, adding the correct linux-mm list address this time... greg k-h
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Fri, Sep 09, 2016 at 10:39:32AM -0700, Todd Kjos wrote: > On Fri, Sep 9, 2016 at 8:44 AM, Greg KH wrote: > > On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote: > >> From: Todd Kjos > >> > >> In Android systems, the display pipeline relies on low > >> latency binder transactions and is therefore sensitive to > >> delays caused by contention for the global binder lock. > >> Jank is significantly reduced by disabling preemption > >> while the global binder lock is held. > > > > What is the technical definition of "Jank"? :) > > I'll rephrase in the next version to "dropped or delayed frames". Heh, thanks :) Also in the next version can you fix the errors found by the 0-day build bot? > >> This patch was originated by Riley Andrews > >> with tweaks and forward-porting by me. > >> > >> Originally-from: Riley Andrews > >> Signed-off-by: Todd Kjos > >> --- > >> drivers/android/binder.c | 194 > >> +++ > >> 1 file changed, 146 insertions(+), 48 deletions(-) > >> > >> diff --git a/drivers/android/binder.c b/drivers/android/binder.c > >> index 16288e7..c36e420 100644 > >> --- a/drivers/android/binder.c > >> +++ b/drivers/android/binder.c > >> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc > >> *proc, int flags) > >> struct files_struct *files = proc->files; > >> unsigned long rlim_cur; > >> unsigned long irqs; > >> + int ret; > >> > >> if (files == NULL) > >> return -ESRCH; > >> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > >> binder_proc *proc, int flags) > >> rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > >> unlock_task_sighand(proc->tsk, &irqs); > >> > >> - return __alloc_fd(files, 0, rlim_cur, flags); > >> + preempt_enable_no_resched(); > >> + ret = __alloc_fd(files, 0, rlim_cur, flags); > >> + preempt_disable(); > >> + > >> + return ret; > >> } > >> > >> /* > >> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct > >> binder_proc *proc, int flags) > >> static void task_fd_install( > >> struct binder_proc *proc, unsigned int fd, struct file *file) > >> { > >> - if (proc->files) > >> + if (proc->files) { > >> + preempt_enable_no_resched(); > >> __fd_install(proc->files, fd, file); > >> + preempt_disable(); > >> + } > >> } > >> > >> /* > >> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) > >> { > >> trace_binder_lock(tag); > >> mutex_lock(&binder_main_lock); > >> + preempt_disable(); > >> trace_binder_locked(tag); > >> } > >> > >> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) > >> { > >> trace_binder_unlock(tag); > >> mutex_unlock(&binder_main_lock); > >> + preempt_enable(); > >> +} > >> + > >> +static inline void *kzalloc_nopreempt(size_t size) > >> +{ > >> + void *ptr; > >> + > >> + ptr = kzalloc(size, GFP_NOWAIT); > >> + if (ptr) > >> + return ptr; > >> + > >> + preempt_enable_no_resched(); > >> + ptr = kzalloc(size, GFP_KERNEL); > >> + preempt_disable(); > > > > Doesn't the allocator retry if the first one fails anyway? Why not > > GFP_NOIO or GFP_ATOMIC? Have you really hit the second GFP_KERNEL > > usage? > > I suspect we have hit the second, since we do get into cases where > direct reclaim is needed. I can't confirm since I haven't instrumented > this case. As you say, if we use GFP_ATOMIC instead, maybe we > wouldn't, but even then I'd be concerned that we could deplete the > memory reserved for atomic. The general idea of trying for a fast, > nowait allocation and then enabling preempt for the rare potentially > blocking allocation seems reasonable, doesn't it? Yes it is, so much so that I think there's a generic kernel function for it already. Adding in the linux-mm mailing list to be told that I'm wrong about this :) > >> + return ptr; > >> +} > >> + > >> +static inline long copy_to_user_nopreempt(void __user *to, > >> + const void *from, long n) > >> +{ > >> + long ret; > >> + > >> + preempt_enable_no_resched(); > >> + ret = copy_to_user(to, from, n); > >> + preempt_disable(); > >> + return ret; > >> +} > >> + > >> +static inline long copy_from_user_nopreempt(void *to, > >> + const void __user *from, > >> + long n) > >> +{ > >> + long ret; > >> + > >> + preempt_enable_no_resched(); > >> + ret = copy_from_user(to, from, n); > >> + preempt_disable(); > >> + return ret; > >> } > >> > >> +#define get_user_nopreempt(x, ptr) \ > >> +({ \ > >> + int __ret; \ > >> + preempt_enable_no_resched();\ > >> + __ret = get_user(x, ptr); \ > >> + preempt_
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Fri, Sep 9, 2016 at 8:44 AM, Greg KH wrote: > On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote: >> From: Todd Kjos >> >> In Android systems, the display pipeline relies on low >> latency binder transactions and is therefore sensitive to >> delays caused by contention for the global binder lock. >> Jank is significantly reduced by disabling preemption >> while the global binder lock is held. > > What is the technical definition of "Jank"? :) I'll rephrase in the next version to "dropped or delayed frames". > >> >> This patch was originated by Riley Andrews >> with tweaks and forward-porting by me. >> >> Originally-from: Riley Andrews >> Signed-off-by: Todd Kjos >> --- >> drivers/android/binder.c | 194 >> +++ >> 1 file changed, 146 insertions(+), 48 deletions(-) >> >> diff --git a/drivers/android/binder.c b/drivers/android/binder.c >> index 16288e7..c36e420 100644 >> --- a/drivers/android/binder.c >> +++ b/drivers/android/binder.c >> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc >> *proc, int flags) >> struct files_struct *files = proc->files; >> unsigned long rlim_cur; >> unsigned long irqs; >> + int ret; >> >> if (files == NULL) >> return -ESRCH; >> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc >> *proc, int flags) >> rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); >> unlock_task_sighand(proc->tsk, &irqs); >> >> - return __alloc_fd(files, 0, rlim_cur, flags); >> + preempt_enable_no_resched(); >> + ret = __alloc_fd(files, 0, rlim_cur, flags); >> + preempt_disable(); >> + >> + return ret; >> } >> >> /* >> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc >> *proc, int flags) >> static void task_fd_install( >> struct binder_proc *proc, unsigned int fd, struct file *file) >> { >> - if (proc->files) >> + if (proc->files) { >> + preempt_enable_no_resched(); >> __fd_install(proc->files, fd, file); >> + preempt_disable(); >> + } >> } >> >> /* >> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) >> { >> trace_binder_lock(tag); >> mutex_lock(&binder_main_lock); >> + preempt_disable(); >> trace_binder_locked(tag); >> } >> >> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) >> { >> trace_binder_unlock(tag); >> mutex_unlock(&binder_main_lock); >> + preempt_enable(); >> +} >> + >> +static inline void *kzalloc_nopreempt(size_t size) >> +{ >> + void *ptr; >> + >> + ptr = kzalloc(size, GFP_NOWAIT); >> + if (ptr) >> + return ptr; >> + >> + preempt_enable_no_resched(); >> + ptr = kzalloc(size, GFP_KERNEL); >> + preempt_disable(); > > Doesn't the allocator retry if the first one fails anyway? Why not > GFP_NOIO or GFP_ATOMIC? Have you really hit the second GFP_KERNEL > usage? I suspect we have hit the second, since we do get into cases where direct reclaim is needed. I can't confirm since I haven't instrumented this case. As you say, if we use GFP_ATOMIC instead, maybe we wouldn't, but even then I'd be concerned that we could deplete the memory reserved for atomic. The general idea of trying for a fast, nowait allocation and then enabling preempt for the rare potentially blocking allocation seems reasonable, doesn't it? >> + >> + return ptr; >> +} >> + >> +static inline long copy_to_user_nopreempt(void __user *to, >> + const void *from, long n) >> +{ >> + long ret; >> + >> + preempt_enable_no_resched(); >> + ret = copy_to_user(to, from, n); >> + preempt_disable(); >> + return ret; >> +} >> + >> +static inline long copy_from_user_nopreempt(void *to, >> + const void __user *from, >> + long n) >> +{ >> + long ret; >> + >> + preempt_enable_no_resched(); >> + ret = copy_from_user(to, from, n); >> + preempt_disable(); >> + return ret; >> } >> >> +#define get_user_nopreempt(x, ptr) \ >> +({ \ >> + int __ret; \ >> + preempt_enable_no_resched();\ >> + __ret = get_user(x, ptr); \ >> + preempt_disable(); \ >> + __ret; \ >> +}) >> + >> +#define put_user_nopreempt(x, ptr) \ >> +({ \ >> + int __ret; \ >> + preempt_enable_no_resched();\ >> + __ret = put_user(x, ptr); \ >> + preempt_disable(); \ >> + __ret; \ >> +}) > > Any reason some of these are #defines and some are static inline > functions? Not that I know of. I'll change to stat
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
Hi Todd, [auto build test WARNING on staging/staging-testing] [also build test WARNING on v4.8-rc5 next-20160909] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] [Suggest to use git(>=2.9.0) format-patch --base= (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on] [Check https://git-scm.com/docs/git-format-patch for more information] url: https://github.com/0day-ci/linux/commits/Todd-Kjos/android-binder-Disable-preemption-while-holding-the-global-binder-lock/20160909-23 config: x86_64-randconfig-x008-201636 (attached as .config) compiler: gcc-6 (Debian 6.1.1-9) 6.1.1 20160705 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All warnings (new ones prefixed by >>): drivers/android/binder.c: In function 'binder_thread_read': >> drivers/android/binder.c:2432:4: warning: this 'else' clause does not >> guard... [-Wmisleading-indentation] else ^~~~ drivers/android/binder.c:2434:5: note: ...this statement, but the latter is misleadingly indented as if it is guarded by the 'else' if (put_user_nopreempt(cmd, ^~ vim +/else +2432 drivers/android/binder.c da49889d drivers/staging/android/binder.c Arve Hjønnevåg 2014-02-21 2416 proc->pid, thread->pid, da49889d drivers/staging/android/binder.c Arve Hjønnevåg 2014-02-21 2417 node->debug_id, da49889d drivers/staging/android/binder.c Arve Hjønnevåg 2014-02-21 2418 (u64)node->ptr, da49889d drivers/staging/android/binder.c Arve Hjønnevåg 2014-02-21 2419 (u64)node->cookie); 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2420 } 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2421 } 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2422 } break; 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2423 case BINDER_WORK_DEAD_BINDER: 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2424 case BINDER_WORK_DEAD_BINDER_AND_CLEAR: 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2425 case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2426 struct binder_ref_death *death; 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2427 uint32_t cmd; 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2428 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2429 death = container_of(w, struct binder_ref_death, work); 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2430 if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2431 cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE; 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 @2432 else 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2433 cmd = BR_DEAD_BINDER; ddd4adb7 drivers/android/binder.c Todd Kjos 2016-09-09 2434 if (put_user_nopreempt(cmd, ddd4adb7 drivers/android/binder.c Todd Kjos 2016-09-09 2435 (uint32_t __user *) ptr)) 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2436 return -EFAULT; 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2437 ptr += sizeof(uint32_t); ddd4adb7 drivers/android/binder.c Todd Kjos 2016-09-09 2438 if (put_user_nopreempt(death->cookie, da49889d drivers/staging/android/binder.c Arve Hjønnevåg 2014-02-21 2439 (binder_uintptr_t __user *) ptr)) 355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 2440 return -EFAULT; :: The code at line 2432 was first introduced by commit :: 355b0502f6efea0ff9492753888772c96972d2a3 Revert "Staging: android: delete android drivers" :: TO: Greg Kroah-Hartman :: CC: Greg Kroah-Hartman --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: Binary data
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote: > From: Todd Kjos > > In Android systems, the display pipeline relies on low > latency binder transactions and is therefore sensitive to > delays caused by contention for the global binder lock. > Jank is significantly reduced by disabling preemption > while the global binder lock is held. What is the technical definition of "Jank"? :) > > This patch was originated by Riley Andrews > with tweaks and forward-porting by me. > > Originally-from: Riley Andrews > Signed-off-by: Todd Kjos > --- > drivers/android/binder.c | 194 > +++ > 1 file changed, 146 insertions(+), 48 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 16288e7..c36e420 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc > *proc, int flags) > struct files_struct *files = proc->files; > unsigned long rlim_cur; > unsigned long irqs; > + int ret; > > if (files == NULL) > return -ESRCH; > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc > *proc, int flags) > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > unlock_task_sighand(proc->tsk, &irqs); > > - return __alloc_fd(files, 0, rlim_cur, flags); > + preempt_enable_no_resched(); > + ret = __alloc_fd(files, 0, rlim_cur, flags); > + preempt_disable(); > + > + return ret; > } > > /* > @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc > *proc, int flags) > static void task_fd_install( > struct binder_proc *proc, unsigned int fd, struct file *file) > { > - if (proc->files) > + if (proc->files) { > + preempt_enable_no_resched(); > __fd_install(proc->files, fd, file); > + preempt_disable(); > + } > } > > /* > @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) > { > trace_binder_lock(tag); > mutex_lock(&binder_main_lock); > + preempt_disable(); > trace_binder_locked(tag); > } > > @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) > { > trace_binder_unlock(tag); > mutex_unlock(&binder_main_lock); > + preempt_enable(); > +} > + > +static inline void *kzalloc_nopreempt(size_t size) > +{ > + void *ptr; > + > + ptr = kzalloc(size, GFP_NOWAIT); > + if (ptr) > + return ptr; > + > + preempt_enable_no_resched(); > + ptr = kzalloc(size, GFP_KERNEL); > + preempt_disable(); Doesn't the allocator retry if the first one fails anyway? Why not GFP_NOIO or GFP_ATOMIC? Have you really hit the second GFP_KERNEL usage? > + > + return ptr; > +} > + > +static inline long copy_to_user_nopreempt(void __user *to, > + const void *from, long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_to_user(to, from, n); > + preempt_disable(); > + return ret; > +} > + > +static inline long copy_from_user_nopreempt(void *to, > + const void __user *from, > + long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_from_user(to, from, n); > + preempt_disable(); > + return ret; > } > > +#define get_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched();\ > + __ret = get_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) > + > +#define put_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched();\ > + __ret = put_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) Any reason some of these are #defines and some are static inline functions? Anyway, these all seem a bit strange to me, what type of latency spikes are you seeing that these changes resolve? Shouldn't that be an issue with the scheduler more than just the binder driver? I don't know of any other driver or IPC that does this type of thing with the scheduler in order to make things "go faster", so it feels wrong to me, and is probably why we don't have global functions like put_user_nopreempt() :) And is enabling and disabling preemption around single byte copies to/from userspace really a good idea? That seems like a lot of overhead you are now adding to your "fastpath" that you need to go even faster. And finally, I'm guessing this has passed the binder test suite that is out there for testing binder
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote: > In Android systems, the display pipeline relies on low > latency binder transactions and is therefore sensitive to > delays caused by contention for the global binder lock. > Jank is siginificantly reduced by disabling preemption > while the global binder lock is held. > > Originally-from: Riley Andrews So should the "From: " line be Riley? Did Riley sign off on this? > Signed-off-by: Todd Kjos > --- > drivers/android/binder.c | 194 > +++ > 1 file changed, 146 insertions(+), 48 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 16288e7..c36e420 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > struct files_struct *files = proc->files; > unsigned long rlim_cur; > unsigned long irqs; > + int ret; > > if (files == NULL) > return -ESRCH; > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > unlock_task_sighand(proc->tsk, &irqs); > > - return __alloc_fd(files, 0, rlim_cur, flags); > + preempt_enable_no_resched(); > + ret = __alloc_fd(files, 0, rlim_cur, flags); > + preempt_disable(); > + > + return ret; > } Your tabs are all eaten and broke the patch, so it can't be applied :( Can you try again? thanks, greg k-h
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
This was introduced in the 2015 Nexus devices and should have been submitted to the kernel then since we keep forward porting it to each new device. On Thu, Sep 8, 2016 at 9:12 AM, Todd Kjos wrote: > In Android systems, the display pipeline relies on low > latency binder transactions and is therefore sensitive to > delays caused by contention for the global binder lock. > Jank is siginificantly reduced by disabling preemption > while the global binder lock is held. > > Originally-from: Riley Andrews > Signed-off-by: Todd Kjos > --- > drivers/android/binder.c | 194 > +++ > 1 file changed, 146 insertions(+), 48 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 16288e7..c36e420 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > struct files_struct *files = proc->files; > unsigned long rlim_cur; > unsigned long irqs; > + int ret; > > if (files == NULL) > return -ESRCH; > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > unlock_task_sighand(proc->tsk, &irqs); > > - return __alloc_fd(files, 0, rlim_cur, flags); > + preempt_enable_no_resched(); > + ret = __alloc_fd(files, 0, rlim_cur, flags); > + preempt_disable(); > + > + return ret; > } > > /* > @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > static void task_fd_install( > struct binder_proc *proc, unsigned int fd, struct file *file) > { > - if (proc->files) > + if (proc->files) { > + preempt_enable_no_resched(); > __fd_install(proc->files, fd, file); > + preempt_disable(); > + } > } > > /* > @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) > { > trace_binder_lock(tag); > mutex_lock(&binder_main_lock); > + preempt_disable(); > trace_binder_locked(tag); > } > > @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) > { > trace_binder_unlock(tag); > mutex_unlock(&binder_main_lock); > + preempt_enable(); > +} > + > +static inline void *kzalloc_nopreempt(size_t size) > +{ > + void *ptr; > + > + ptr = kzalloc(size, GFP_NOWAIT); > + if (ptr) > + return ptr; > + > + preempt_enable_no_resched(); > + ptr = kzalloc(size, GFP_KERNEL); > + preempt_disable(); > + > + return ptr; > +} > + > +static inline long copy_to_user_nopreempt(void __user *to, > + const void *from, long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_to_user(to, from, n); > + preempt_disable(); > + return ret; > +} > + > +static inline long copy_from_user_nopreempt(void *to, > +const void __user *from, > +long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_from_user(to, from, n); > + preempt_disable(); > + return ret; > } > > +#define get_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched(); \ > + __ret = get_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) > + > +#define put_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched(); \ > + __ret = put_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) > + > static void binder_set_nice(long nice) > { > long min_nice; > @@ -568,6 +634,8 @@ static int binder_update_page_range(struct > binder_proc *proc, int allocate, > else > mm = get_task_mm(proc->tsk); > > + preempt_enable_no_resched(); > + > if (mm) { > down_write(&mm->mmap_sem); > vma = proc->vma; > @@ -622,6 +690,9 @@ static int binder_update_page_range(struct > binder_proc *proc, int allocate, > up_write(&mm->mmap_sem); > mmput(mm); > } > + > + preempt_disable(); > + > return 0; > > free_range: > @@ -644,6 +715,9 @@ err_no_vma: > up_write(&mm->mmap_sem); > mmput(mm); > } > + > + preempt_disable(); > + > return -ENOMEM; > } > > @@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct > binder_proc *proc, > return NULL; > } > > - node = kzalloc(sizeof(*node), GFP_KERNEL); > + node = kzalloc_nopreempt(sizeof(*node)); > if (node == NULL) > return NULL; > binder_stats_created(BINDER_STAT_NODE); > @@ -1040,7 +1114,7 @@ static struct binder_ref > *binder_get_ref_for_node(struct binder_proc *proc, > else > return ref; > } > - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); > + new_ref = kzalloc_nopreempt(sizeof(*ref)); > if (new_ref == NULL) > return NULL; > binder_stats_created(BINDER_STAT_REF); > @@ -1438,14 +1512,14 @@ static void binder_transaction(struct binder_proc > *proc, > e->to_proc = target_proc->pid; > > /* TODO: reuse incoming transaction for reply */ > - t = kzalloc(sizeof(*t), GFP_KERNEL); > + t = kzalloc_nopreempt(sizeof(*t)); > if (t == NULL) { > return_error = BR_FAILED_REPLY; > goto err_alloc_t_failed; > } > binder_stats_created(BINDER_STAT_TRANSACTION