The patch titled
pi-futex fixes
has been added to the -mm tree. Its filename is
pi-futex-fixes.patch
*** Remember to use Documentation/SubmitChecklist when testing your code ***
See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this
------------------------------------------------------
Subject: pi-futex fixes
From: Alexey Kuznetsov <[EMAIL PROTECTED]>
1. New entries can be added to tsk->pi_state_list after task completed
exit_pi_state_list(). The result is memory leakage and deadlocks.
2. handle_mm_fault() is called under spinlock. The result is obvious.
3. results in self-inflicted deadlock inside glibc.
Sometimes futex_lock_pi returns -ESRCH, when it is not expected
and glibc enters to for(;;) sleep() to simulate deadlock. This problem
is quite obvious and I think the patch is right. Though it looks like
each "if" in futex_lock_pi() got some stupid special case "else if". :-)
4. sometimes futex_lock_pi() returns -EDEADLK,
when nobody has the lock. The reason is also obvious (see comment
in the patch), but correct fix is far beyond my comprehension.
I guess someone already saw this, the chunk:
if (rt_mutex_trylock(&q.pi_state->pi_mutex))
ret = 0;
is obviously from the same opera. But it does not work, because the
rtmutex is really taken at this point: wake_futex_pi() of previous
owner reassigned it to us. My fix works. But it looks very stupid.
I would think about removal of shift of ownership in wake_futex_pi()
and making all the work in context of process taking lock.
(akpm: this doesn't vaguely apply to 2.6.21, but 2.6.21 need fixing!)
Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
Cc: Thomas Gleixner <[EMAIL PROTECTED]>
Cc: <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
---
kernel/futex.c | 73 ++++++++++++++++++++++++++++++++++++++---------
1 files changed, 60 insertions(+), 13 deletions(-)
diff -puN kernel/futex.c~pi-futex-fixes kernel/futex.c
--- a/kernel/futex.c~pi-futex-fixes
+++ a/kernel/futex.c
@@ -430,10 +430,6 @@ static struct task_struct * futex_find_g
p = NULL;
goto out_unlock;
}
- if (p->exit_state != 0) {
- p = NULL;
- goto out_unlock;
- }
get_task_struct(p);
out_unlock:
rcu_read_unlock();
@@ -540,6 +536,19 @@ lookup_pi_state(u32 uval, struct futex_h
if (!p)
return -ESRCH;
+ /*
+ * Task state transitions are protected only by tasklist_lock
+ * and by nothing else. Better solution would be
+ * to use ->pi_lock. But this should be done in do_exit() as well.
+ * So, I use tasklist_lock for now.
+ */
+ read_lock(&tasklist_lock);
+ if (p->exit_state) {
+ read_unlock(&tasklist_lock);
+ put_task_struct(p);
+ return -ESRCH;
+ }
+
pi_state = alloc_pi_state();
/*
@@ -557,6 +566,8 @@ lookup_pi_state(u32 uval, struct futex_h
pi_state->owner = p;
spin_unlock_irq(&p->pi_lock);
+ read_unlock(&tasklist_lock);
+
put_task_struct(p);
*ps = pi_state;
@@ -1174,7 +1185,7 @@ static int futex_requeue(u32 __user *uad
#ifdef CONFIG_DEBUG_PI_LIST
this->list.plist.lock = &hb2->lock;
#endif
- }
+ }
this->key = key2;
get_futex_key_refs(&key2);
drop_count++;
@@ -1371,7 +1382,7 @@ static int fixup_pi_state_owner(u32 __us
curval = futex_atomic_cmpxchg_inatomic(uaddr,
uval, newval);
if (curval == -EFAULT)
- ret = -EFAULT;
+ ret = -EFAULT;
if (curval == uval)
break;
uval = curval;
@@ -1709,6 +1720,7 @@ static int futex_lock_pi(u32 __user *uad
if (unlikely(ret != 0))
goto out_release_sem;
+ retry_unlocked:
hb = queue_lock(&q, -1, NULL);
retry_locked:
@@ -1813,6 +1825,19 @@ static int futex_lock_pi(u32 __user *uad
if (unlikely(curval != uval))
goto retry_locked;
ret = 0;
+ } else if (ret == -ESRCH) {
+ /*
+ * Process could exit right now, so that robust
+ * list was processed after we got uval. Retry:
+ */
+ pagefault_disable();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr,
+ uval, uval);
+ pagefault_enable();
+ if (unlikely(curval == -EFAULT))
+ goto uaddr_faulted;
+ if (unlikely(curval != uval))
+ goto retry_locked;
}
goto out_unlock_release_sem;
}
@@ -1861,6 +1886,22 @@ static int futex_lock_pi(u32 __user *uad
if (ret && q.pi_state->owner == curr) {
if (rt_mutex_trylock(&q.pi_state->pi_mutex))
ret = 0;
+ /*
+ * Holy crap... Now futex lock returns -EDEADLK
+ * sometimes, because ownership was passed to us while
+ * unlock of previous owner. Who wrote this?
+ * Please, fix this correctly. For now:
+ */
+ if (ret == -EDEADLK) {
+ pagefault_disable();
+ uval = futex_atomic_cmpxchg_inatomic(uaddr,
+ 0, 0);
+ pagefault_enable();
+ if (uval != -EFAULT &&
+ (uval & FUTEX_TID_MASK) == current->pid) {
+ ret = 0;
+ }
+ }
}
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
@@ -1887,16 +1928,19 @@ static int futex_lock_pi(u32 __user *uad
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. :-) --ANK
*/
+ queue_unlock(&q, hb);
+
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared,
attempt);
if (ret)
- goto out_unlock_release_sem;
- goto retry_locked;
+ goto out_release_sem;
+ goto retry_unlocked;
}
- queue_unlock(&q, hb);
if (fshared)
up_read(fshared);
@@ -1940,9 +1984,9 @@ retry:
goto out;
hb = hash_futex(&key);
+retry_unlocked:
spin_lock(&hb->lock);
-retry_locked:
/*
* To avoid races, try to do the TID -> 0 atomic transition
* again. If it succeeds then we can return without waking
@@ -2005,16 +2049,19 @@ pi_faulted:
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. --ANK
*/
+ spin_unlock(&hb->lock);
+
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared,
attempt);
if (ret)
- goto out_unlock;
- goto retry_locked;
+ goto out;
+ goto retry_unlocked;
}
- spin_unlock(&hb->lock);
if (fshared)
up_read(fshared);
_
Patches currently in -mm which might be from [EMAIL PROTECTED] are
pi-futex-fixes.patch
-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html