Hi,
The new locking code in 2.4.0 contains a number of semaphore
deadlocks since it fails to drop semaphores when calling the
filesystem notification routines.
Basically, the fact that it needs to do local management using
the posix_*() exported API means that knfsd will currently
deadlock on the file_lock_sem. In addition, the fact that the client
nfs is being called when this semaphore is held means that any
attempt to use locking on a loopback NFS mount deadlocks, since the
lockd daemon (which is supposed to service the request) cannot grab
file_lock_sem.
In addition, there seems to be some confusion as to who is notifying
who. I'd like to remind that the file->f_op->lock() routine should
only be called whenever we want the nfs client to
request/relinquish/test a lock from the *external server*.
Once that has been done, we call the posix_*() routines in order to
notify the file locking code of the result. It is therefore wrong for
the posix routines to be calling file->f_op->lock() every time that
fs/locks.c does some internal merging/splitting of the file_lock
structures.
The following patch attempts to address these 2 issues. Comments?
Cheers,
Trond
--- linux-2.4.0-test10-pre5/fs/locks.c Tue Oct 24 21:09:24 2000
+++ linux-2.4.0-test10-fix_locks/fs/locks.c Wed Oct 25 02:36:30 2000
@@ -436,21 +436,28 @@
{
while (!list_empty(&blocker->fl_block)) {
struct file_lock *waiter = list_entry(blocker->fl_block.next, struct
file_lock, fl_block);
+
+ if (!wait) {
+ /* Remove waiter from the block list, because by the
+ * time it wakes up blocker won't exist any more.
+ */
+ locks_delete_block(waiter);
+ }
/* N.B. Is it possible for the notify function to block?? */
- if (waiter->fl_notify)
+ if (waiter->fl_notify) {
+ release_fl_sem();
waiter->fl_notify(waiter);
- wake_up(&waiter->fl_wait);
+ acquire_fl_sem();
+ } else
+ wake_up(&waiter->fl_wait);
if (wait) {
/* Let the blocked process remove waiter from the
* block list when it gets scheduled.
*/
current->policy |= SCHED_YIELD;
+ release_fl_sem();
schedule();
- } else {
- /* Remove waiter from the block list, because by the
- * time it wakes up blocker won't exist any more.
- */
- locks_delete_block(waiter);
+ acquire_fl_sem();
}
}
}
@@ -466,8 +473,11 @@
fl->fl_next = *pos;
*pos = fl;
- if (fl->fl_insert)
+ if (fl->fl_insert) {
+ release_fl_sem();
fl->fl_insert(fl);
+ acquire_fl_sem();
+ }
}
/* Delete a lock and then free it.
@@ -477,7 +487,6 @@
*/
static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait)
{
- int (*lock)(struct file *, int, struct file_lock *);
struct file_lock *fl = *thisfl_p;
*thisfl_p = fl->fl_next;
@@ -492,16 +501,32 @@
fl->fl_fasync = NULL;
}
- if (fl->fl_remove)
+ if (fl->fl_remove) {
+ release_fl_sem();
fl->fl_remove(fl);
+ acquire_fl_sem();
+ }
locks_wake_up_blocks(fl, wait);
- lock = fl->fl_file->f_op->lock;
- if (lock) {
+ locks_free_lock(fl);
+}
+
+/*
+ * Call back client filesystem in order to get it to unregister a lock,
+ * then delete lock. Essentially useful only in locks_remove_*().
+ */
+static inline void locks_unlock_delete(struct file_lock **thisfl_p)
+{
+ struct file_lock *fl = *thisfl_p;
+ int (*lock)(struct file *, int, struct file_lock *);
+
+ if ((lock = fl->fl_file->f_op->lock) != NULL) {
fl->fl_type = F_UNLCK;
+ release_fl_sem();
lock(fl->fl_file, F_SETLK, fl);
+ acquire_fl_sem();
}
- locks_free_lock(fl);
+ locks_delete_lock(thisfl_p, 0);
}
/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
@@ -1644,11 +1669,12 @@
return;
}
acquire_fl_sem();
+ again:
before = &inode->i_flock;
while ((fl = *before) != NULL) {
if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) {
- locks_delete_lock(before, 0);
- continue;
+ locks_unlock_delete(before);
+ goto again;
}
before = &fl->fl_next;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]