On Thu, Jul 20, 2023 at 2:22 PM Alexander Aring <aahri...@redhat.com> wrote: > This patch fixes the current handling of F_CANCELLK by not just doing an > unlock, as we need to try to cancel the lock first. An unlock makes sense > on a non-blocking lock request, but if it's a blocking lock request we > need to cancel the request as long as it has not been granted yet. This patch fixes > this behaviour by first trying to cancel the lock request and, if that fails, > unlocking the lock, which seems to have been granted.
I don't like how this is implemented, but the problem already starts in commit 57e2c2f2d94c ("fs: dlm: fix mismatch of plock results from userspace"). That commit relies on how dlm_controld is implemented internally; it requires dlm_controld to keep all replies to non-blocking requests in perfect order. The correctness of that approach is more difficult to argue for than it should be, the code might break in non-obvious ways, and it limits the kinds of things dlm_controld will be able to do in the future. And this change relies on the same flawed design. Instead, when noticing that we don't have a way to uniquely identify requests, such a way should just have been introduced. The CANCEL request would then carry the same unique identifier as the original locking request, dlm_controld would reply either to the original locking request or to the cancel request, and the kernel would have no problem matching the reply to the request. But without unique request identifiers, we now end up with those redundant -ENOENT replies to CANCEL requests and with those essentially duplicate entries for the same request on recv_list. Not great. Andreas > Note: currently the nfs locking handling was disabled by commit > 40595cdc93ed ("nfs: block notification on fs with its own ->lock"). > However, DLM was never updated with regard to this change. Future > patches will try to fix lockd lock requests for DLM. This patch > currently assumes the upstream DLM lockd handling is correct. 
> --- > fs/dlm/plock.c | 103 +++++++++++++++++++++++++++++++++----- > fs/gfs2/file.c | 9 ++-- > fs/ocfs2/stack_user.c | 13 ++--- > include/linux/dlm_plock.h | 2 + > 4 files changed, 98 insertions(+), 29 deletions(-) > > diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c > index a8ffa0760913..943d9f8e5564 100644 > --- a/fs/dlm/plock.c > +++ b/fs/dlm/plock.c > @@ -42,6 +42,27 @@ static inline void set_version(struct dlm_plock_info *info) > info->version[2] = DLM_PLOCK_VERSION_PATCH; > } > > +static struct plock_op *plock_lookup_waiter(const struct dlm_plock_info > *info) > +{ > + struct plock_op *op = NULL, *iter; > + > + list_for_each_entry(iter, &recv_list, list) { > + if (iter->info.fsid == info->fsid && > + iter->info.number == info->number && > + iter->info.owner == info->owner && > + iter->info.pid == info->pid && > + iter->info.start == info->start && > + iter->info.end == info->end && > + iter->info.ex == info->ex && > + iter->info.wait) { > + op = iter; > + break; > + } > + } > + > + return op; > +} > + > static int check_version(struct dlm_plock_info *info) > { > if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || > @@ -334,6 +355,74 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 > number, struct file *file, > } > EXPORT_SYMBOL_GPL(dlm_posix_unlock); > > +/* > + * NOTE: This implementation can only handle async lock requests as nfs > + * do it. It cannot handle cancellation of a pending lock request sitting > + * in wait_event(), but for now only nfs is the only user local kernel > + * user. > + */ > +int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file > *file, > + struct file_lock *fl) > +{ > + struct dlm_plock_info info; > + struct plock_op *op; > + struct dlm_ls *ls; > + int rv; > + > + /* this only works for async request for now and nfs is the only > + * kernel user right now. 
> + */ > + if (WARN_ON_ONCE(!fl->fl_lmops || !fl->fl_lmops->lm_grant)) > + return -EOPNOTSUPP; > + > + ls = dlm_find_lockspace_local(lockspace); > + if (!ls) > + return -EINVAL; > + > + memset(&info, 0, sizeof(info)); > + info.pid = fl->fl_pid; > + info.ex = (fl->fl_type == F_WRLCK); > + info.fsid = ls->ls_global_id; > + dlm_put_lockspace(ls); > + info.number = number; > + info.start = fl->fl_start; > + info.end = fl->fl_end; > + info.owner = (__u64)fl->fl_pid; > + > + rv = do_lock_cancel(&info); > + switch (rv) { > + case 0: > + spin_lock(&ops_lock); > + /* lock request to cancel must be on recv_list because > + * do_lock_cancel() synchronizes it. > + */ > + op = plock_lookup_waiter(&info); > + if (WARN_ON_ONCE(!op)) { > + rv = -ENOLCK; > + break; > + } > + > + list_del(&op->list); > + spin_unlock(&ops_lock); > + WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK); > + op->data->callback(op->data->fl, -EINTR); > + dlm_release_plock_op(op); > + rv = -EINTR; > + break; > + case -ENOENT: > + /* if cancel wasn't successful we probably were to late > + * or it was a non-blocking lock request, so just unlock it. 
> + */ > + rv = dlm_posix_unlock(lockspace, number, file, fl); > + break; > + default: > + break; > + } > + > + return rv; > +} > +EXPORT_SYMBOL_GPL(dlm_posix_cancel); > + > int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, > struct file_lock *fl) > { > @@ -457,19 +546,7 @@ static ssize_t dev_write(struct file *file, const char > __user *u, size_t count, > */ > spin_lock(&ops_lock); > if (info.wait) { > - list_for_each_entry(iter, &recv_list, list) { > - if (iter->info.fsid == info.fsid && > - iter->info.number == info.number && > - iter->info.owner == info.owner && > - iter->info.pid == info.pid && > - iter->info.start == info.start && > - iter->info.end == info.end && > - iter->info.ex == info.ex && > - iter->info.wait) { > - op = iter; > - break; > - } > - } > + op = plock_lookup_waiter(&info); > } else { > list_for_each_entry(iter, &recv_list, list) { > if (!iter->info.wait) { > diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c > index 1bf3c4453516..386eceb2f574 100644 > --- a/fs/gfs2/file.c > +++ b/fs/gfs2/file.c > @@ -1436,17 +1436,14 @@ static int gfs2_lock(struct file *file, int cmd, > struct file_lock *fl) > > if (!(fl->fl_flags & FL_POSIX)) > return -ENOLCK; > - if (cmd == F_CANCELLK) { > - /* Hack: */ > - cmd = F_SETLK; > - fl->fl_type = F_UNLCK; > - } > if (unlikely(gfs2_withdrawn(sdp))) { > if (fl->fl_type == F_UNLCK) > locks_lock_file_wait(file, fl); > return -EIO; > } > - if (IS_GETLK(cmd)) > + if (cmd == F_CANCELLK) > + return dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl); > + else if (IS_GETLK(cmd)) > return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); > else if (fl->fl_type == F_UNLCK) > return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); > diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c > index 05d4414d0c33..9b76ee66aeb2 100644 > --- a/fs/ocfs2/stack_user.c > +++ b/fs/ocfs2/stack_user.c > @@ -738,18 +738,11 @@ static int user_plock(struct ocfs2_cluster_connection > *conn, > * > * 
Internally, fs/dlm will pass these to a misc device, which > * a userspace daemon will read and write to. > - * > - * For now, cancel requests (which happen internally only), > - * are turned into unlocks. Most of this function taken from > - * gfs2_lock. > */ > > - if (cmd == F_CANCELLK) { > - cmd = F_SETLK; > - fl->fl_type = F_UNLCK; > - } > - > - if (IS_GETLK(cmd)) > + if (cmd == F_CANCELLK) > + return dlm_posix_cancel(conn->cc_lockspace, ino, file, fl); > + else if (IS_GETLK(cmd)) > return dlm_posix_get(conn->cc_lockspace, ino, file, fl); > else if (fl->fl_type == F_UNLCK) > return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); > diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h > index e6d76e8715a6..15fc856d198c 100644 > --- a/include/linux/dlm_plock.h > +++ b/include/linux/dlm_plock.h > @@ -11,6 +11,8 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, > struct file *file, > int cmd, struct file_lock *fl); > int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file > *file, > struct file_lock *fl); > +int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file > *file, > + struct file_lock *fl); > int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, > struct file_lock *fl); > #endif > -- > 2.31.1 >