On Sun, 10 Jan 2010, Michael S. Tsirkin wrote:
> On Sun, Jan 10, 2010 at 09:27:34AM -0800, Davide Libenzi wrote:
> > On Sun, 10 Jan 2010, Michael S. Tsirkin wrote:
> >
> > > Not sure what you mean by irqfd locking. IMO we must take waitqueue
> > > lock, otherwise we can not prevent wait queue callback from being
> > > invoked, right? Note that if we take our own lock under wait queue
> > > callback, we won't be able to use this lock around remove_wait_queue.
> > > Also note how fs/evetfd.c uses __remove_wait_queue after taking wait
> > > queue lock - we'd like to do the same with our wait queue.
> > >
> > > Could you clarify pls?
> >
> > Wait a second. Looking at the current code in Linus tree, when you
> > deassign an irqfd, you are actually destroying it, so the idea of saving
> > the counter on deassign is not going to work.
> >
> >
> >
> > - Davide
>
> Yes, the only context passed between deassign and assign is the eventfd
> itself. So I think the following code for deassign would work provided
> eventfd_ctx_read_locked works with wait queue lock taken:
>
> spin_lock_irqsave(&irqfd->wqh.lock, flags);
> eventfd_ctx_read_locked(ctx->eventfd, 1, &ucnt);
> __remove_wait_queue(irqfd->wqh, &irqfd->wait);
> spin_lock_irqrestore(&irqfd->wqh.lock, flags);
As I said, you cannot do that from KVM, since write-witers will nedd to be
woke up.
Use the eventfd_ctx_remove_wait_queue() below (NOTE: compiled, not tested).
- Davide
---
fs/eventfd.c | 88 +++++++++++++++++++++++++++++++++++++++---------
include/linux/eventfd.h | 15 ++++++++
2 files changed, 88 insertions(+), 15 deletions(-)
Index: linux-2.6.mod/fs/eventfd.c
===================================================================
--- linux-2.6.mod.orig/fs/eventfd.c 2010-01-06 18:33:27.000000000 -0800
+++ linux-2.6.mod/fs/eventfd.c 2010-01-10 11:02:00.000000000 -0800
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct
return events;
}
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+ *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1: ctx->count;
+ ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait
queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+ __u64 *cnt)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+ eventfd_ctx_do_read(ctx, cnt);
+ __remove_wait_queue(&ctx->wqh, wait);
+ if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+ wake_up_locked_poll(&ctx->wqh, POLLOUT);
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+ return *cnt != 0 ? 0: -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
{
- struct eventfd_ctx *ctx = file->private_data;
ssize_t res;
- __u64 ucnt = 0;
DECLARE_WAITQUEUE(wait, current);
- if (count < sizeof(ucnt))
- return -EINVAL;
spin_lock_irq(&ctx->wqh.lock);
+ *cnt = 0;
res = -EAGAIN;
if (ctx->count > 0)
- res = sizeof(ucnt);
- else if (!(file->f_flags & O_NONBLOCK)) {
+ res = 0;
+ else if (!no_wait) {
__add_wait_queue(&ctx->wqh, &wait);
- for (res = 0;;) {
+ for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (ctx->count > 0) {
- res = sizeof(ucnt);
+ res = 0;
break;
}
if (signal_pending(current)) {
@@ -168,18 +213,31 @@ static ssize_t eventfd_read(struct file
__remove_wait_queue(&ctx->wqh, &wait);
__set_current_state(TASK_RUNNING);
}
- if (likely(res > 0)) {
- ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
- ctx->count -= ucnt;
+ if (likely(res == 0)) {
+ eventfd_ctx_do_read(ctx, cnt);
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, POLLOUT);
}
spin_unlock_irq(&ctx->wqh.lock);
- if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
- return -EFAULT;
return res;
}
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct eventfd_ctx *ctx = file->private_data;
+ ssize_t res;
+ __u64 cnt;
+
+ if (count < sizeof(cnt))
+ return -EINVAL;
+ if ((res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt)) < 0)
+ return res;
+
+ return put_user(cnt, (__u64 __user *) buf) ? -EFAULT: sizeof(cnt);
+}
static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t
count,
loff_t *ppos)
Index: linux-2.6.mod/include/linux/eventfd.h
===================================================================
--- linux-2.6.mod.orig/include/linux/eventfd.h 2010-01-06 18:33:27.000000000
-0800
+++ linux-2.6.mod/include/linux/eventfd.h 2010-01-10 10:57:06.000000000
-0800
@@ -34,6 +34,9 @@ struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+ __u64 *cnt);
#else /* CONFIG_EVENTFD */
@@ -61,6 +64,18 @@ static inline void eventfd_ctx_put(struc
}
+static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
+ __u64 *cnt)
+{
+ return -ENOSYS;
+}
+
+static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
+ wait_queue_t *wait, __u64 *cnt)
+{
+ return -ENOSYS;
+}
+
#endif
#endif /* _LINUX_EVENTFD_H */
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html