On Tue, Sep 01, 2009 at 07:24:24AM -0700, Davide Libenzi wrote:
> On Tue, 1 Sep 2009, Avi Kivity wrote:
> 
> > On 09/01/2009 02:45 AM, Davide Libenzi wrote:
> > > On Thu, 27 Aug 2009, Davide Libenzi wrote:
> > > 
> > >    
> > > > On Thu, 27 Aug 2009, Michael S. Tsirkin wrote:
> > > > 
> > > >      
> > > > > Oh, I stopped pushing EFD_STATE since we have a solution.
> > > > >        
> > > > Do you guys need the kernel-side eventfd_ctx_read() I posted or not?
> > > > Because if nobody uses it, I'm not going to push it.
> > > >      
> > > Guys, I did not get a reply on this. Do you need me to push it, or you're
> > > not going to use it at the end?
> > >    
> > 
> > We'll use it eventually, but we're still some ways from it.
> 
> OK, then bug me when you're going to need it. I won't push it before that.
> 
> 
> - Davide

So, it turns out that we need this: we thought we didn't, because
currently kvm does not zero the eventfd counter when it polls eventfd.  But
this causes spurious interrupts when we disconnect irqfd from kvm and
re-connect it back.

However, since kvm does its own thing with the wait queue, and might
read the counter from the wait queue callback (which might run in
interrupt context), a simpler, lower-level interface would be better for
us.  Does the following (build tested only) look palatable?

Thanks!


diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402f..e350ffd 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,6 +135,17 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
        return events;
 }
 
+/* Caller must have wait queue head lock. */
+ssize_t _eventfd_read_ctx(struct eventfd_ctx *ctx, u64 *ucnt)
+{
+       if (!ctx->count)
+               return -EAGAIN;
+       *ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+       ctx->count -= *ucnt;
+       return sizeof *ucnt;
+}
+EXPORT_SYMBOL_GPL(_eventfd_read_ctx);
+
 static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
                            loff_t *ppos)
 {
@@ -146,17 +157,14 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
        if (count < sizeof(ucnt))
                return -EINVAL;
        spin_lock_irq(&ctx->wqh.lock);
-       res = -EAGAIN;
-       if (ctx->count > 0)
-               res = sizeof(ucnt);
-       else if (!(file->f_flags & O_NONBLOCK)) {
+       res = _eventfd_read_ctx(ctx, &ucnt);
+       if (res < 0 && !(file->f_flags & O_NONBLOCK)) {
                __add_wait_queue(&ctx->wqh, &wait);
                for (res = 0;;) {
                        set_current_state(TASK_INTERRUPTIBLE);
-                       if (ctx->count > 0) {
-                               res = sizeof(ucnt);
+                       res = _eventfd_read_ctx(ctx, &ucnt);
+                       if (res > 0)
                                break;
-                       }
                        if (signal_pending(current)) {
                                res = -ERESTARTSYS;
                                break;
@@ -169,8 +177,6 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
                __set_current_state(TASK_RUNNING);
        }
        if (likely(res > 0)) {
-               ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-               ctx->count -= ucnt;
                if (waitqueue_active(&ctx->wqh))
                        wake_up_locked_poll(&ctx->wqh, POLLOUT);
        }
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 94dd103..a3d0ce9 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -34,6 +34,7 @@ struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t _eventfd_read_ctx(struct eventfd_ctx *ctx, u64 *ucnt);
 
 #else /* CONFIG_EVENTFD */
 
@@ -61,6 +62,11 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
 
 }
 
+static inline ssize_t _eventfd_read_ctx(struct eventfd_ctx *ctx, u64 *ucnt)
+{
+       return -ENOSYS;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */


-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to