Author: wulf
Date: Tue Dec  3 23:11:40 2019
New Revision: 355372
URL: https://svnweb.freebsd.org/changeset/base/355372

Log:
  MFC r355065 - r355068: Linux epoll improvements.
  
  r355065:
  Linux epoll: Don't deregister file descriptor after EPOLLONESHOT is fired
  
  Linux epoll does not remove the descriptor after a one-shot event has been
  triggered.
  Set the EV_DISPATCH kqueue flag rather than EV_ONESHOT to get the same behavior.
  
  Required by Linux Steam client.
  
  PR:           240590
  Reported by:  Alex S <iwt...@gmail.com>
  Reviewed by:  emaste, imp
  Differential Revision:        https://reviews.freebsd.org/D22513
  
  r355066:
  Linux epoll: Check both read and write kqueue events existence in 
EPOLL_CTL_ADD
  
  The Linux epoll EPOLL_CTL_ADD op handler should always check registration
  of both the EVFILT_READ and EVFILT_WRITE kevents to decide whether the
  supplied file descriptor fd is already registered with the epoll instance.
  
  Reviewed by:  emaste
  Differential Revision:        https://reviews.freebsd.org/D22515
  
  r355067:
  Linux epoll: Register events with zero event mask
  
  Such events are legal and should be interpreted as EPOLLERR | EPOLLHUP.
  Register a disabled kqueue event in that case, as we do not support EPOLLHUP
  yet.
  
  Required by Linux Steam client.
  
  PR:           240590
  Reported by:  Alex S <iwt...@gmail.com>
  Reviewed by:  emaste
  Differential Revision:        https://reviews.freebsd.org/D22516
  
  r355068:
  Linux epoll: Allow passing of any negative timeout value to epoll_wait
  
  Linux epoll allows passing any negative timeout value to epoll_wait()
  to cause unbounded blocking.
  
  Reviewed by:  emaste
  Differential Revision:        https://reviews.freebsd.org/D22517

Modified:
  stable/12/sys/compat/linux/linux_event.c
  stable/12/sys/compat/linux/linux_event.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/compat/linux/linux_event.c
==============================================================================
--- stable/12/sys/compat/linux/linux_event.c    Tue Dec  3 23:07:09 2019        
(r355371)
+++ stable/12/sys/compat/linux/linux_event.c    Tue Dec  3 23:11:40 2019        
(r355372)
@@ -98,14 +98,16 @@ __attribute__((packed))
 #define        LINUX_MAX_EVENTS        (INT_MAX / sizeof(struct epoll_event))
 
 static void    epoll_fd_install(struct thread *td, int fd, epoll_udata_t 
udata);
-static int     epoll_to_kevent(struct thread *td, struct file *epfp,
-                   int fd, struct epoll_event *l_event, int *kev_flags,
-                   struct kevent *kevent, int *nkevents);
+static int     epoll_to_kevent(struct thread *td, int fd,
+                   struct epoll_event *l_event, struct kevent *kevent,
+                   int *nkevents);
 static void    kevent_to_epoll(struct kevent *kevent, struct epoll_event 
*l_event);
 static int     epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
 static int     epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
-static int     epoll_delete_event(struct thread *td, struct file *epfp,
-                   int fd, int filter);
+static int     epoll_register_kevent(struct thread *td, struct file *epfp,
+                   int fd, int filter, unsigned int flags);
+static int     epoll_fd_registered(struct thread *td, struct file *epfp,
+                   int fd);
 static int     epoll_delete_all_events(struct thread *td, struct file *epfp,
                    int fd);
 
@@ -296,33 +298,38 @@ linux_epoll_create1(struct thread *td, struct linux_ep
 
 /* Structure converting function from epoll to kevent. */
 static int
-epoll_to_kevent(struct thread *td, struct file *epfp,
-    int fd, struct epoll_event *l_event, int *kev_flags,
+epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
     struct kevent *kevent, int *nkevents)
 {
        uint32_t levents = l_event->events;
        struct linux_pemuldata *pem;
        struct proc *p;
+       unsigned short kev_flags = EV_ADD | EV_ENABLE;
 
        /* flags related to how event is registered */
        if ((levents & LINUX_EPOLLONESHOT) != 0)
-               *kev_flags |= EV_ONESHOT;
+               kev_flags |= EV_DISPATCH;
        if ((levents & LINUX_EPOLLET) != 0)
-               *kev_flags |= EV_CLEAR;
+               kev_flags |= EV_CLEAR;
        if ((levents & LINUX_EPOLLERR) != 0)
-               *kev_flags |= EV_ERROR;
+               kev_flags |= EV_ERROR;
        if ((levents & LINUX_EPOLLRDHUP) != 0)
-               *kev_flags |= EV_EOF;
+               kev_flags |= EV_EOF;
 
        /* flags related to what event is registered */
        if ((levents & LINUX_EPOLL_EVRD) != 0) {
-               EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0);
+               EV_SET(kevent++, fd, EVFILT_READ, kev_flags, 0, 0, 0);
                ++(*nkevents);
        }
        if ((levents & LINUX_EPOLL_EVWR) != 0) {
-               EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0);
+               EV_SET(kevent++, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
                ++(*nkevents);
        }
+       /* zero event mask is legal */
+       if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) {
+               EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
+               ++(*nkevents);
+       }
 
        if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
                p = td->td_proc;
@@ -451,7 +458,6 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_
                                        epoll_kev_copyin};
        struct epoll_event le;
        cap_rights_t rights;
-       int kev_flags;
        int nchanges = 0;
        int error;
 
@@ -484,9 +490,7 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_
        ciargs.changelist = kev;
 
        if (args->op != LINUX_EPOLL_CTL_DEL) {
-               kev_flags = EV_ADD | EV_ENABLE;
-               error = epoll_to_kevent(td, epfp, args->fd, &le,
-                   &kev_flags, kev, &nchanges);
+               error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges);
                if (error != 0)
                        goto leave0;
        }
@@ -499,18 +503,10 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_
                break;
 
        case LINUX_EPOLL_CTL_ADD:
-               /*
-                * kqueue_register() return ENOENT if event does not exists
-                * and the EV_ADD flag is not set.
-                */
-               kev[0].flags &= ~EV_ADD;
-               error = kqfd_register(args->epfd, &kev[0], td, M_WAITOK);
-               if (error != ENOENT) {
+               if (epoll_fd_registered(td, epfp, args->fd)) {
                        error = EEXIST;
                        goto leave0;
                }
-               error = 0;
-               kev[0].flags |= EV_ADD;
                break;
 
        case LINUX_EPOLL_CTL_DEL:
@@ -561,13 +557,13 @@ linux_epoll_wait_common(struct thread *td, int epfd, s
                return (error);
        if (epfp->f_type != DTYPE_KQUEUE) {
                error = EINVAL;
-               goto leave1;
+               goto leave;
        }
        if (uset != NULL) {
                error = kern_sigprocmask(td, SIG_SETMASK, uset,
                    &omask, 0);
                if (error != 0)
-                       goto leave1;
+                       goto leave;
                td->td_pflags |= TDP_OLDMASK;
                /*
                 * Make sure that ast() is called on return to
@@ -585,11 +581,12 @@ linux_epoll_wait_common(struct thread *td, int epfd, s
        coargs.count = 0;
        coargs.error = 0;
 
-       if (timeout != -1) {
-               if (timeout < 0) {
-                       error = EINVAL;
-                       goto leave0;
-               }
+       /*
+        * Linux epoll_wait(2) man page states that timeout of -1 causes caller
+        * to block indefinitely. Real implementation does it if any negative
+        * timeout value is passed.
+        */
+       if (timeout >= 0) {
                /* Convert from milliseconds to timespec. */
                ts.tv_sec = timeout / 1000;
                ts.tv_nsec = (timeout % 1000) * 1000000;
@@ -609,11 +606,10 @@ linux_epoll_wait_common(struct thread *td, int epfd, s
        if (error == 0)
                td->td_retval[0] = coargs.count;
 
-leave0:
        if (uset != NULL)
                error = kern_sigprocmask(td, SIG_SETMASK, &omask,
                    NULL, 0);
-leave1:
+leave:
        fdrop(epfp, td);
        return (error);
 }
@@ -650,7 +646,8 @@ linux_epoll_pwait(struct thread *td, struct linux_epol
 }
 
 static int
-epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter)
+epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter,
+    unsigned int flags)
 {
        struct epoll_copyin_args ciargs;
        struct kevent kev;
@@ -659,18 +656,36 @@ epoll_delete_event(struct thread *td, struct file *epf
                                        epoll_kev_copyin};
 
        ciargs.changelist = &kev;
-       EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
+       EV_SET(&kev, fd, filter, flags, 0, 0, 0);
 
        return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL));
 }
 
 static int
+epoll_fd_registered(struct thread *td, struct file *epfp, int fd)
+{
+       /*
+        * Set empty filter flags to avoid accidental modification of already
+        * registered events. In the case of event re-registration:
+        * 1. If event does not exists kevent() does nothing and returns ENOENT
+        * 2. If event does exists, it's enabled/disabled state is preserved
+        *    but fflags, data and udata fields are overwritten. So we can not
+        *    set socket lowats and store user's context pointer in udata.
+        */
+       if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT ||
+           epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT)
+               return (1);
+
+       return (0);
+}
+
+static int
 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
 {
        int error1, error2;
 
-       error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ);
-       error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE);
+       error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE);
+       error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE);
 
        /* return 0 if at least one result positive */
        return (error1 == 0 ? 0 : error2);

Modified: stable/12/sys/compat/linux/linux_event.h
==============================================================================
--- stable/12/sys/compat/linux/linux_event.h    Tue Dec  3 23:07:09 2019        
(r355371)
+++ stable/12/sys/compat/linux/linux_event.h    Tue Dec  3 23:11:40 2019        
(r355372)
@@ -45,10 +45,10 @@
 #define        LINUX_EPOLLONESHOT      1u<<30
 #define        LINUX_EPOLLET           1u<<31
 
-#define        LINUX_EPOLL_EVRD        (LINUX_EPOLLIN|LINUX_EPOLLRDNORM        
\
-               |LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI)
+#define        LINUX_EPOLL_EVRD        (LINUX_EPOLLIN|LINUX_EPOLLRDNORM)
 #define        LINUX_EPOLL_EVWR        (LINUX_EPOLLOUT|LINUX_EPOLLWRNORM)
 #define        LINUX_EPOLL_EVSUP       (LINUX_EPOLLET|LINUX_EPOLLONESHOT       
\
+               |LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI           \
                |LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR|LINUX_EPOLLRDHUP)
 
 #define        LINUX_EPOLL_CTL_ADD     1
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to