The branch main has been updated by imp:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=af93fea710385b2b11f0cabd377e7ed6f3d97c34

commit af93fea710385b2b11f0cabd377e7ed6f3d97c34
Author:     Jake Freeland <jf...@freebsd.org>
AuthorDate: 2023-08-24 04:39:54 +0000
Commit:     Warner Losh <i...@freebsd.org>
CommitDate: 2023-08-24 20:28:56 +0000

    timerfd: Move implementation from linux compat to sys/kern
    
    Move the timerfd implementation from linux compat code to sys/kern. Use
    it to implement the new system calls for timerfd. Add a hook to kern_tc
    to allow timerfd to know when the system time has stepped. Add kqueue
    support to timerfd. Adjust a few names to be less Linux centric.
    
    RelNotes: YES
    Reviewed by: markj (on irc), imp, kib (with reservations), jhb (slack)
    Differential Revision: https://reviews.freebsd.org/D38459
---
 lib/libc/sys/Symbol.map                        |   3 +
 sys/bsm/audit_kevents.h                        |   1 +
 sys/compat/freebsd32/freebsd32_proto.h         |  14 +
 sys/compat/freebsd32/freebsd32_syscall.h       |   5 +-
 sys/compat/freebsd32/freebsd32_syscalls.c      |   3 +
 sys/compat/freebsd32/freebsd32_sysent.c        |   3 +
 sys/compat/freebsd32/freebsd32_systrace_args.c |  86 ++++
 sys/compat/linux/linux_event.c                 | 443 ++---------------
 sys/compat/linux/linux_event.h                 |  11 -
 sys/conf/files                                 |   1 +
 sys/kern/init_sysent.c                         |   3 +
 sys/kern/kern_descrip.c                        |   4 +-
 sys/kern/kern_tc.c                             |   2 +
 sys/kern/sys_timerfd.c                         | 632 +++++++++++++++++++++++++
 sys/kern/syscalls.c                            |   3 +
 sys/kern/syscalls.master                       |  20 +
 sys/kern/systrace_args.c                       |  86 ++++
 sys/sys/file.h                                 |   2 +-
 sys/sys/syscall.h                              |   5 +-
 sys/sys/syscall.mk                             |   5 +-
 sys/sys/sysproto.h                             |  20 +
 sys/sys/timerfd.h                              |  66 +++
 sys/sys/user.h                                 |   6 +
 23 files changed, 999 insertions(+), 425 deletions(-)

diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
index 9a07bb457eb8..7937661e3787 100644
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -421,6 +421,9 @@ FBSD_1.7 {
        kqueuex;
        membarrier;
        swapoff;
+       timerfd_create;
+       timerfd_gettime;
+       timerfd_settime;
 };
 
 FBSDprivate_1.0 {
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
index a6b50a67ee6a..d06381837aad 100644
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -661,6 +661,7 @@
 #define        AUE_AIO_WRITEV          43267   /* FreeBSD-specific. */
 #define        AUE_AIO_READV           43268   /* FreeBSD-specific. */
 #define        AUE_FSPACECTL           43269   /* FreeBSD-specific. */
+#define        AUE_TIMERFD             43270   /* FreeBSD/Linux. */
 
 /*
  * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/compat/freebsd32/freebsd32_proto.h 
b/sys/compat/freebsd32/freebsd32_proto.h
index bb333e0321a0..50448b6dce16 100644
--- a/sys/compat/freebsd32/freebsd32_proto.h
+++ b/sys/compat/freebsd32/freebsd32_proto.h
@@ -684,6 +684,16 @@ struct freebsd32_aio_writev_args {
 struct freebsd32_aio_readv_args {
        char aiocbp_l_[PADL_(struct aiocb32 *)]; struct aiocb32 * aiocbp; char 
aiocbp_r_[PADR_(struct aiocb32 *)];
 };
+struct freebsd32_timerfd_gettime_args {
+       char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+       char curr_value_l_[PADL_(struct itimerspec32 *)]; struct itimerspec32 * 
curr_value; char curr_value_r_[PADR_(struct itimerspec32 *)];
+};
+struct freebsd32_timerfd_settime_args {
+       char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+       char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+       char new_value_l_[PADL_(const struct itimerspec32 *)]; const struct 
itimerspec32 * new_value; char new_value_r_[PADR_(const struct itimerspec32 *)];
+       char old_value_l_[PADL_(struct itimerspec32 *)]; struct itimerspec32 * 
old_value; char old_value_r_[PADR_(struct itimerspec32 *)];
+};
 int    freebsd32_wait4(struct thread *, struct freebsd32_wait4_args *);
 int    freebsd32_ptrace(struct thread *, struct freebsd32_ptrace_args *);
 int    freebsd32_recvmsg(struct thread *, struct freebsd32_recvmsg_args *);
@@ -799,6 +809,8 @@ int freebsd32_cpuset_setdomain(struct thread *, struct 
freebsd32_cpuset_setdomai
 int    freebsd32___sysctlbyname(struct thread *, struct 
freebsd32___sysctlbyname_args *);
 int    freebsd32_aio_writev(struct thread *, struct freebsd32_aio_writev_args 
*);
 int    freebsd32_aio_readv(struct thread *, struct freebsd32_aio_readv_args *);
+int    freebsd32_timerfd_gettime(struct thread *, struct 
freebsd32_timerfd_gettime_args *);
+int    freebsd32_timerfd_settime(struct thread *, struct 
freebsd32_timerfd_settime_args *);
 
 #ifdef COMPAT_43
 
@@ -1292,6 +1304,8 @@ int       freebsd11_freebsd32_fstatat(struct thread *, 
struct freebsd11_freebsd32_fsta
 #define        FREEBSD32_SYS_AUE_freebsd32___sysctlbyname      AUE_SYSCTL
 #define        FREEBSD32_SYS_AUE_freebsd32_aio_writev  AUE_AIO_WRITEV
 #define        FREEBSD32_SYS_AUE_freebsd32_aio_readv   AUE_AIO_READV
+#define        FREEBSD32_SYS_AUE_freebsd32_timerfd_gettime     AUE_TIMERFD
+#define        FREEBSD32_SYS_AUE_freebsd32_timerfd_settime     AUE_TIMERFD
 
 #undef PAD_
 #undef PADL_
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h 
b/sys/compat/freebsd32/freebsd32_syscall.h
index c3d8617abf4b..e3777730be1c 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -502,4 +502,7 @@
 #define        FREEBSD32_SYS_swapoff   582
 #define        FREEBSD32_SYS_kqueuex   583
 #define        FREEBSD32_SYS_membarrier        584
-#define        FREEBSD32_SYS_MAXSYSCALL        585
+#define        FREEBSD32_SYS_timerfd_create    585
+#define        FREEBSD32_SYS_freebsd32_timerfd_gettime 586
+#define        FREEBSD32_SYS_freebsd32_timerfd_settime 587
+#define        FREEBSD32_SYS_MAXSYSCALL        588
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c 
b/sys/compat/freebsd32/freebsd32_syscalls.c
index 19d454743c55..ccc910ee5ca9 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -590,4 +590,7 @@ const char *freebsd32_syscallnames[] = {
        "swapoff",                      /* 582 = swapoff */
        "kqueuex",                      /* 583 = kqueuex */
        "membarrier",                   /* 584 = membarrier */
+       "timerfd_create",                       /* 585 = timerfd_create */
+       "freebsd32_timerfd_gettime",                    /* 586 = 
freebsd32_timerfd_gettime */
+       "freebsd32_timerfd_settime",                    /* 587 = 
freebsd32_timerfd_settime */
 };
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c 
b/sys/compat/freebsd32/freebsd32_sysent.c
index 971f06a643c5..fec6f4a47bd6 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -646,4 +646,7 @@ struct sysent freebsd32_sysent[] = {
        { .sy_narg = AS(swapoff_args), .sy_call = (sy_call_t *)sys_swapoff, 
.sy_auevent = AUE_SWAPOFF, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },     /* 
582 = swapoff */
        { .sy_narg = AS(kqueuex_args), .sy_call = (sy_call_t *)sys_kqueuex, 
.sy_auevent = AUE_KQUEUE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = 
SY_THR_STATIC }, /* 583 = kqueuex */
        { .sy_narg = AS(membarrier_args), .sy_call = (sy_call_t 
*)sys_membarrier, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },     /* 584 = membarrier */
+       { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t 
*)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },  /* 585 = timerfd_create */
+       { .sy_narg = AS(freebsd32_timerfd_gettime_args), .sy_call = (sy_call_t 
*)freebsd32_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = 
SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },        /* 586 = 
freebsd32_timerfd_gettime */
+       { .sy_narg = AS(freebsd32_timerfd_settime_args), .sy_call = (sy_call_t 
*)freebsd32_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = 
SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },        /* 587 = 
freebsd32_timerfd_settime */
 };
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c 
b/sys/compat/freebsd32/freebsd32_systrace_args.c
index 5dfc82c30b7b..2c26a0ddab2f 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3336,6 +3336,32 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, 
int *n_args)
                *n_args = 3;
                break;
        }
+       /* timerfd_create */
+       case 585: {
+               struct timerfd_create_args *p = params;
+               iarg[a++] = p->clockid; /* int */
+               iarg[a++] = p->flags; /* int */
+               *n_args = 2;
+               break;
+       }
+       /* freebsd32_timerfd_gettime */
+       case 586: {
+               struct freebsd32_timerfd_gettime_args *p = params;
+               iarg[a++] = p->fd; /* int */
+               uarg[a++] = (intptr_t)p->curr_value; /* struct itimerspec32 * */
+               *n_args = 2;
+               break;
+       }
+       /* freebsd32_timerfd_settime */
+       case 587: {
+               struct freebsd32_timerfd_settime_args *p = params;
+               iarg[a++] = p->fd; /* int */
+               iarg[a++] = p->flags; /* int */
+               uarg[a++] = (intptr_t)p->new_value; /* const struct 
itimerspec32 * */
+               uarg[a++] = (intptr_t)p->old_value; /* struct itimerspec32 * */
+               *n_args = 4;
+               break;
+       }
        default:
                *n_args = 0;
                break;
@@ -9005,6 +9031,51 @@ systrace_entry_setargdesc(int sysnum, int ndx, char 
*desc, size_t descsz)
                        break;
                };
                break;
+       /* timerfd_create */
+       case 585:
+               switch (ndx) {
+               case 0:
+                       p = "int";
+                       break;
+               case 1:
+                       p = "int";
+                       break;
+               default:
+                       break;
+               };
+               break;
+       /* freebsd32_timerfd_gettime */
+       case 586:
+               switch (ndx) {
+               case 0:
+                       p = "int";
+                       break;
+               case 1:
+                       p = "userland struct itimerspec32 *";
+                       break;
+               default:
+                       break;
+               };
+               break;
+       /* freebsd32_timerfd_settime */
+       case 587:
+               switch (ndx) {
+               case 0:
+                       p = "int";
+                       break;
+               case 1:
+                       p = "int";
+                       break;
+               case 2:
+                       p = "userland const struct itimerspec32 *";
+                       break;
+               case 3:
+                       p = "userland struct itimerspec32 *";
+                       break;
+               default:
+                       break;
+               };
+               break;
        default:
                break;
        };
@@ -10873,6 +10944,21 @@ systrace_return_setargdesc(int sysnum, int ndx, char 
*desc, size_t descsz)
                if (ndx == 0 || ndx == 1)
                        p = "int";
                break;
+       /* timerfd_create */
+       case 585:
+               if (ndx == 0 || ndx == 1)
+                       p = "int";
+               break;
+       /* freebsd32_timerfd_gettime */
+       case 586:
+               if (ndx == 0 || ndx == 1)
+                       p = "int";
+               break;
+       /* freebsd32_timerfd_settime */
+       case 587:
+               if (ndx == 0 || ndx == 1)
+                       p = "int";
+               break;
        default:
                break;
        };
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
index a7db8516e5f0..816c68a90f1d 100644
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -44,6 +44,7 @@
 #include <sys/specialfd.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
+#include <sys/timerfd.h>
 #include <sys/timespec.h>
 #include <sys/user.h>
 
@@ -99,55 +100,6 @@ struct epoll_copyout_args {
        int                     error;
 };
 
-/* timerfd */
-typedef uint64_t       timerfd_t;
-
-static fo_rdwr_t       timerfd_read;
-static fo_ioctl_t      timerfd_ioctl;
-static fo_poll_t       timerfd_poll;
-static fo_kqfilter_t   timerfd_kqfilter;
-static fo_stat_t       timerfd_stat;
-static fo_close_t      timerfd_close;
-static fo_fill_kinfo_t timerfd_fill_kinfo;
-
-static struct fileops timerfdops = {
-       .fo_read = timerfd_read,
-       .fo_write = invfo_rdwr,
-       .fo_truncate = invfo_truncate,
-       .fo_ioctl = timerfd_ioctl,
-       .fo_poll = timerfd_poll,
-       .fo_kqfilter = timerfd_kqfilter,
-       .fo_stat = timerfd_stat,
-       .fo_close = timerfd_close,
-       .fo_chmod = invfo_chmod,
-       .fo_chown = invfo_chown,
-       .fo_sendfile = invfo_sendfile,
-       .fo_fill_kinfo = timerfd_fill_kinfo,
-       .fo_flags = DFLAG_PASSABLE
-};
-
-static void    filt_timerfddetach(struct knote *kn);
-static int     filt_timerfdread(struct knote *kn, long hint);
-
-static struct filterops timerfd_rfiltops = {
-       .f_isfd = 1,
-       .f_detach = filt_timerfddetach,
-       .f_event = filt_timerfdread
-};
-
-struct timerfd {
-       clockid_t       tfd_clockid;
-       struct itimerspec tfd_time;
-       struct callout  tfd_callout;
-       timerfd_t       tfd_count;
-       bool            tfd_canceled;
-       struct selinfo  tfd_sel;
-       struct mtx      tfd_lock;
-};
-
-static void    linux_timerfd_expire(void *);
-static void    linux_timerfd_curval(struct timerfd *, struct itimerspec *);
-
 static int
 epoll_create_common(struct thread *td, int flags)
 {
@@ -658,255 +610,14 @@ linux_eventfd2(struct thread *td, struct 
linux_eventfd2_args *args)
 int
 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
 {
-       struct timerfd *tfd;
-       struct file *fp;
        clockid_t clockid;
-       int fflags, fd, error;
-
-       if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
-               return (EINVAL);
-
-       error = linux_to_native_clockid(&clockid, args->clockid);
-       if (error != 0)
-               return (error);
-       if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
-               return (EINVAL);
-
-       fflags = 0;
-       if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
-               fflags |= O_CLOEXEC;
-
-       error = falloc(td, &fp, &fd, fflags);
-       if (error != 0)
-               return (error);
-
-       tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
-       tfd->tfd_clockid = clockid;
-       mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
-
-       callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
-       knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
-
-       fflags = FREAD;
-       if ((args->flags & LINUX_O_NONBLOCK) != 0)
-               fflags |= FNONBLOCK;
-
-       finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
-       fdrop(fp, td);
-
-       td->td_retval[0] = fd;
-       return (error);
-}
-
-static int
-timerfd_close(struct file *fp, struct thread *td)
-{
-       struct timerfd *tfd;
-
-       tfd = fp->f_data;
-       if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-               return (EINVAL);
-
-       timespecclear(&tfd->tfd_time.it_value);
-       timespecclear(&tfd->tfd_time.it_interval);
-
-       callout_drain(&tfd->tfd_callout);
-
-       seldrain(&tfd->tfd_sel);
-       knlist_destroy(&tfd->tfd_sel.si_note);
-
-       fp->f_ops = &badfileops;
-       mtx_destroy(&tfd->tfd_lock);
-       free(tfd, M_EPOLL);
-
-       return (0);
-}
-
-static int
-timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
-    int flags, struct thread *td)
-{
-       struct timerfd *tfd;
-       timerfd_t count;
-       int error;
-
-       tfd = fp->f_data;
-       if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-               return (EINVAL);
-
-       if (uio->uio_resid < sizeof(timerfd_t))
-               return (EINVAL);
-
-       error = 0;
-       mtx_lock(&tfd->tfd_lock);
-retry:
-       if (tfd->tfd_canceled) {
-               tfd->tfd_count = 0;
-               mtx_unlock(&tfd->tfd_lock);
-               return (ECANCELED);
-       }
-       if (tfd->tfd_count == 0) {
-               if ((fp->f_flag & FNONBLOCK) != 0) {
-                       mtx_unlock(&tfd->tfd_lock);
-                       return (EAGAIN);
-               }
-               error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, 
"ltfdrd", 0);
-               if (error == 0)
-                       goto retry;
-       }
-       if (error == 0) {
-               count = tfd->tfd_count;
-               tfd->tfd_count = 0;
-               mtx_unlock(&tfd->tfd_lock);
-               error = uiomove(&count, sizeof(timerfd_t), uio);
-       } else
-               mtx_unlock(&tfd->tfd_lock);
-
-       return (error);
-}
-
-static int
-timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
-    struct thread *td)
-{
-       struct timerfd *tfd;
-       int revents = 0;
-
-       tfd = fp->f_data;
-       if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-               return (POLLERR);
-
-       mtx_lock(&tfd->tfd_lock);
-       if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
-               revents |= events & (POLLIN|POLLRDNORM);
-       if (revents == 0)
-               selrecord(td, &tfd->tfd_sel);
-       mtx_unlock(&tfd->tfd_lock);
-
-       return (revents);
-}
-
-static int
-timerfd_kqfilter(struct file *fp, struct knote *kn)
-{
-       struct timerfd *tfd;
-
-       tfd = fp->f_data;
-       if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-               return (EINVAL);
-
-       if (kn->kn_filter == EVFILT_READ)
-               kn->kn_fop = &timerfd_rfiltops;
-       else
-               return (EINVAL);
-
-       kn->kn_hook = tfd;
-       knlist_add(&tfd->tfd_sel.si_note, kn, 0);
-
-       return (0);
-}
-
-static void
-filt_timerfddetach(struct knote *kn)
-{
-       struct timerfd *tfd = kn->kn_hook;
-
-       mtx_lock(&tfd->tfd_lock);
-       knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
-       mtx_unlock(&tfd->tfd_lock);
-}
-
-static int
-filt_timerfdread(struct knote *kn, long hint)
-{
-       struct timerfd *tfd = kn->kn_hook;
-
-       return (tfd->tfd_count > 0);
-}
-
-static int
-timerfd_ioctl(struct file *fp, u_long cmd, void *data,
-    struct ucred *active_cred, struct thread *td)
-{
-
-       if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
-               return (EINVAL);
-
-       switch (cmd) {
-       case FIONBIO:
-       case FIOASYNC:
-               return (0);
-       }
-
-       return (ENOTTY);
-}
-
-static int
-timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
-{
-
-       return (ENXIO);
-}
-
-static int
-timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc 
*fdp)
-{
-
-       kif->kf_type = KF_TYPE_UNKNOWN;
-       return (0);
-}
-
-static void
-linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
-{
-
-       if (tfd->tfd_clockid == CLOCK_REALTIME)
-               getnanotime(ts);
-       else    /* CLOCK_MONOTONIC */
-               getnanouptime(ts);
-}
-
-static void
-linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
-{
-       struct timespec cts;
-
-       linux_timerfd_clocktime(tfd, &cts);
-       *ots = tfd->tfd_time;
-       if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
-               timespecsub(&ots->it_value, &cts, &ots->it_value);
-               if (ots->it_value.tv_sec < 0 ||
-                   (ots->it_value.tv_sec == 0 &&
-                    ots->it_value.tv_nsec == 0)) {
-                       ots->it_value.tv_sec  = 0;
-                       ots->it_value.tv_nsec = 1;
-               }
-       }
-}
-
-static int
-linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots)
-{
-       struct timerfd *tfd;
-       struct file *fp;
        int error;
 
-       error = fget(td, fd, &cap_read_rights, &fp);
+       error = linux_to_native_clockid(&clockid, args->clockid);
        if (error != 0)
                return (error);
-       tfd = fp->f_data;
-       if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
-               error = EINVAL;
-               goto out;
-       }
-
-       mtx_lock(&tfd->tfd_lock);
-       linux_timerfd_curval(tfd, ots);
-       mtx_unlock(&tfd->tfd_lock);
 
-out:
-       fdrop(fp, td);
-       return (error);
+       return (kern_timerfd_create(td, clockid, args->flags));
 }
 
 int
@@ -916,84 +627,14 @@ linux_timerfd_gettime(struct thread *td, struct 
linux_timerfd_gettime_args *args
        struct itimerspec ots;
        int error;
 
-       error = linux_timerfd_gettime_common(td, args->fd, &ots);
+       error = kern_timerfd_gettime(td, args->fd, &ots);
        if (error != 0)
                return (error);
-       error = native_to_linux_itimerspec(&lots, &ots);
-       if (error == 0)
-               error = copyout(&lots, args->old_value, sizeof(lots));
-       return (error);
-}
-
-#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
-int
-linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args 
*args)
-{
-       struct l_itimerspec64 lots;
-       struct itimerspec ots;
-       int error;
 
-       error = linux_timerfd_gettime_common(td, args->fd, &ots);
-       if (error != 0)
-               return (error);
-       error = native_to_linux_itimerspec64(&lots, &ots);
+       error = native_to_linux_itimerspec(&lots, &ots);
        if (error == 0)
                error = copyout(&lots, args->old_value, sizeof(lots));
-       return (error);
-}
-#endif
-
-static int
-linux_timerfd_settime_common(struct thread *td, int fd, int flags,
-    struct itimerspec *nts, struct itimerspec *oval)
-{
-       struct timespec cts, ts;
-       struct timerfd *tfd;
-       struct timeval tv;
-       struct file *fp;
-       int error;
-
-       if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
-               return (EINVAL);
-
-       error = fget(td, fd, &cap_write_rights, &fp);
-       if (error != 0)
-               return (error);
-       tfd = fp->f_data;
-       if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
-               error = EINVAL;
-               goto out;
-       }
-
-       mtx_lock(&tfd->tfd_lock);
-       if (!timespecisset(&nts->it_value))
-               timespecclear(&nts->it_interval);
-       if (oval != NULL)
-               linux_timerfd_curval(tfd, oval);
-
-       bcopy(nts, &tfd->tfd_time, sizeof(*nts));
-       tfd->tfd_count = 0;
-       if (timespecisset(&nts->it_value)) {
-               linux_timerfd_clocktime(tfd, &cts);
-               ts = nts->it_value;
-               if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
-                       timespecadd(&tfd->tfd_time.it_value, &cts,
-                               &tfd->tfd_time.it_value);
-               } else {
-                       timespecsub(&ts, &cts, &ts);
-               }
-               TIMESPEC_TO_TIMEVAL(&tv, &ts);
-               callout_reset(&tfd->tfd_callout, tvtohz(&tv),
-                       linux_timerfd_expire, tfd);
-               tfd->tfd_canceled = false;
-       } else {
-               tfd->tfd_canceled = true;
-               callout_stop(&tfd->tfd_callout);
-       }
-       mtx_unlock(&tfd->tfd_lock);
 
-out:
-       fdrop(fp, td);
        return (error);
 }
 
@@ -1001,7 +642,7 @@ int
 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args 
*args)
 {
        struct l_itimerspec lots;
-       struct itimerspec nts, ots, *pots;
+       struct itimerspec nts, ots;
        int error;
 
        error = copyin(args->new_value, &lots, sizeof(lots));
@@ -1010,23 +651,43 @@ linux_timerfd_settime(struct thread *td, struct 
linux_timerfd_settime_args *args
        error = linux_to_native_itimerspec(&nts, &lots);
        if (error != 0)
                return (error);
-       pots = (args->old_value != NULL ? &ots : NULL);
-       error = linux_timerfd_settime_common(td, args->fd, args->flags,
-           &nts, pots);
+       if (args->old_value == NULL)
+               error = kern_timerfd_settime(td, args->fd, args->flags, &nts, 
NULL);
+       else
+               error = kern_timerfd_settime(td, args->fd, args->flags, &nts, 
&ots);
        if (error == 0 && args->old_value != NULL) {
                error = native_to_linux_itimerspec(&lots, &ots);
                if (error == 0)
                        error = copyout(&lots, args->old_value, sizeof(lots));
        }
+
        return (error);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
+int
+linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args 
*args)
+{
+       struct l_itimerspec64 lots;
+       struct itimerspec ots;
+       int error;
+
+       error = kern_timerfd_gettime(td, args->fd, &ots);
+       if (error != 0)
+               return (error);
+
+       error = native_to_linux_itimerspec64(&lots, &ots);
+       if (error == 0)
+               error = copyout(&lots, args->old_value, sizeof(lots));
+
+       return (error);
+}
+
 int
 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args 
*args)
 {
        struct l_itimerspec64 lots;
-       struct itimerspec nts, ots, *pots;
+       struct itimerspec nts, ots;
        int error;
 
        error = copyin(args->new_value, &lots, sizeof(lots));
@@ -1035,50 +696,16 @@ linux_timerfd_settime64(struct thread *td, struct 
linux_timerfd_settime64_args *
        error = linux_to_native_itimerspec64(&nts, &lots);
        if (error != 0)
                return (error);
-       pots = (args->old_value != NULL ? &ots : NULL);
-       error = linux_timerfd_settime_common(td, args->fd, args->flags,
-           &nts, pots);
+       if (args->old_value == NULL)
+               error = kern_timerfd_settime(td, args->fd, args->flags, &nts, 
NULL);
+       else
+               error = kern_timerfd_settime(td, args->fd, args->flags, &nts, 
&ots);
        if (error == 0 && args->old_value != NULL) {
                error = native_to_linux_itimerspec64(&lots, &ots);
                if (error == 0)
                        error = copyout(&lots, args->old_value, sizeof(lots));
        }
+
        return (error);
 }
 #endif
-
-static void
-linux_timerfd_expire(void *arg)
-{
-       struct timespec cts, ts;
-       struct timeval tv;
-       struct timerfd *tfd;
-
-       tfd = (struct timerfd *)arg;
-
-       linux_timerfd_clocktime(tfd, &cts);
-       if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
-               if (timespecisset(&tfd->tfd_time.it_interval))
-                       timespecadd(&tfd->tfd_time.it_value,
-                                   &tfd->tfd_time.it_interval,
-                                   &tfd->tfd_time.it_value);
-               else
-                       /* single shot timer */
-                       timespecclear(&tfd->tfd_time.it_value);
-               if (timespecisset(&tfd->tfd_time.it_value)) {
-                       timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
-                       TIMESPEC_TO_TIMEVAL(&tv, &ts);
-                       callout_reset(&tfd->tfd_callout, tvtohz(&tv),
-                               linux_timerfd_expire, tfd);
-               }
-               tfd->tfd_count++;
-               KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
-               selwakeup(&tfd->tfd_sel);
-               wakeup(&tfd->tfd_count);
-       } else if (timespecisset(&tfd->tfd_time.it_value)) {
-               timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
-               TIMESPEC_TO_TIMEVAL(&tv, &ts);
-               callout_reset(&tfd->tfd_callout, tvtohz(&tv),
-                   linux_timerfd_expire, tfd);
-       }
-}
diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h
index 32269b0070bc..fa63371b5170 100644
--- a/sys/compat/linux/linux_event.h
+++ b/sys/compat/linux/linux_event.h
@@ -54,15 +54,4 @@
 
 #define        LINUX_EFD_SEMAPHORE     (1 << 0)
 
-#define        LINUX_TFD_TIMER_ABSTIME (1 << 0)
-#define        LINUX_TFD_TIMER_CANCEL_ON_SET   (1 << 1)
-#define        LINUX_TFD_CLOEXEC       LINUX_O_CLOEXEC
-#define        LINUX_TFD_NONBLOCK      LINUX_O_NONBLOCK
-
-#define        LINUX_TFD_SHARED_FCNTL_FLAGS    (LINUX_TFD_CLOEXEC              
\
-               |LINUX_TFD_NONBLOCK)
-#define        LINUX_TFD_CREATE_FLAGS  LINUX_TFD_SHARED_FCNTL_FLAGS
-#define        LINUX_TFD_SETTIME_FLAGS (LINUX_TFD_TIMER_ABSTIME                
\
-               |LINUX_TFD_TIMER_CANCEL_ON_SET)
-
 #endif /* !_LINUX_EVENT_H_ */
diff --git a/sys/conf/files b/sys/conf/files
index 3f79ce752c80..8d38b9cc8a2e 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3908,6 +3908,7 @@ kern/sys_pipe.c                   standard
 kern/sys_procdesc.c            standard
 kern/sys_process.c             standard
 kern/sys_socket.c              standard
+kern/sys_timerfd.c             standard
 kern/syscalls.c                        standard
 kern/sysv_ipc.c                        standard
 kern/sysv_msg.c                        optional sysvmsg
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 1e62c46b8be0..d44fec54fcd7 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -645,4 +645,7 @@ struct sysent sysent[] = {
        { .sy_narg = AS(swapoff_args), .sy_call = (sy_call_t *)sys_swapoff, 
.sy_auevent = AUE_SWAPOFF, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },     /* 
582 = swapoff */
        { .sy_narg = AS(kqueuex_args), .sy_call = (sy_call_t *)sys_kqueuex, 
.sy_auevent = AUE_KQUEUE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = 
SY_THR_STATIC }, /* 583 = kqueuex */
        { .sy_narg = AS(membarrier_args), .sy_call = (sy_call_t 
*)sys_membarrier, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },     /* 584 = membarrier */
+       { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t 
*)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },  /* 585 = timerfd_create */
+       { .sy_narg = AS(timerfd_gettime_args), .sy_call = (sy_call_t 
*)sys_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },        /* 586 = timerfd_gettime */
+       { .sy_narg = AS(timerfd_settime_args), .sy_call = (sy_call_t 
*)sys_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },        /* 587 = timerfd_settime */
 };
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index c5226288afc5..35046c856d54 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -5001,8 +5001,8 @@ file_type_to_name(short type)
                return ("proc");
        case DTYPE_EVENTFD:
                return ("eventfd");
-       case DTYPE_LINUXTFD:
-               return ("ltimer");
+       case DTYPE_TIMERFD:
+               return ("timerfd");
        default:
                return ("unkn");
        }
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 170f35830923..26f09cb60260 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -34,6 +34,7 @@
 #include <sys/systm.h>
 #include <sys/timeffc.h>
 #include <sys/timepps.h>
+#include <sys/timerfd.h>
 #include <sys/timetc.h>
 #include <sys/timex.h>
 #include <sys/vdso.h>
@@ -1305,6 +1306,7 @@ tc_setclock(struct timespec *ts)
 
        /* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
        atomic_add_rel_int(&rtc_generation, 2);
+       timerfd_jumped();
        sleepq_chains_remove_matching(sleeping_on_old_rtc);
        if (timestepwarnings) {
                nanotime(&taft);
diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c
new file mode 100644
index 000000000000..6948fa059b8c
--- /dev/null
+++ b/sys/kern/sys_timerfd.c
@@ -0,0 +1,632 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2014 Dmitry Chagin <dcha...@freebsd.org>
+ * Copyright (c) 2023 Jake Freeland <jf...@freebsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/selinfo.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/timerfd.h>
+#include <sys/timespec.h>
+#include <sys/uio.h>
+#include <sys/user.h>
+
+#include <security/audit/audit.h>
+
+#ifdef COMPAT_FREEBSD32
+#include <compat/freebsd32/freebsd32.h>
+#include <compat/freebsd32/freebsd32_proto.h>
+#endif
+
+static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");
+static LIST_HEAD(, timerfd) timerfd_head;
+static struct unrhdr64 tfdino_unr;
+
+#define        TFD_NOJUMP      0       /* Realtime clock has not jumped. */
+#define        TFD_READ        1       /* Jumped, tfd has been read since. */
+#define        TFD_ZREAD       2       /* Jumped backwards, CANCEL_ON_SET=false. */
+#define        TFD_CANCELED    4       /* Jumped, CANCEL_ON_SET=true. */
+#define        TFD_JUMPED      (TFD_ZREAD | TFD_CANCELED)
+
+struct timerfd {
+       /* User specified. */
+       struct itimerspec tfd_time;     /* tfd timer */
+       clockid_t       tfd_clockid;    /* timing base */
+       int             tfd_flags;      /* creation flags */
+       int             tfd_timflags;   /* timer flags */
+
+       /* Used internally. */
+       timerfd_t       tfd_count;      /* expiration count since last read */
+       bool            tfd_expired;    /* true upon initial expiration */
+       struct mtx      tfd_lock;       /* mtx lock */
+       struct callout  tfd_callout;    /* expiration notification */
+       struct selinfo  tfd_sel;        /* I/O alerts */
+       struct timespec tfd_boottim;    /* cached boottime */
+       int             tfd_jumped;     /* timer jump status */
+       LIST_ENTRY(timerfd) entry;      /* entry in list */
+
+       /* For stat(2). */
+       ino_t           tfd_ino;        /* inode number */
+       struct timespec tfd_atim;       /* time of last read */
+       struct timespec tfd_mtim;       /* time of last settime */
+       struct timespec tfd_birthtim;   /* creation time */
+};
+
+static void
+timerfd_init(void *data)
+{
+       new_unrhdr64(&tfdino_unr, 1);
+}
+
+SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
+
+static inline void
+timerfd_getboottime(struct timespec *ts)
+{
+       struct timeval tv;
+       getboottime(&tv);
+       TIMEVAL_TO_TIMESPEC(&tv, ts);
+}
+
+/*
+ * Call when a discontinuous jump has occurred in CLOCK_REALTIME and
+ * update timerfd's cached boottime. A jump can be triggered using
+ * functions like clock_settime(2) or settimeofday(2).
+ *
+ * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
*** 850 LINES SKIPPED ***


Reply via email to