On Thu, Jan 09, 2020 at 04:10:03PM +0100, Martin Pieuchot wrote:
> SO_RCVTIMEO and SO_SNDTIMEO allow userland to specify a timeout value
> via a 'struct timeval'.  Internally the kernel keeps this time
> representation in ticks.  Diff below changes that to nanoseconds which
> allows us to use tsleep_nsec(9) in sosleep().
> 
> That means we need a new conversion routine: TIMEVAL_TO_NSEC().  Here
> again I screwed up the overflow check.  If somebody could fix my C, I'd
> be glad.
> 
> The rest of the diff is trivial.  Note that SO_RCVTIMEO & SO_SNDTIMEO no
> longer return EDOM since we now have enough space for storing the
> interval.  We could limit that if desired.
> 
> Comments?

I have been fussing with this conversion on and off for a few months
but got stuck on some of it.  Thankfully you seem to have plowed
through it.

Comments below.

> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.238
> diff -u -p -r1.238 uipc_socket.c
> --- kern/uipc_socket.c        31 Dec 2019 13:48:32 -0000      1.238
> +++ kern/uipc_socket.c        9 Jan 2020 14:42:18 -0000
> @@ -304,7 +304,7 @@ soclose(struct socket *so, int flags)
>                       while (so->so_state & SS_ISCONNECTED) {
>                               error = sosleep(so, &so->so_timeo,
>                                   PSOCK | PCATCH, "netcls",
> -                                 so->so_linger * hz);
> +                                 SEC_TO_NSEC(so->so_linger));
>                               if (error)
>                                       break;
>                       }
> @@ -1761,22 +1761,19 @@ sosetopt(struct socket *so, int level, i
>               case SO_RCVTIMEO:
>                   {
>                       struct timeval tv;
> -                     int val;
> +                     uint64_t nsecs;
> 
>                       if (m == NULL || m->m_len < sizeof (tv))
>                               return (EINVAL);
>                       memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
> -                     val = tvtohz(&tv);
> -                     if (val > USHRT_MAX)
> -                             return (EDOM);
> -
> +                     nsecs = TIMEVAL_TO_NSEC(&tv);

First:

bluhm@ mentions that we ought to keep the range check.  I wasn't sure
about whether to keep it.

But okay, say we keep it.  I wonder if TIMEVAL_TO_NSEC() (and
TIMESPEC_TO_NSEC() from your other diff, too) should have a second,
optional parameter that, if non-NULL, is set when the input overflows
a uint64_t.

So you'd do:

        uint64_t oflow, nsecs;

        if (m == NULL || m->m_len < sizeof (tv))
                return (EINVAL);
        memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
        if (!timerisvalid(&tv))
                return (EINVAL);
        nsecs = TIMEVAL_TO_NSEC(&tv, &oflow);
        if (oflow)
                return (EDOM);

in this case.  Or if you didn't care about overflow you'd do

        nsecs = TIMEVAL_TO_NSEC(&tv, NULL);

Second:

In the current code, an unset timeval, i.e. timerisset(&tv) == 0,
translates to zero ticks, which means "no timeout".

We need to check for that special case adjust accordingly.  So in
total the whole input conversion would be something like this:

        uint64_t oflow, nsecs;

        if (m == NULL || m->m_len < sizeof (tv))
                return (EINVAL);
        memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
        if (!timerisvalid(&tv))
                return (EINVAL);
        nsecs = TIMEVAL_TO_NSEC(&tv, &oflow);
        if (oflow || nsecs == INFSLP)
                return (EDOM);
        if (nsecs == 0)
                nsecs = INFSLP;

>                       switch (optname) {
>  
>                       case SO_SNDTIMEO:
> -                             so->so_snd.sb_timeo = val;
> +                             so->so_snd.sb_nsecs = nsecs;
>                               break;
>                       case SO_RCVTIMEO:
> -                             so->so_rcv.sb_timeo = val;
> +                             so->so_rcv.sb_nsecs = nsecs;
>                               break;
>                       }
>                       break;
> @@ -1910,13 +1907,11 @@ sogetopt(struct socket *so, int level, i
>               case SO_RCVTIMEO:
>                   {
>                       struct timeval tv;
> -                     int val = (optname == SO_SNDTIMEO ?
> -                         so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
> +                     uint64_t nsecs = (optname == SO_SNDTIMEO ?
> +                         so->so_snd.sb_nsecs : so->so_rcv.sb_nsecs);
>  
>                       m->m_len = sizeof(struct timeval);
> -                     memset(&tv, 0, sizeof(tv));
> -                     tv.tv_sec = val / hz;
> -                     tv.tv_usec = (val % hz) * tick;
> +                     NSEC_TO_TIMEVAL(nsecs, &tv);

Much nicer, though again we need to account for the INFSLP case:

        if (nsecs != INFSLP)
                NSEC_TO_TIMEVAL(nsecs, &tv);

If nsecs == INFSLP we can just leave it, as we just zero'd the timeval
with memset(9).

>                       memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
>                       break;
>                   }
> @@ -2129,7 +2124,7 @@ sobuf_print(struct sockbuf *sb,
>       (*pr)("\tsb_sel: ...\n");
>       (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
>       (*pr)("\tsb_flags: %i\n", sb->sb_flags);
> -     (*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
> +     (*pr)("\tsb_nsecs: %llu\n", sb->sb_nsecs);
>  }
>  
>  void
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.101
> diff -u -p -r1.101 uipc_socket2.c
> --- kern/uipc_socket2.c       16 Apr 2019 13:15:32 -0000      1.101
> +++ kern/uipc_socket2.c       8 Jan 2020 16:34:41 -0000
> @@ -181,10 +181,10 @@ sonewconn(struct socket *head, int conns
>       }
>       so->so_snd.sb_wat = head->so_snd.sb_wat;
>       so->so_snd.sb_lowat = head->so_snd.sb_lowat;
> -     so->so_snd.sb_timeo = head->so_snd.sb_timeo;
> +     so->so_snd.sb_nsecs = head->so_snd.sb_nsecs;
>       so->so_rcv.sb_wat = head->so_rcv.sb_wat;
>       so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
> -     so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
> +     so->so_rcv.sb_nsecs = head->so_rcv.sb_nsecs;
>  
>       sigio_init(&so->so_sigio);
>       sigio_copy(&so->so_sigio, &head->so_sigio);
> @@ -332,14 +332,15 @@ soassertlocked(struct socket *so)
>  }
>  
>  int
> -sosleep(struct socket *so, void *ident, int prio, const char *wmesg, int 
> timo)
> +sosleep(struct socket *so, void *ident, int prio, const char *wmesg,
> +    uint64_t nsecs)
>  {
>       if ((so->so_proto->pr_domain->dom_family != PF_UNIX) &&
>           (so->so_proto->pr_domain->dom_family != PF_ROUTE) &&
>           (so->so_proto->pr_domain->dom_family != PF_KEY)) {
> -             return rwsleep(ident, &netlock, prio, wmesg, timo);
> +             return rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
>       } else
> -             return tsleep(ident, prio, wmesg, timo);
> +             return tsleep_nsec(ident, prio, wmesg, nsecs);
>  }
>  
>  /*
> @@ -353,7 +354,7 @@ sbwait(struct socket *so, struct sockbuf
>       soassertlocked(so);
>  
>       sb->sb_flags |= SB_WAIT;
> -     return (sosleep(so, &sb->sb_cc, prio, "netio", sb->sb_timeo));
> +     return (sosleep(so, &sb->sb_cc, prio, "netio", sb->sb_nsecs));
>  }
>  
>  int
> @@ -372,7 +373,7 @@ sblock(struct socket *so, struct sockbuf
>  
>       while (sb->sb_flags & SB_LOCK) {
>               sb->sb_flags |= SB_WANT;
> -             error = sosleep(so, &sb->sb_flags, prio, "netlck", 0);
> +             error = sosleep(so, &sb->sb_flags, prio, "netlck", INFSLP);
>               if (error)
>                       return (error);
>       }
> 
> Index: nfs/nfs_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_socket.c,v
> retrieving revision 1.134
> diff -u -p -r1.134 nfs_socket.c
> --- nfs/nfs_socket.c  5 Dec 2019 10:41:57 -0000       1.134
> +++ nfs/nfs_socket.c  9 Jan 2020 14:40:15 -0000
> @@ -333,11 +333,11 @@ nfs_connect(struct nfsmount *nmp, struct
>        * Always set receive timeout to detect server crash and reconnect.
>        * Otherwise, we can get stuck in soreceive forever.
>        */
> -     so->so_rcv.sb_timeo = (5 * hz);
> +     so->so_rcv.sb_nsecs = SEC_TO_NSEC(5);
>       if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT))
> -             so->so_snd.sb_timeo = (5 * hz);
> +             so->so_snd.sb_nsecs = SEC_TO_NSEC(5);
>       else
> -             so->so_snd.sb_timeo = 0;
> +             so->so_snd.sb_nsecs = 0;
>       if (nmp->nm_sotype == SOCK_DGRAM) {
>               sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
>               rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
> Index: nfs/nfs_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v
> retrieving revision 1.115
> diff -u -p -r1.115 nfs_syscalls.c
> --- nfs/nfs_syscalls.c        5 Dec 2019 10:41:57 -0000       1.115
> +++ nfs/nfs_syscalls.c        9 Jan 2020 14:39:50 -0000
> @@ -276,9 +276,9 @@ nfssvc_addsock(struct file *fp, struct m
>               m_freem(m);
>       }
>       so->so_rcv.sb_flags &= ~SB_NOINTR;
> -     so->so_rcv.sb_timeo = 0;
> +     so->so_rcv.sb_nsecs = 0;
>       so->so_snd.sb_flags &= ~SB_NOINTR;
> -     so->so_snd.sb_timeo = 0;
> +     so->so_snd.sb_nsecs = 0;

0 means "no timeout" for the original interfaces, so I think these
need to be set to INFSLP.

>       sounlock(so, s);
>       if (tslp)
>               slp = tslp;
> Index: sys/socketvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.90
> diff -u -p -r1.90 socketvar.h
> --- sys/socketvar.h   12 Dec 2019 16:33:02 -0000      1.90
> +++ sys/socketvar.h   8 Jan 2020 16:35:17 -0000
> @@ -110,7 +110,7 @@ struct socket {
>               struct mbuf *sb_mbtail; /* the last mbuf in the chain */
>               struct mbuf *sb_lastrecord;/* first mbuf of last record in
>                                             socket buffer */
> -             u_short sb_timeo;       /* timeout for read/write */
> +             uint64_t sb_nsecs;      /* timeout for read/write */

Maybe this member needs to be stuffed somewhere else in the struct for
optimal packing or alignment?

I'll let others decide on an appropriate name for the member, though I
agree in general we should avoid "timo" and "timeo" because they are
traditionally used for counts of ticks.

>               short   sb_flags;       /* flags, see below */
>  /* End area that is zeroed on flush. */
>  #define      sb_endzero      sb_flags
> @@ -338,7 +338,7 @@ void      sorwakeup(struct socket *);
>  void sowwakeup(struct socket *);
>  int  sockargs(struct mbuf **, const void *, size_t, int);
>  
> -int  sosleep(struct socket *, void *, int, const char *, int);
> +int  sosleep(struct socket *, void *, int, const char *, uint64_t);
>  int  solock(struct socket *);
>  void sounlock(struct socket *, int);
>  
> Index: sys/time.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/time.h,v
> retrieving revision 1.48
> diff -u -p -r1.48 time.h
> --- sys/time.h        1 Jan 2020 14:09:59 -0000       1.48
> +++ sys/time.h        9 Jan 2020 14:47:17 -0000
> @@ -332,6 +332,8 @@ void clock_secs_to_ymdhms(time_t, struct
>  /* Traditional POSIX base year */
>  #define POSIX_BASE_YEAR 1970
>  
> +#include <sys/stdint.h>
> +
>  static inline void
>  NSEC_TO_TIMEVAL(uint64_t ns, struct timeval *tv)
>  {
> @@ -339,6 +341,14 @@ NSEC_TO_TIMEVAL(uint64_t ns, struct time
>       tv->tv_usec = (ns % 1000000000L) / 1000;
>  }
>  
> +static inline uint64_t
> +TIMEVAL_TO_NSEC(const struct timeval *tv)
> +{
> +     if (tv->tv_sec > (UINT64_MAX - tv->tv_usec * 1000ULL) / 1000000000ULL)
> +             return UINT64_MAX;
> +     return tv->tv_sec * 1000000000ULL + tv->tv_usec * 1000ULL;
> +}
> +

At the risk of bikeshedding I find this easier to follow:

static inline uint64_t
TIMEVAL_TO_NSEC(const struct timeval *tv)
{
        uint64_t nsecs;

        if (tv->tv_sec > UINT64_MAX / 1000000000ULL) {
                return UINT64_MAX;
        nsecs = tv->tv_sec * 1000000000ULL;
        if (tv->tv_usec * 1000ULL > UINT64_MAX - nsecs)
                return UINT64_MAX;
        return nsecs + tv->tv_usec * 1000ULL;
}

>  static inline void
>  NSEC_TO_TIMESPEC(uint64_t ns, struct timespec *ts)
>  {
> @@ -346,8 +356,6 @@ NSEC_TO_TIMESPEC(uint64_t ns, struct tim
>       ts->tv_nsec = ns % 1000000000L;
>  }
>  
> -#include <sys/stdint.h>
> -
>  static inline uint64_t
>  SEC_TO_NSEC(uint64_t seconds)
>  {
> 

Reply via email to