On Thu, Jan 09, 2020 at 04:10:03PM +0100, Martin Pieuchot wrote:
> SO_RCVTIMEO and SO_SNDTIMEO allow userland to specify a timeout value
> via a 'struct timeval'. Internally the kernel keeps this time
> representation in ticks. Diff below changes that to nanoseconds which
> allows us to use tsleep_nsec(9) in sosleep().
>
> That means we need a new conversion routine: TIMEVAL_TO_NSEC(). Here
> again I screwed up the overflow check. If somebody could fix my C, I'd
> be glad.
>
> The rest of the diff is trivial. Note that SO_RCVTIMEO & SO_SNDTIMEO no
> longer return EDOM since we now have enough space for storing the
> interval. We could limit that if desired.
>
> Comments?
I have been fussing with this conversion on and off for a few months
but got stuck on some of it. Thankfully you seem to have plowed
through it.
Comments below.
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.238
> diff -u -p -r1.238 uipc_socket.c
> --- kern/uipc_socket.c 31 Dec 2019 13:48:32 -0000 1.238
> +++ kern/uipc_socket.c 9 Jan 2020 14:42:18 -0000
> @@ -304,7 +304,7 @@ soclose(struct socket *so, int flags)
> while (so->so_state & SS_ISCONNECTED) {
> error = sosleep(so, &so->so_timeo,
> PSOCK | PCATCH, "netcls",
> - so->so_linger * hz);
> + SEC_TO_NSEC(so->so_linger));
> if (error)
> break;
> }
> @@ -1761,22 +1761,19 @@ sosetopt(struct socket *so, int level, i
> case SO_RCVTIMEO:
> {
> struct timeval tv;
> - int val;
> + uint64_t nsecs;
>
> if (m == NULL || m->m_len < sizeof (tv))
> return (EINVAL);
> memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
> - val = tvtohz(&tv);
> - if (val > USHRT_MAX)
> - return (EDOM);
> -
> + nsecs = TIMEVAL_TO_NSEC(&tv);
First:
bluhm@ mentions that we ought to keep the range check. I wasn't sure
about whether to keep it.
But okay, say we keep it. I wonder if TIMEVAL_TO_NSEC() (and
TIMESPEC_TO_NSEC() from your other diff, too) should have a second,
optional parameter that, if non-NULL, is set when the input overflows
a uint64_t.
So you'd do:
uint64_t oflow, nsecs;
if (m == NULL || m->m_len < sizeof (tv))
return (EINVAL);
memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
if (!timerisvalid(&tv))
return (EINVAL);
nsecs = TIMEVAL_TO_NSEC(&tv, &oflow);
if (oflow)
return (EDOM);
in this case. Or if you didn't care about overflow you'd do
nsecs = TIMEVAL_TO_NSEC(&tv, NULL);
Second:
In the current code, an unset timeval, i.e. timerisset(&tv) == 0,
translates to zero ticks, which means "no timeout".
We need to check for that special case adjust accordingly. So in
total the whole input conversion would be something like this:
uint64_t oflow, nsecs;
if (m == NULL || m->m_len < sizeof (tv))
return (EINVAL);
memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
if (!timerisvalid(&tv))
return (EINVAL);
nsecs = TIMEVAL_TO_NSEC(&tv, &oflow);
if (oflow || nsecs == INFSLP)
return (EDOM);
if (nsecs == 0)
nsecs = INFSLP;
> switch (optname) {
>
> case SO_SNDTIMEO:
> - so->so_snd.sb_timeo = val;
> + so->so_snd.sb_nsecs = nsecs;
> break;
> case SO_RCVTIMEO:
> - so->so_rcv.sb_timeo = val;
> + so->so_rcv.sb_nsecs = nsecs;
> break;
> }
> break;
> @@ -1910,13 +1907,11 @@ sogetopt(struct socket *so, int level, i
> case SO_RCVTIMEO:
> {
> struct timeval tv;
> - int val = (optname == SO_SNDTIMEO ?
> - so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
> + uint64_t nsecs = (optname == SO_SNDTIMEO ?
> + so->so_snd.sb_nsecs : so->so_rcv.sb_nsecs);
>
> m->m_len = sizeof(struct timeval);
> - memset(&tv, 0, sizeof(tv));
> - tv.tv_sec = val / hz;
> - tv.tv_usec = (val % hz) * tick;
> + NSEC_TO_TIMEVAL(nsecs, &tv);
Much nicer, though again we need to account for the INFSLP case:
if (nsecs != INFSLP)
NSEC_TO_TIMEVAL(nsecs, &tv);
If nsecs == INFSLP we can just leave it, as we just zero'd the timeval
with memset(9).
> memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
> break;
> }
> @@ -2129,7 +2124,7 @@ sobuf_print(struct sockbuf *sb,
> (*pr)("\tsb_sel: ...\n");
> (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
> (*pr)("\tsb_flags: %i\n", sb->sb_flags);
> - (*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
> + (*pr)("\tsb_nsecs: %llu\n", sb->sb_nsecs);
> }
>
> void
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.101
> diff -u -p -r1.101 uipc_socket2.c
> --- kern/uipc_socket2.c 16 Apr 2019 13:15:32 -0000 1.101
> +++ kern/uipc_socket2.c 8 Jan 2020 16:34:41 -0000
> @@ -181,10 +181,10 @@ sonewconn(struct socket *head, int conns
> }
> so->so_snd.sb_wat = head->so_snd.sb_wat;
> so->so_snd.sb_lowat = head->so_snd.sb_lowat;
> - so->so_snd.sb_timeo = head->so_snd.sb_timeo;
> + so->so_snd.sb_nsecs = head->so_snd.sb_nsecs;
> so->so_rcv.sb_wat = head->so_rcv.sb_wat;
> so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
> - so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
> + so->so_rcv.sb_nsecs = head->so_rcv.sb_nsecs;
>
> sigio_init(&so->so_sigio);
> sigio_copy(&so->so_sigio, &head->so_sigio);
> @@ -332,14 +332,15 @@ soassertlocked(struct socket *so)
> }
>
> int
> -sosleep(struct socket *so, void *ident, int prio, const char *wmesg, int
> timo)
> +sosleep(struct socket *so, void *ident, int prio, const char *wmesg,
> + uint64_t nsecs)
> {
> if ((so->so_proto->pr_domain->dom_family != PF_UNIX) &&
> (so->so_proto->pr_domain->dom_family != PF_ROUTE) &&
> (so->so_proto->pr_domain->dom_family != PF_KEY)) {
> - return rwsleep(ident, &netlock, prio, wmesg, timo);
> + return rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
> } else
> - return tsleep(ident, prio, wmesg, timo);
> + return tsleep_nsec(ident, prio, wmesg, nsecs);
> }
>
> /*
> @@ -353,7 +354,7 @@ sbwait(struct socket *so, struct sockbuf
> soassertlocked(so);
>
> sb->sb_flags |= SB_WAIT;
> - return (sosleep(so, &sb->sb_cc, prio, "netio", sb->sb_timeo));
> + return (sosleep(so, &sb->sb_cc, prio, "netio", sb->sb_nsecs));
> }
>
> int
> @@ -372,7 +373,7 @@ sblock(struct socket *so, struct sockbuf
>
> while (sb->sb_flags & SB_LOCK) {
> sb->sb_flags |= SB_WANT;
> - error = sosleep(so, &sb->sb_flags, prio, "netlck", 0);
> + error = sosleep(so, &sb->sb_flags, prio, "netlck", INFSLP);
> if (error)
> return (error);
> }
>
> Index: nfs/nfs_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_socket.c,v
> retrieving revision 1.134
> diff -u -p -r1.134 nfs_socket.c
> --- nfs/nfs_socket.c 5 Dec 2019 10:41:57 -0000 1.134
> +++ nfs/nfs_socket.c 9 Jan 2020 14:40:15 -0000
> @@ -333,11 +333,11 @@ nfs_connect(struct nfsmount *nmp, struct
> * Always set receive timeout to detect server crash and reconnect.
> * Otherwise, we can get stuck in soreceive forever.
> */
> - so->so_rcv.sb_timeo = (5 * hz);
> + so->so_rcv.sb_nsecs = SEC_TO_NSEC(5);
> if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT))
> - so->so_snd.sb_timeo = (5 * hz);
> + so->so_snd.sb_nsecs = SEC_TO_NSEC(5);
> else
> - so->so_snd.sb_timeo = 0;
> + so->so_snd.sb_nsecs = 0;
> if (nmp->nm_sotype == SOCK_DGRAM) {
> sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
> rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
> Index: nfs/nfs_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v
> retrieving revision 1.115
> diff -u -p -r1.115 nfs_syscalls.c
> --- nfs/nfs_syscalls.c 5 Dec 2019 10:41:57 -0000 1.115
> +++ nfs/nfs_syscalls.c 9 Jan 2020 14:39:50 -0000
> @@ -276,9 +276,9 @@ nfssvc_addsock(struct file *fp, struct m
> m_freem(m);
> }
> so->so_rcv.sb_flags &= ~SB_NOINTR;
> - so->so_rcv.sb_timeo = 0;
> + so->so_rcv.sb_nsecs = 0;
> so->so_snd.sb_flags &= ~SB_NOINTR;
> - so->so_snd.sb_timeo = 0;
> + so->so_snd.sb_nsecs = 0;
0 means "no timeout" for the original interfaces, so I think these
need to be set to INFSLP.
> sounlock(so, s);
> if (tslp)
> slp = tslp;
> Index: sys/socketvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.90
> diff -u -p -r1.90 socketvar.h
> --- sys/socketvar.h 12 Dec 2019 16:33:02 -0000 1.90
> +++ sys/socketvar.h 8 Jan 2020 16:35:17 -0000
> @@ -110,7 +110,7 @@ struct socket {
> struct mbuf *sb_mbtail; /* the last mbuf in the chain */
> struct mbuf *sb_lastrecord;/* first mbuf of last record in
> socket buffer */
> - u_short sb_timeo; /* timeout for read/write */
> + uint64_t sb_nsecs; /* timeout for read/write */
Maybe this member needs to be stuffed somewhere else in the struct for
optimal packing or alignment?
I'll let others decide on an appropriate name for the member, though I
agree in general we should avoid "timo" and "timeo" because they are
traditionally used for counts of ticks.
> short sb_flags; /* flags, see below */
> /* End area that is zeroed on flush. */
> #define sb_endzero sb_flags
> @@ -338,7 +338,7 @@ void sorwakeup(struct socket *);
> void sowwakeup(struct socket *);
> int sockargs(struct mbuf **, const void *, size_t, int);
>
> -int sosleep(struct socket *, void *, int, const char *, int);
> +int sosleep(struct socket *, void *, int, const char *, uint64_t);
> int solock(struct socket *);
> void sounlock(struct socket *, int);
>
> Index: sys/time.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/time.h,v
> retrieving revision 1.48
> diff -u -p -r1.48 time.h
> --- sys/time.h 1 Jan 2020 14:09:59 -0000 1.48
> +++ sys/time.h 9 Jan 2020 14:47:17 -0000
> @@ -332,6 +332,8 @@ void clock_secs_to_ymdhms(time_t, struct
> /* Traditional POSIX base year */
> #define POSIX_BASE_YEAR 1970
>
> +#include <sys/stdint.h>
> +
> static inline void
> NSEC_TO_TIMEVAL(uint64_t ns, struct timeval *tv)
> {
> @@ -339,6 +341,14 @@ NSEC_TO_TIMEVAL(uint64_t ns, struct time
> tv->tv_usec = (ns % 1000000000L) / 1000;
> }
>
> +static inline uint64_t
> +TIMEVAL_TO_NSEC(const struct timeval *tv)
> +{
> + if (tv->tv_sec > (UINT64_MAX - tv->tv_usec * 1000ULL) / 1000000000ULL)
> + return UINT64_MAX;
> + return tv->tv_sec * 1000000000ULL + tv->tv_usec * 1000ULL;
> +}
> +
At the risk of bikeshedding I find this easier to follow:
static inline uint64_t
TIMEVAL_TO_NSEC(const struct timeval *tv)
{
uint64_t nsecs;
if (tv->tv_sec > UINT64_MAX / 1000000000ULL) {
return UINT64_MAX;
nsecs = tv->tv_sec * 1000000000ULL;
if (tv->tv_usec * 1000ULL > UINT64_MAX - nsecs)
return UINT64_MAX;
return nsecs + tv->tv_usec * 1000ULL;
}
> static inline void
> NSEC_TO_TIMESPEC(uint64_t ns, struct timespec *ts)
> {
> @@ -346,8 +356,6 @@ NSEC_TO_TIMESPEC(uint64_t ns, struct tim
> ts->tv_nsec = ns % 1000000000L;
> }
>
> -#include <sys/stdint.h>
> -
> static inline uint64_t
> SEC_TO_NSEC(uint64_t seconds)
> {
>