Module Name: src Committed By: thorpej Date: Sat Oct 23 01:28:34 UTC 2021
Modified Files: src/distrib/sets/lists/debug: mi src/distrib/sets/lists/tests: mi src/lib/libc/sys: kqueue.2 src/sys/kern: kern_event.c uipc_socket.c src/sys/sys: event.h socketvar.h src/tests/kernel/kqueue: Makefile Added Files: src/tests/kernel/kqueue: t_empty.c Log Message: Add support for the EVFILT_EMPTY filter, which is activated when the write buffer associated with the file descriptor is empty. This is currently implemented only for sockets, and is intended primarily to provide visibility to applications that all previously written data has been acknowledged by the TCP layer on the receiver. Compatible with the same filter in FreeBSD. To generate a diff of this commit: cvs rdiff -u -r1.365 -r1.366 src/distrib/sets/lists/debug/mi cvs rdiff -u -r1.1153 -r1.1154 src/distrib/sets/lists/tests/mi cvs rdiff -u -r1.56 -r1.57 src/lib/libc/sys/kqueue.2 cvs rdiff -u -r1.136 -r1.137 src/sys/kern/kern_event.c cvs rdiff -u -r1.299 -r1.300 src/sys/kern/uipc_socket.c cvs rdiff -u -r1.50 -r1.51 src/sys/sys/event.h cvs rdiff -u -r1.163 -r1.164 src/sys/sys/socketvar.h cvs rdiff -u -r1.7 -r1.8 src/tests/kernel/kqueue/Makefile cvs rdiff -u -r0 -r1.1 src/tests/kernel/kqueue/t_empty.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/distrib/sets/lists/debug/mi diff -u src/distrib/sets/lists/debug/mi:1.365 src/distrib/sets/lists/debug/mi:1.366 --- src/distrib/sets/lists/debug/mi:1.365 Wed Oct 13 04:57:19 2021 +++ src/distrib/sets/lists/debug/mi Sat Oct 23 01:28:33 2021 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.365 2021/10/13 04:57:19 thorpej Exp $ +# $NetBSD: mi,v 1.366 2021/10/23 01:28:33 thorpej Exp $ ./etc/mtree/set.debug comp-sys-root ./usr/lib comp-sys-usr compatdir ./usr/lib/i18n/libBIG5_g.a comp-c-debuglib debuglib,compatfile @@ -1760,6 +1760,7 @@ ./usr/libdata/debug/usr/tests/kernel/kqueue/read/t_file2.debug tests-kernel-tests debug,atf,compattestfile ./usr/libdata/debug/usr/tests/kernel/kqueue/read/t_pipe.debug tests-kernel-tests debug,atf,compattestfile ./usr/libdata/debug/usr/tests/kernel/kqueue/read/t_ttypty.debug tests-kernel-tests debug,atf,compattestfile +./usr/libdata/debug/usr/tests/kernel/kqueue/t_empty.debug tests-kernel-tests debug,atf,compattestfile ./usr/libdata/debug/usr/tests/kernel/kqueue/t_ioctl.debug tests-kernel-tests debug,atf,compattestfile ./usr/libdata/debug/usr/tests/kernel/kqueue/t_proc1.debug tests-kernel-tests debug,atf,compattestfile ./usr/libdata/debug/usr/tests/kernel/kqueue/t_proc2.debug tests-kernel-tests debug,atf,compattestfile Index: src/distrib/sets/lists/tests/mi diff -u src/distrib/sets/lists/tests/mi:1.1153 src/distrib/sets/lists/tests/mi:1.1154 --- src/distrib/sets/lists/tests/mi:1.1153 Fri Oct 22 22:31:55 2021 +++ src/distrib/sets/lists/tests/mi Sat Oct 23 01:28:34 2021 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.1153 2021/10/22 22:31:55 rillig Exp $ +# $NetBSD: mi,v 1.1154 2021/10/23 01:28:34 thorpej Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. 
# @@ -2180,6 +2180,7 @@ ./usr/tests/kernel/kqueue/read/t_file2 tests-kernel-tests compattestfile,atf ./usr/tests/kernel/kqueue/read/t_pipe tests-kernel-tests compattestfile,atf ./usr/tests/kernel/kqueue/read/t_ttypty tests-kernel-tests compattestfile,atf +./usr/tests/kernel/kqueue/t_empty tests-kernel-tests compattestfile,atf ./usr/tests/kernel/kqueue/t_ioctl tests-kernel-tests compattestfile,atf ./usr/tests/kernel/kqueue/t_proc1 tests-kernel-tests compattestfile,atf ./usr/tests/kernel/kqueue/t_proc2 tests-kernel-tests compattestfile,atf Index: src/lib/libc/sys/kqueue.2 diff -u src/lib/libc/sys/kqueue.2:1.56 src/lib/libc/sys/kqueue.2:1.57 --- src/lib/libc/sys/kqueue.2:1.56 Wed Oct 20 03:26:20 2021 +++ src/lib/libc/sys/kqueue.2 Sat Oct 23 01:28:34 2021 @@ -1,4 +1,4 @@ -.\" $NetBSD: kqueue.2,v 1.56 2021/10/20 03:26:20 thorpej Exp $ +.\" $NetBSD: kqueue.2,v 1.57 2021/10/23 01:28:34 thorpej Exp $ .\" .\" Copyright (c) 2000 Jonathan Lemon .\" All rights reserved. @@ -32,7 +32,7 @@ .\" .\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.22 2001/06/27 19:55:57 dd Exp $ .\" -.Dd October 15, 2021 +.Dd October 22, 2021 .Dt KQUEUE 2 .Os .Sh NAME @@ -406,6 +406,13 @@ Note that this filter is not supported f .Pp For sockets, the low water mark and socket error handling is identical to the EVFILT_READ case. +.It Dv EVFILT_EMPTY +Takes a descriptor as the identifier, and returns whenever +there is no remaining data in the write buffer. +This is currently implemented only for sockets. +Its primary purpose is to provide visibility to an application that all +previously written data has been acknowledged by the receiver at the TCP +layer. .It Dv EVFILT_AIO This is not implemented in .Nx . @@ -871,3 +878,8 @@ filter flags for .Dv EVFILT_VNODE was added in .Nx 10.0 . +.Pp +Support for +.Dv EVFILT_EMPTY +was added in +.Nx 10.0 . 
Index: src/sys/kern/kern_event.c diff -u src/sys/kern/kern_event.c:1.136 src/sys/kern/kern_event.c:1.137 --- src/sys/kern/kern_event.c:1.136 Fri Oct 22 04:49:24 2021 +++ src/sys/kern/kern_event.c Sat Oct 23 01:28:33 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_event.c,v 1.136 2021/10/22 04:49:24 thorpej Exp $ */ +/* $NetBSD: kern_event.c,v 1.137 2021/10/23 01:28:33 thorpej Exp $ */ /*- * Copyright (c) 2008, 2009, 2021 The NetBSD Foundation, Inc. @@ -63,7 +63,7 @@ #endif /* _KERNEL_OPT */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.136 2021/10/22 04:49:24 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.137 2021/10/23 01:28:33 thorpej Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -212,6 +212,7 @@ static struct kfilter sys_kfilters[] = { { "EVFILT_TIMER", EVFILT_TIMER, 0, &timer_filtops, 0 }, { "EVFILT_FS", EVFILT_FS, 0, &fs_filtops, 0 }, { "EVFILT_USER", EVFILT_USER, 0, &user_filtops, 0 }, + { "EVFILT_EMPTY", EVFILT_EMPTY, 0, &file_filtops, 0 }, { NULL, 0, 0, NULL, 0 }, }; Index: src/sys/kern/uipc_socket.c diff -u src/sys/kern/uipc_socket.c:1.299 src/sys/kern/uipc_socket.c:1.300 --- src/sys/kern/uipc_socket.c:1.299 Mon Oct 11 01:07:36 2021 +++ src/sys/kern/uipc_socket.c Sat Oct 23 01:28:33 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.299 2021/10/11 01:07:36 thorpej Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.300 2021/10/23 01:28:33 thorpej Exp $ */ /* * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. 
@@ -71,7 +71,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.299 2021/10/11 01:07:36 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.300 2021/10/23 01:28:33 thorpej Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -2297,6 +2297,22 @@ filt_sowrite(struct knote *kn, long hint return rv; } +static int +filt_soempty(struct knote *kn, long hint) +{ + struct socket *so; + int rv; + + so = ((file_t *)kn->kn_obj)->f_socket; + if (hint != NOTE_SUBMIT) + solock(so); + rv = (kn->kn_data = sbused(&so->so_snd)) == 0 || + (so->so_options & SO_ACCEPTCONN) != 0; + if (hint != NOTE_SUBMIT) + sounlock(so); + return rv; +} + /*ARGSUSED*/ static int filt_solisten(struct knote *kn, long hint) @@ -2340,6 +2356,13 @@ static const struct filterops sowrite_fi .f_event = filt_sowrite, }; +static const struct filterops soempty_filtops = { + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, + .f_attach = NULL, + .f_detach = filt_sowdetach, + .f_event = filt_soempty, +}; + int soo_kqfilter(struct file *fp, struct knote *kn) { @@ -2360,6 +2383,10 @@ soo_kqfilter(struct file *fp, struct kno kn->kn_fop = &sowrite_filtops; sb = &so->so_snd; break; + case EVFILT_EMPTY: + kn->kn_fop = &soempty_filtops; + sb = &so->so_snd; + break; default: sounlock(so); return EINVAL; Index: src/sys/sys/event.h diff -u src/sys/sys/event.h:1.50 src/sys/sys/event.h:1.51 --- src/sys/sys/event.h:1.50 Thu Oct 21 02:34:04 2021 +++ src/sys/sys/event.h Sat Oct 23 01:28:33 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: event.h,v 1.50 2021/10/21 02:34:04 thorpej Exp $ */ +/* $NetBSD: event.h,v 1.51 2021/10/23 01:28:33 thorpej Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon <jle...@freebsd.org> @@ -45,7 +45,8 @@ #define EVFILT_TIMER 6U /* arbitrary timer (in ms) */ #define EVFILT_FS 7U /* filesystem events */ #define EVFILT_USER 8U /* user events */ -#define EVFILT_SYSCOUNT 9U /* number of filters */ +#define EVFILT_EMPTY 9U +#define EVFILT_SYSCOUNT 10U /* number of filters */ 
#ifdef EVFILT_NAMES static const char *evfiltnames[] = { @@ -58,6 +59,7 @@ static const char *evfiltnames[] = { "EVFILT_TIMER", "EVFILT_FS", "EVFILT_USER", + "EVFILT_EMPTY", }; #endif Index: src/sys/sys/socketvar.h diff -u src/sys/sys/socketvar.h:1.163 src/sys/sys/socketvar.h:1.164 --- src/sys/sys/socketvar.h:1.163 Mon Nov 23 00:52:53 2020 +++ src/sys/sys/socketvar.h Sat Oct 23 01:28:33 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: socketvar.h,v 1.163 2020/11/23 00:52:53 chs Exp $ */ +/* $NetBSD: socketvar.h,v 1.164 2021/10/23 01:28:33 thorpej Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. @@ -424,6 +424,17 @@ sbspace_oob(const struct sockbuf *sb) return lmin(hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt); } +/* + * How much socket buffer space has been used? + */ +static __inline u_long +sbused(const struct sockbuf *sb) +{ + + KASSERT(solocked(sb->sb_so)); + return sb->sb_cc; +} + /* do we have to send all at once on a socket? */ static __inline int sosendallatonce(const struct socket *so) Index: src/tests/kernel/kqueue/Makefile diff -u src/tests/kernel/kqueue/Makefile:1.7 src/tests/kernel/kqueue/Makefile:1.8 --- src/tests/kernel/kqueue/Makefile:1.7 Wed Oct 13 04:57:19 2021 +++ src/tests/kernel/kqueue/Makefile Sat Oct 23 01:28:33 2021 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.7 2021/10/13 04:57:19 thorpej Exp $ +# $NetBSD: Makefile,v 1.8 2021/10/23 01:28:33 thorpej Exp $ WARNS?=6 NOMAN= # defined @@ -10,7 +10,8 @@ TESTSDIR= ${TESTSBASE}/kernel/kqueue TESTS_SUBDIRS= read TESTS_SUBDIRS+= write -TESTS_C= t_ioctl +TESTS_C= t_empty +TESTS_C+= t_ioctl TESTS_C+= t_proc1 TESTS_C+= t_proc2 TESTS_C+= t_proc3 Added files: Index: src/tests/kernel/kqueue/t_empty.c diff -u /dev/null src/tests/kernel/kqueue/t_empty.c:1.1 --- /dev/null Sat Oct 23 01:28:34 2021 +++ src/tests/kernel/kqueue/t_empty.c Sat Oct 23 01:28:33 2021 @@ -0,0 +1,178 @@ +/* $NetBSD: t_empty.c,v 1.1 2021/10/23 01:28:33 thorpej Exp $ */ + +/*- + * Copyright (c) 2021 The NetBSD Foundation, Inc. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_empty.c,v 1.1 2021/10/23 01:28:33 thorpej Exp $");
+
+#include <sys/event.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <netinet/in.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+static void
+test_empty(int readfd, int writefd, bool is_tcp)
+{
+	struct timespec ts = { 0, 0 };
+	struct kevent event;
+	int kq, error, sndbufsize;
+	char buf[1024] = { 0 };
+	ssize_t rv;
+
+	ATF_REQUIRE((kq = kqueue()) >= 0);
+
+	EV_SET(&event, writefd, EVFILT_EMPTY, EV_ADD, 0, 0, NULL);
+	ATF_REQUIRE(kevent(kq, &event, 1, NULL, 0, NULL) == 0);
+
+	/* Nothing has been written yet, so EMPTY must fire right away. */
+	memset(&event, 0, sizeof(event));
+	ATF_REQUIRE(kevent(kq, NULL, 0, &event, 1, &ts) == 1);
+	ATF_REQUIRE(event.ident == (uintptr_t)writefd);
+	ATF_REQUIRE(event.filter == EVFILT_EMPTY);
+
+	if (is_tcp) {
+		/*
+		 * Get the write socket buffer size so that we can set
+		 * the read socket buffer size to something larger
+		 * later on.
+		 */
+		socklen_t slen = sizeof(sndbufsize);
+		ATF_REQUIRE(getsockopt(writefd, SOL_SOCKET,
+		    SO_SNDBUF, &sndbufsize, &slen) == 0);
+
+		/*
+		 * Set the receive buffer size to 1, slamming shut
+		 * the TCP receive window, thus trapping all of the
+		 * data in the sender's queue.
+		 */
+		int val = 1;
+		ATF_REQUIRE(setsockopt(readfd, SOL_SOCKET,
+		    SO_RCVBUF, &val, sizeof(val)) == 0);
+	}
+
+	/* Write until the non-blocking write fails with EAGAIN. */
+	for (rv = 0; rv != -1;) {
+		rv = write(writefd, buf, sizeof(buf));
+		error = errno;
+		ATF_REQUIRE(rv > 0 || (rv == -1 && error == EAGAIN));
+	}
+
+	/* Data is now queued on the sender, so EMPTY must not fire. */
+	ATF_REQUIRE(kevent(kq, NULL, 0, &event, 1, &ts) == 0);
+
+	if (is_tcp) {
+		/*
+		 * Set the receive buffer size to something larger than
+		 * the sender's send buffer.
+		 */
+		int val = sndbufsize + 128;
+		ATF_REQUIRE(setsockopt(readfd, SOL_SOCKET,
+		    SO_RCVBUF, &val, sizeof(val)) == 0);
+	}
+
+	/* Read until the non-blocking read fails with EAGAIN. */
+	for (rv = 0; rv != -1;) {
+		rv = read(readfd, buf, sizeof(buf));
+		error = errno;
+		ATF_REQUIRE(rv > 0 || (rv == -1 && error == EAGAIN));
+	}
+
+	/*
+	 * Check that EMPTY is true.  Poll a few times, pausing between
+	 * attempts, because TCP might not drain immediately.
+	 */
+	if (is_tcp) {
+		for (rv = 0; rv < 5; rv++) {
+			if (kevent(kq, NULL, 0, &event, 1, &ts) == 1) {
+				break;
+			}
+			sleep(1);
+		}
+	}
+	memset(&event, 0, sizeof(event));
+	ATF_REQUIRE(kevent(kq, NULL, 0, &event, 1, &ts) == 1);
+	ATF_REQUIRE(event.ident == (uintptr_t)writefd);
+	ATF_REQUIRE(event.filter == EVFILT_EMPTY);
+}
+
+ATF_TC(sock_tcp);
+ATF_TC_HEAD(sock_tcp, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Test EVFILT_EMPTY with TCP sockets.");
+}
+
+ATF_TC_BODY(sock_tcp, tc)
+{
+	int readsock, writesock;
+	socklen_t slen;
+
+	ATF_REQUIRE((readsock =
+	    socket(PF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)) != -1);
+	ATF_REQUIRE((writesock =
+	    socket(PF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)) != -1);
+
+	struct sockaddr_in sin = {
+		.sin_len = sizeof(sin),
+		.sin_family = AF_INET,
+		.sin_port = 0,		/* no need to swap 0 */
+		.sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
+	};
+	ATF_REQUIRE(bind(readsock, (struct sockaddr *)&sin,
+	    sizeof(sin)) == 0);
+	ATF_REQUIRE(listen(readsock, 1) == 0);
+	slen = sizeof(sin);
+	ATF_REQUIRE(getsockname(readsock, (struct sockaddr *)&sin, &slen) == 0);
+
+	ATF_REQUIRE_ERRNO(EINPROGRESS,
+	    connect(writesock, (struct sockaddr *)&sin, sizeof(sin)) == -1);
+
+	slen = sizeof(sin);
+	ATF_REQUIRE((readsock = accept(readsock, (struct sockaddr *)&sin,
+	    &slen)) != -1);
+
+	test_empty(readsock, writesock, true);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	ATF_TP_ADD_TC(tp, sock_tcp);
+
+	return atf_no_error();
+}