ping...
> On 26 Oct 2021, at 14:12, Vitaliy Makkoveev <m...@openbsd.org> wrote:
>
> Another step to make UNIX sockets locking fine grained.
>
> The listening socket has the references from file descriptors layer and
> from the vnode(9) layer. This means when we close(2)'ing such socket it
> still referenced by concurrent thread through connect(2) path.
>
> When we bind(2) UNIX socket we link it to vnode(9) by assigning
> `v_socket'. When we connect(2)'ing socket to the socket we previously
> bind(2)'ed we finding it by namei(9) and obtain it's reference through
> `v_socket'. This socket has no extra reference in file descriptor
> layer and could be closed by concurrent thread.
>
> This time we have `unp_lock' rwlock(9) which protects the whole layer
> and the dereference of `v_socket' is safe. But with the fine grained
> locking the `v_socket' will not be protected by global lock. When we
> obtain the vnode(9) by namei(9) in connect(9) or bind(9) paths it is
> already exclusively locked by vlode(9) lock. But in unp_detach() which
> is called on the close(2)'ing socket we assume `unp_lock' protects
> `v_socket'.
>
> I propose to use exclusive vnode(9) lock to protect `v_socket'. With the
> fine grained locking, the `v_socket' dereference in unp_bind() or
> unp_connect() threads will be safe because unp_detach() thread will wait
> the vnode(9) lock release. The vnode referenced by `unp_vnod' has
> reference counter bumped so it's dereference is also safe without
> `unp_lock' held.
>
> The `i_lock' should be take before `unp_lock' and unp_detach() should
> release solock(). To prevent connections on this socket the
> 'SO_ACCEPTCONN' bit cleared in soclose().
>
> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.265
> diff -u -p -r1.265 uipc_socket.c
> --- sys/kern/uipc_socket.c 14 Oct 2021 23:05:10 -0000 1.265
> +++ sys/kern/uipc_socket.c 26 Oct 2021 11:05:59 -0000
> @@ -315,6 +315,8 @@ soclose(struct socket *so, int flags)
> /* Revoke async IO early. There is a final revocation in sofree(). */
> sigio_free(&so->so_sigio);
> if (so->so_options & SO_ACCEPTCONN) {
> + so->so_options &= ~SO_ACCEPTCONN;
> +
> while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
> (void) soqremque(so2, 0);
> (void) soabort(so2);
> Index: sys/kern/uipc_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.150
> diff -u -p -r1.150 uipc_usrreq.c
> --- sys/kern/uipc_usrreq.c 21 Oct 2021 22:11:07 -0000 1.150
> +++ sys/kern/uipc_usrreq.c 26 Oct 2021 11:05:59 -0000
> @@ -474,20 +474,30 @@ void
> unp_detach(struct unpcb *unp)
> {
> struct socket *so = unp->unp_socket;
> - struct vnode *vp = NULL;
> + struct vnode *vp = unp->unp_vnode;
>
> rw_assert_wrlock(&unp_lock);
>
> LIST_REMOVE(unp, unp_link);
> - if (unp->unp_vnode) {
> +
> + if (vp) {
> + unp->unp_vnode = NULL;
> +
> /*
> - * `v_socket' is only read in unp_connect and
> - * unplock prevents concurrent access.
> + * Enforce `i_lock' -> `unp_lock' because fifo
> + * subsystem requires it.
> */
>
> - unp->unp_vnode->v_socket = NULL;
> - vp = unp->unp_vnode;
> - unp->unp_vnode = NULL;
> + sounlock(so, SL_LOCKED);
> +
> + VOP_LOCK(vp, LK_EXCLUSIVE);
> + vp->v_socket = NULL;
> +
> + KERNEL_LOCK();
> + vput(vp);
> + KERNEL_UNLOCK();
> +
> + solock(so);
> }
>
> if (unp->unp_conn)
> @@ -500,21 +510,6 @@ unp_detach(struct unpcb *unp)
> pool_put(&unpcb_pool, unp);
> if (unp_rights)
> task_add(systqmp, &unp_gc_task);
> -
> - if (vp != NULL) {
> - /*
> - * Enforce `i_lock' -> `unplock' because fifo subsystem
> - * requires it. The socket can't be closed concurrently
> - * because the file descriptor reference is
> - * still hold.
> - */
> -
> - sounlock(so, SL_LOCKED);
> - KERNEL_LOCK();
> - vrele(vp);
> - KERNEL_UNLOCK();
> - solock(so);
> - }
> }
>
> int
>