On Mon, Aug 15, 2022 at 04:28:28PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Aug 15, 2022 at 04:20:57PM +0300, Vitaliy Makkoveev wrote:
> > On Mon, Aug 15, 2022 at 02:36:34PM +0200, Alexander Bluhm wrote:
> > > On Mon, Aug 15, 2022 at 02:29:02PM +0300, Vitaliy Makkoveev wrote:
> > > > Nothing special, just get assign `inp' and `tp' from passed socket. I
> > > > want to do this before (*pru_usrreq)() split to avoid huge code
> > > > duplication.
> > > >
> > > > We have "tp might get 0 when using socket splicing" commentary. I
> > > > checked where we set `inp_ppcb' to NULL and found, tcp_close() is the
> > > > only place, and the following in_pcbdetach() sets `so_pcb' to NULL. So
> > > > this commentary looks wrong, because we can't have TCP sockets without
> > > > TCP pcb attached.
> > >
> > > I added the comment and check after a panic in production.
> > >
> > > ----------------------------
> > > revision 1.109
> > > date: 2012/01/03 21:50:12; author: bluhm; state: Exp; lines: +6 -2;
> > > When used with socket splicing, tcp_usrreq() might get called with
> > > a socket that has an inp but tp is NULL. The call stack for that
> > > is tcp_input() tcp_close() soisdisconnected() sorwakeup() somove()
> > > tcp_usrreq(PRU_RCVD). To avoid a NULL dereference, just return in
> > > that case.
> > > ok henning@
> > > ----------------------------
> > >
> > > tcp_input() calls tcp_close() after a reset packet.
> >
>
> May be I misunderstood something, but the socket should be destroyed
> after tcp_close() call:
>
> void
> in_pcbdetach(struct inpcb *inp)
> {
> struct socket *so = inp->inp_socket;
> struct inpcbtable *table = inp->inp_table;
>
> NET_ASSERT_LOCKED();
>
> so->so_pcb = NULL;
> /*
> * As long as the NET_LOCK() is the default lock for Internet
> * sockets, do not release it to not introduce new sleeping
> * points.
> */
> sofree(so, 1);
> /* ... */
> }
>
> So it looks like the use-after-free in the somove() thread.
>
This diff takes a references on sliced sockets and keeps them until
sounslice() called. I don't propose to commit it as is, may be sorele()
should be reworked to call sofree() itself.
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.283
diff -u -p -r1.283 uipc_socket.c
--- sys/kern/uipc_socket.c 15 Aug 2022 09:11:38 -0000 1.283
+++ sys/kern/uipc_socket.c 15 Aug 2022 14:02:37 -0000
@@ -295,15 +295,6 @@ sofree(struct socket *so, int keep_lock)
sounlock(head);
}
- if (persocket) {
- sounlock(so);
- refcnt_finalize(&so->so_refcnt, "sofinal");
- solock(so);
- }
-
- sigio_free(&so->so_sigio);
- klist_free(&so->so_rcv.sb_sel.si_note);
- klist_free(&so->so_snd.sb_sel.si_note);
#ifdef SOCKET_SPLICE
if (so->so_sp) {
if (issplicedback(so)) {
@@ -322,6 +313,14 @@ sofree(struct socket *so, int keep_lock)
}
}
#endif /* SOCKET_SPLICE */
+
+ sounlock(so);
+ refcnt_finalize(&so->so_refcnt, "sofinal");
+ solock(so);
+
+ sigio_free(&so->so_sigio);
+ klist_free(&so->so_rcv.sb_sel.si_note);
+ klist_free(&so->so_snd.sb_sel.si_note);
sbrelease(so, &so->so_snd);
sorflush(so);
if (!keep_lock)
@@ -1329,6 +1328,8 @@ sosplice(struct socket *so, int fd, off_
}
/* Splice so and sosp together. */
+ soref(so);
+ soref(sosp);
so->so_sp->ssp_socket = sosp;
sosp->so_sp->ssp_soback = so;
so->so_splicelen = 0;
@@ -1368,18 +1369,23 @@ sosplice(struct socket *so, int fd, off_
void
sounsplice(struct socket *so, struct socket *sosp, int freeing)
{
+ struct socket *ssp_socket, *ssp_soback;
soassertlocked(so);
task_del(sosplice_taskq, &so->so_splicetask);
timeout_del(&so->so_idleto);
sosp->so_snd.sb_flags &= ~SB_SPLICE;
so->so_rcv.sb_flags &= ~SB_SPLICE;
+ ssp_socket = so->so_sp->ssp_socket;
+ ssp_soback = sosp->so_sp->ssp_soback;
so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
/* Do not wakeup a socket that is about to be freed. */
if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
sorwakeup(so);
if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
sowwakeup(sosp);
+ sorele(ssp_socket);
+ sorele(ssp_soback);
}
void
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.270
diff -u -p -r1.270 in_pcb.c
--- sys/netinet/in_pcb.c 8 Aug 2022 12:06:30 -0000 1.270
+++ sys/netinet/in_pcb.c 15 Aug 2022 14:02:37 -0000
@@ -582,7 +582,16 @@ in_pcbdetach(struct inpcb *inp)
NET_ASSERT_LOCKED();
+ mtx_enter(&table->inpt_mtx);
+ LIST_REMOVE(inp, inp_lhash);
+ LIST_REMOVE(inp, inp_hash);
+ TAILQ_REMOVE(&table->inpt_queue, inp, inp_queue);
+ table->inpt_count--;
+ mtx_leave(&table->inpt_mtx);
+
so->so_pcb = NULL;
+ inp->inp_socket = NULL;
+
/*
* As long as the NET_LOCK() is the default lock for Internet
* sockets, do not release it to not introduce new sleeping
@@ -608,12 +617,6 @@ in_pcbdetach(struct inpcb *inp)
pf_inp_unlink(inp);
}
#endif
- mtx_enter(&table->inpt_mtx);
- LIST_REMOVE(inp, inp_lhash);
- LIST_REMOVE(inp, inp_hash);
- TAILQ_REMOVE(&table->inpt_queue, inp, inp_queue);
- table->inpt_count--;
- mtx_leave(&table->inpt_mtx);
in_pcbunref(inp);
}