The branch main has been updated by glebius:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=07285bb4c22c026a50f69149d5dae03169b15fe4

commit 07285bb4c22c026a50f69149d5dae03169b15fe4
Author:     Gleb Smirnoff <[email protected]>
AuthorDate: 2022-08-10 18:09:34 +0000
Commit:     Gleb Smirnoff <[email protected]>
CommitDate: 2022-08-10 18:09:34 +0000

    tcp: utilize new solisten_clone() and solisten_enqueue()
    
    This streamlines cloning of a socket from a listener.  Now we do not
    drop the inpcb lock during creation of a new socket, do not do useless
    state transitions, and put a fully initialized socket+inpcb+tcpcb into
    the listen queue.
    
    Before this change, first we would allocate the socket and inpcb+tcpcb via
    tcp_usr_attach() as TCPS_CLOSED, link them into global list of pcbs, unlock
    pcb and put this onto incomplete queue (see 6f3caa6d815).  Then, after
    sonewconn() we would lock it again, transition into TCPS_SYN_RECEIVED,
    insert into inpcb hash, finalize initialization of tcpcb.  And then, in
    call into tcp_do_segment() and upon transition to TCPS_ESTABLISHED call
    soisconnected().  This call would lock the listening socket once again
    with a LOR protection sequence and then we would relocate the socket onto
    the complete queue and only now it is ready for accept(2).
    
    Reviewed by:            rrs, tuexen
    Differential revision:  https://reviews.freebsd.org/D36064
---
 sys/kern/uipc_socket.c     |   2 +-
 sys/netinet/tcp_syncache.c | 130 ++++++++++++++++++++-------------------------
 sys/netinet/tcp_usrreq.c   |   8 ++-
 sys/sys/socketvar.h        |   1 +
 4 files changed, 64 insertions(+), 77 deletions(-)

diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 73725efceb2d..e9fe4dfd7feb 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -455,7 +455,7 @@ soalloc(struct vnet *vnet)
  * locks, labels, etc.  All protocol state is assumed already to have been
  * torn down (and possibly never set up) by the caller.
  */
-static void
+void
 sodealloc(struct socket *so)
 {
 
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index ed5981bf85d7..df900280e3df 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -804,44 +804,25 @@ syncache_socket(struct syncache *sc, struct socket *lso, 
struct mbuf *m)
        /*
         * Ok, create the full blown connection, and set things up
         * as they would have been set up if we had created the
-        * connection when the SYN arrived.  If we can't create
-        * the connection, abort it.
+        * connection when the SYN arrived.
         */
-       so = sonewconn(lso, 0);
-       if (so == NULL) {
-               /*
-                * Drop the connection; we will either send a RST or
-                * have the peer retransmit its SYN again after its
-                * RTO and try again.
-                */
-               TCPSTAT_INC(tcps_listendrop);
-               if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
-                       log(LOG_DEBUG, "%s; %s: Socket create failed "
-                           "due to limits or memory shortage\n",
-                           s, __func__);
-                       free(s, M_TCPLOG);
-               }
-               goto abort2;
-       }
+       if ((so = solisten_clone(lso)) == NULL)
+               goto allocfail;
 #ifdef MAC
        mac_socketpeer_set_from_mbuf(m, so);
 #endif
-
+       error = in_pcballoc(so, &V_tcbinfo);
+       if (error) {
+               sodealloc(so);
+               goto allocfail;
+       }
        inp = sotoinpcb(so);
-       inp->inp_inc.inc_fibnum = so->so_fibnum;
-       INP_WLOCK(inp);
-       /*
-        * Exclusive pcbinfo lock is not required in syncache socket case even
-        * if two inpcb locks can be acquired simultaneously:
-        *  - the inpcb in LISTEN state,
-        *  - the newly created inp.
-        *
-        * In this case, an inp cannot be at same time in LISTEN state and
-        * just created by an accept() call.
-        */
-       INP_HASH_WLOCK(&V_tcbinfo);
-
-       /* Insert new socket into PCB hash list. */
+       if ((tp = tcp_newtcpcb(inp)) == NULL) {
+               in_pcbdetach(inp);
+               in_pcbfree(inp);
+               sodealloc(so);
+               goto allocfail;
+       }
        inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
 #ifdef INET6
        if (sc->sc_inc.inc_flags & INC_ISIPV6) {
@@ -904,16 +885,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, 
struct mbuf *m)
                laddr6 = inp->in6p_laddr;
                if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
                        inp->in6p_laddr = sc->sc_inc.inc6_laddr;
-               if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
-                   thread0.td_ucred, m, false)) != 0) {
+               INP_HASH_WLOCK(&V_tcbinfo);
+               error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
+                   thread0.td_ucred, m, false);
+               INP_HASH_WUNLOCK(&V_tcbinfo);
+               if (error != 0) {
                        inp->in6p_laddr = laddr6;
-                       if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) 
{
-                               log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
-                                   "with error %i\n",
-                                   s, __func__, error);
-                               free(s, M_TCPLOG);
-                       }
-                       INP_HASH_WUNLOCK(&V_tcbinfo);
                        goto abort;
                }
                /* Override flowlabel from in6_pcbconnect. */
@@ -944,16 +921,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, 
struct mbuf *m)
                laddr = inp->inp_laddr;
                if (inp->inp_laddr.s_addr == INADDR_ANY)
                        inp->inp_laddr = sc->sc_inc.inc_laddr;
-               if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin,
-                   thread0.td_ucred, false)) != 0) {
+               INP_HASH_WLOCK(&V_tcbinfo);
+               error = in_pcbconnect(inp, (struct sockaddr *)&sin,
+                   thread0.td_ucred, false);
+               INP_HASH_WUNLOCK(&V_tcbinfo);
+               if (error != 0) {
                        inp->inp_laddr = laddr;
-                       if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) 
{
-                               log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
-                                   "with error %i\n",
-                                   s, __func__, error);
-                               free(s, M_TCPLOG);
-                       }
-                       INP_HASH_WUNLOCK(&V_tcbinfo);
                        goto abort;
                }
        }
@@ -963,9 +936,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, 
struct mbuf *m)
        if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0)
                printf("syncache_socket: could not copy policy\n");
 #endif
-       INP_HASH_WUNLOCK(&V_tcbinfo);
-       tp = intotcpcb(inp);
-       tcp_state_change(tp, TCPS_SYN_RECEIVED);
+       tp->t_state = TCPS_SYN_RECEIVED;
        tp->iss = sc->sc_iss;
        tp->irs = sc->sc_irs;
        tp->t_port = sc->sc_port;
@@ -1066,13 +1037,37 @@ syncache_socket(struct syncache *sc, struct socket 
*lso, struct mbuf *m)
        tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 
        TCPSTAT_INC(tcps_accepts);
+       TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, TCPS_LISTEN);
+
+       solisten_enqueue(so, SS_ISCONNECTED);
+
        return (so);
 
+allocfail:
+       /*
+        * Drop the connection; we will either send a RST or have the peer
+        * retransmit its SYN again after its RTO and try again.
+        */
+       if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+               log(LOG_DEBUG, "%s; %s: Socket create failed "
+                   "due to limits or memory shortage\n",
+                   s, __func__);
+               free(s, M_TCPLOG);
+       }
+       TCPSTAT_INC(tcps_listendrop);
+       return (NULL);
+
 abort:
-       INP_WUNLOCK(inp);
-abort2:
-       if (so != NULL)
-               soabort(so);
+       in_pcbdetach(inp);
+       in_pcbfree(inp);
+       sodealloc(so);
+       if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+               log(LOG_DEBUG, "%s; %s: in%s_pcbconnect failed with error %i\n",
+                   s, __func__, (sc->sc_inc.inc_flags & INC_ISIPV6) ? "6" : "",
+                   error);
+               free(s, M_TCPLOG);
+       }
+       TCPSTAT_INC(tcps_listendrop);
        return (NULL);
 }
 
@@ -1176,6 +1171,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt 
*to, struct tcphdr *th,
                        return (-1); /* Do not send RST */
                }
 #endif /* TCP_SIGNATURE */
+               TCPSTATES_INC(TCPS_SYN_RECEIVED);
        } else {
                if (sc->sc_port != port) {
                        SCH_UNLOCK(sch);
@@ -1282,17 +1278,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt 
*to, struct tcphdr *th,
                                return (-1);  /* Do not send RST */
                        }
                }
-
-               /*
-                * Pull out the entry to unlock the bucket row.
-                *
-                * NOTE: We must decrease TCPS_SYN_RECEIVED count here, not
-                * tcp_state_change().  The tcpcb is not existent at this
-                * moment.  A new one will be allocated via syncache_socket->
-                * sonewconn->tcp_usr_attach in TCPS_CLOSED state, then
-                * syncache_socket() will change it to TCPS_SYN_RECEIVED.
-                */
-               TCPSTATES_DEC(TCPS_SYN_RECEIVED);
                TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
                sch->sch_length--;
 #ifdef TCP_OFFLOAD
@@ -1340,8 +1325,11 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt 
*to, struct tcphdr *th,
                syncache_free(sc);
        return (1);
 failed:
-       if (sc != NULL && sc != &scs)
-               syncache_free(sc);
+       if (sc != NULL) {
+               TCPSTATES_DEC(TCPS_SYN_RECEIVED);
+               if (sc != &scs)
+                       syncache_free(sc);
+       }
        if (s != NULL)
                free(s, M_TCPLOG);
        *lsop = NULL;
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 9280e4310bc3..d77f37efad7c 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -172,11 +172,9 @@ tcp_usr_attach(struct socket *so, int proto, struct thread 
*td)
        KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
        TCPDEBUG1();
 
-       if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
-               error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
-               if (error)
-                       goto out;
-       }
+       error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
+       if (error)
+               goto out;
 
        so->so_rcv.sb_flags |= SB_AUTOSIZE;
        so->so_snd.sb_flags |= SB_AUTOSIZE;
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index ed797c6d6239..678642eeff6d 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -498,6 +498,7 @@ int soreceive_generic(struct socket *so, struct sockaddr 
**paddr,
            struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
            int *flagsp);
 void   sorele_locked(struct socket *so);
+void   sodealloc(struct socket *);
 int    soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
 void   sorflush(struct socket *so);
 int    sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,

Reply via email to