Re: lo(4) loopback LRO and TSO
On July 2, 2023 2:33:41 PM GMT+02:00, Claudio Jeker wrote: >On Sun, Jul 02, 2023 at 02:28:17PM +0200, Alexander Bluhm wrote: >> anyone? > >Was not able to test yet but I like the diff. >Right now this is a noop since LRO is not on by default for lo(4). >Because of that OK claudio@ The diff works fine in my sparc64 setup. ok jan@ >> On Fri, Jun 23, 2023 at 06:06:16PM +0200, Alexander Bluhm wrote: >> > Hi, >> > >> > Claudio@ mentioned the idea to use TSO and LRO on the loopback >> > interface to transfer TCP faster. >> > >> > I see a performance effect with this diff, but more importantly it >> > gives us more test coverage. Currently LRO on lo(4) is default >> > off. >> > >> > Future plan is: >> > - Fix some corner cases for LRO/TSO with TCP path-MTU discovery >> > and IP forwarding when LRO is enabled. >> > - Enable LRO/TSO for lo(4) and ix(4) per default. >> > - Jan@ commits his ixl(4) TSO diff. >> > >> > ok for lo(4) LRO/TSO with default off? >> > >> > bluhm >> > >> > Index: sys/net/if.c >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v >> > retrieving revision 1.700 >> > diff -u -p -r1.700 if.c >> > --- sys/net/if.c 12 Jun 2023 21:19:54 - 1.700 >> > +++ sys/net/if.c 23 Jun 2023 15:48:27 - >> > @@ -106,6 +106,9 @@ >> > #ifdef MROUTING >> > #include <netinet/ip_mroute.h> >> > #endif >> > +#include <netinet/tcp.h> >> > +#include <netinet/tcp_timer.h> >> > +#include <netinet/tcp_var.h> >> > >> > #ifdef INET6 >> > #include <netinet6/in6_var.h> >> > @@ -802,12 +805,29 @@ if_input_local(struct ifnet *ifp, struct >> > * is now incorrect, will be calculated before sending. 
>> > */ >> >keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT | >> > - M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT); >> > + M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT | >> > + M_TCP_TSO); >> >m_resethdr(m); >> >m->m_flags |= M_LOOP | keepflags; >> >m->m_pkthdr.csum_flags = keepcksum; >> >m->m_pkthdr.ph_ifidx = ifp->if_index; >> >m->m_pkthdr.ph_rtableid = ifp->if_rdomain; >> > + >> > + if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) { >> > + if (ifp->if_mtu > 0 && >> > + ((af == AF_INET && >> > + ISSET(ifp->if_capabilities, IFCAP_TSOv4)) || >> > + (af == AF_INET6 && >> > + ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) { >> > + tcpstat_inc(tcps_inswlro); >> > + tcpstat_add(tcps_inpktlro, >> > + (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu); >> > + } else { >> > + tcpstat_inc(tcps_inbadlro); >> > + m_freem(m); >> > + return (EPROTONOSUPPORT); >> > + } >> > + } >> > >> >if (ISSET(keepcksum, M_TCP_CSUM_OUT)) >> >m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; >> > Index: sys/net/if_loop.c >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v >> > retrieving revision 1.94 >> > diff -u -p -r1.94 if_loop.c >> > --- sys/net/if_loop.c 5 Jun 2023 11:35:46 - 1.94 >> > +++ sys/net/if_loop.c 23 Jun 2023 15:48:27 - >> > @@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, >> >ifp->if_xflags = IFXF_CLONED; >> >ifp->if_capabilities = IFCAP_CSUM_IPv4 | >> >IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | >> > - IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; >> > + IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 | >> > + IFCAP_LRO; >> >ifp->if_rtrequest = lortrequest; >> >ifp->if_ioctl = loioctl; >> >ifp->if_input = loinput; >> > @@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, c >> > >> >switch (cmd) { >> >case SIOCSIFFLAGS: >> > + if (ISSET(ifp->if_xflags, IFXF_LRO)) >> > + SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); >> > + else >> > + CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); >> >break; >> > >> >case SIOCSIFADDR: >> > Index: 
sys/netinet/tcp_usrreq.c >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v >> > retrieving revision 1.219 >> > diff -u -p -r1.219 tcp_usrreq.c >> > --- sys/netinet/tcp_usrreq.c 23 May 2023 09:16:16 - 1.219 >> > +++ sys/netinet/tcp_usrreq.c 23 Jun 2023 15:48:27 - >> > @@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o >> >ASSIGN(tcps_outhwtso); >> >ASSIGN(tcps_outpkttso); >> >ASSIGN(tcps_outbadtso); >> > + ASSIGN(tcps_inswlro); >> >ASSIGN(tcps_inhwlro); >> >ASSIGN(tcps_inpktlro); >> >ASSIGN(tcps_inbadlro); >> > Index: sys/netinet/tcp_var.h >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v >> > retrieving revision 1.167 >> > diff -u -p -r1.167 tcp_var.h >> >
Re: lo(4) loopback LRO and TSO
On Sun, Jul 02, 2023 at 02:28:17PM +0200, Alexander Bluhm wrote: > anyone? Was not able to test yet but I like the diff. Right now this is a noop since LRO is not on by default for lo(4). Because of that OK claudio@ > On Fri, Jun 23, 2023 at 06:06:16PM +0200, Alexander Bluhm wrote: > > Hi, > > > > Claudio@ mentioned the idea to use TSO and LRO on the loopback > > interface to transfer TCP faster. > > > > I see a performance effect with this diff, but more importantly it > > gives us more test coverage. Currently LRO on lo(4) is default > > off. > > > > Future plan is: > > - Fix some corner cases for LRO/TSO with TCP path-MTU discovery > > and IP forwarding when LRO is enabled. > > - Enable LRO/TSO for lo(4) and ix(4) per default. > > - Jan@ commits his ixl(4) TSO diff. > > > > ok for lo(4) LRO/TSO with default off? > > > > bluhm > > > > Index: sys/net/if.c > > === > > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v > > retrieving revision 1.700 > > diff -u -p -r1.700 if.c > > --- sys/net/if.c 12 Jun 2023 21:19:54 - 1.700 > > +++ sys/net/if.c 23 Jun 2023 15:48:27 - > > @@ -106,6 +106,9 @@ > > #ifdef MROUTING > > #include <netinet/ip_mroute.h> > > #endif > > +#include <netinet/tcp.h> > > +#include <netinet/tcp_timer.h> > > +#include <netinet/tcp_var.h> > > > > #ifdef INET6 > > #include <netinet6/in6_var.h> > > @@ -802,12 +805,29 @@ if_input_local(struct ifnet *ifp, struct > > * is now incorrect, will be calculated before sending. 
> > */ > > keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT | > > - M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT); > > + M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT | > > + M_TCP_TSO); > > m_resethdr(m); > > m->m_flags |= M_LOOP | keepflags; > > m->m_pkthdr.csum_flags = keepcksum; > > m->m_pkthdr.ph_ifidx = ifp->if_index; > > m->m_pkthdr.ph_rtableid = ifp->if_rdomain; > > + > > + if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) { > > + if (ifp->if_mtu > 0 && > > + ((af == AF_INET && > > + ISSET(ifp->if_capabilities, IFCAP_TSOv4)) || > > + (af == AF_INET6 && > > + ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) { > > + tcpstat_inc(tcps_inswlro); > > + tcpstat_add(tcps_inpktlro, > > + (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu); > > + } else { > > + tcpstat_inc(tcps_inbadlro); > > + m_freem(m); > > + return (EPROTONOSUPPORT); > > + } > > + } > > > > if (ISSET(keepcksum, M_TCP_CSUM_OUT)) > > m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; > > Index: sys/net/if_loop.c > > === > > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v > > retrieving revision 1.94 > > diff -u -p -r1.94 if_loop.c > > --- sys/net/if_loop.c 5 Jun 2023 11:35:46 - 1.94 > > +++ sys/net/if_loop.c 23 Jun 2023 15:48:27 - > > @@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, > > ifp->if_xflags = IFXF_CLONED; > > ifp->if_capabilities = IFCAP_CSUM_IPv4 | > > IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | > > - IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > > + IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 | > > + IFCAP_LRO; > > ifp->if_rtrequest = lortrequest; > > ifp->if_ioctl = loioctl; > > ifp->if_input = loinput; > > @@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, c > > > > switch (cmd) { > > case SIOCSIFFLAGS: > > + if (ISSET(ifp->if_xflags, IFXF_LRO)) > > + SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); > > + else > > + CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); > > break; > > > > case SIOCSIFADDR: > > Index: sys/netinet/tcp_usrreq.c > > === > > RCS 
file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v > > retrieving revision 1.219 > > diff -u -p -r1.219 tcp_usrreq.c > > --- sys/netinet/tcp_usrreq.c23 May 2023 09:16:16 - 1.219 > > +++ sys/netinet/tcp_usrreq.c23 Jun 2023 15:48:27 - > > @@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o > > ASSIGN(tcps_outhwtso); > > ASSIGN(tcps_outpkttso); > > ASSIGN(tcps_outbadtso); > > + ASSIGN(tcps_inswlro); > > ASSIGN(tcps_inhwlro); > > ASSIGN(tcps_inpktlro); > > ASSIGN(tcps_inbadlro); > > Index: sys/netinet/tcp_var.h > > === > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v > > retrieving revision 1.167 > > diff -u -p -r1.167 tcp_var.h > > --- sys/netinet/tcp_var.h 23 May 2023 09:16:16 - 1.167 > > +++ sys/netinet/tcp_var.h 23 Jun 2023 15:48:27 - > > @@ -447,6 +447,7 @@ struct tcpstat { > >
Re: lo(4) loopback LRO and TSO
anyone? On Fri, Jun 23, 2023 at 06:06:16PM +0200, Alexander Bluhm wrote: > Hi, > > Claudio@ mentioned the idea to use TSO and LRO on the loopback > interface to transfer TCP faster. > > I see a performance effect with this diff, but more importantly it > gives us more test coverage. Currently LRO on lo(4) is default > off. > > Future plan is: > - Fix some corner cases for LRO/TSO with TCP path-MTU discovery > and IP forwarding when LRO is enabled. > - Enable LRO/TSO for lo(4) and ix(4) per default. > - Jan@ commits his ixl(4) TSO diff. > > ok for lo(4) LRO/TSO with default off? > > bluhm > > Index: sys/net/if.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v > retrieving revision 1.700 > diff -u -p -r1.700 if.c > --- sys/net/if.c 12 Jun 2023 21:19:54 - 1.700 > +++ sys/net/if.c 23 Jun 2023 15:48:27 - > @@ -106,6 +106,9 @@ > #ifdef MROUTING > #include <netinet/ip_mroute.h> > #endif > +#include <netinet/tcp.h> > +#include <netinet/tcp_timer.h> > +#include <netinet/tcp_var.h> > > #ifdef INET6 > #include <netinet6/in6_var.h> > @@ -802,12 +805,29 @@ if_input_local(struct ifnet *ifp, struct >* is now incorrect, will be calculated before sending. 
>*/ > keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT | > - M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT); > + M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT | > + M_TCP_TSO); > m_resethdr(m); > m->m_flags |= M_LOOP | keepflags; > m->m_pkthdr.csum_flags = keepcksum; > m->m_pkthdr.ph_ifidx = ifp->if_index; > m->m_pkthdr.ph_rtableid = ifp->if_rdomain; > + > + if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) { > + if (ifp->if_mtu > 0 && > + ((af == AF_INET && > + ISSET(ifp->if_capabilities, IFCAP_TSOv4)) || > + (af == AF_INET6 && > + ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) { > + tcpstat_inc(tcps_inswlro); > + tcpstat_add(tcps_inpktlro, > + (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu); > + } else { > + tcpstat_inc(tcps_inbadlro); > + m_freem(m); > + return (EPROTONOSUPPORT); > + } > + } > > if (ISSET(keepcksum, M_TCP_CSUM_OUT)) > m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; > Index: sys/net/if_loop.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v > retrieving revision 1.94 > diff -u -p -r1.94 if_loop.c > --- sys/net/if_loop.c 5 Jun 2023 11:35:46 - 1.94 > +++ sys/net/if_loop.c 23 Jun 2023 15:48:27 - > @@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, > ifp->if_xflags = IFXF_CLONED; > ifp->if_capabilities = IFCAP_CSUM_IPv4 | > IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | > - IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > + IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 | > + IFCAP_LRO; > ifp->if_rtrequest = lortrequest; > ifp->if_ioctl = loioctl; > ifp->if_input = loinput; > @@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, c > > switch (cmd) { > case SIOCSIFFLAGS: > + if (ISSET(ifp->if_xflags, IFXF_LRO)) > + SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); > + else > + CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); > break; > > case SIOCSIFADDR: > Index: sys/netinet/tcp_usrreq.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v > retrieving revision 1.219 > diff -u -p -r1.219 
tcp_usrreq.c > --- sys/netinet/tcp_usrreq.c 23 May 2023 09:16:16 - 1.219 > +++ sys/netinet/tcp_usrreq.c 23 Jun 2023 15:48:27 - > @@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o > ASSIGN(tcps_outhwtso); > ASSIGN(tcps_outpkttso); > ASSIGN(tcps_outbadtso); > + ASSIGN(tcps_inswlro); > ASSIGN(tcps_inhwlro); > ASSIGN(tcps_inpktlro); > ASSIGN(tcps_inbadlro); > Index: sys/netinet/tcp_var.h > === > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v > retrieving revision 1.167 > diff -u -p -r1.167 tcp_var.h > --- sys/netinet/tcp_var.h 23 May 2023 09:16:16 - 1.167 > +++ sys/netinet/tcp_var.h 23 Jun 2023 15:48:27 - > @@ -447,6 +447,7 @@ struct tcpstat { > u_int32_t tcps_outhwtso;/* output tso processed by hardware */ > u_int32_t tcps_outpkttso; /* packets generated by tso */ > u_int32_t tcps_outbadtso; /* output tso failed, packet dropped */ > + u_int32_t tcps_inswlro; /* input lro on pseudo device */ > u_int32_t tcps_inhwlro; /* input lro from
lo(4) loopback LRO and TSO
Hi, Claudio@ mentioned the idea to use TSO and LRO on the loopback interface to transfer TCP faster. I see a performance effect with this diff, but more importantly it gives us more test coverage. Currently LRO on lo(4) is default off. Future plan is: - Fix some corner cases for LRO/TSO with TCP path-MTU discovery and IP forwarding when LRO is enabled. - Enable LRO/TSO for lo(4) and ix(4) per default. - Jan@ commits his ixl(4) TSO diff. ok for lo(4) LRO/TSO with default off? bluhm Index: sys/net/if.c === RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v retrieving revision 1.700 diff -u -p -r1.700 if.c --- sys/net/if.c 12 Jun 2023 21:19:54 - 1.700 +++ sys/net/if.c 23 Jun 2023 15:48:27 - @@ -106,6 +106,9 @@ #ifdef MROUTING #include <netinet/ip_mroute.h> #endif +#include <netinet/tcp.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> #ifdef INET6 #include <netinet6/in6_var.h> @@ -802,12 +805,29 @@ if_input_local(struct ifnet *ifp, struct * is now incorrect, will be calculated before sending. */ keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT | - M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT); + M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT | + M_TCP_TSO); m_resethdr(m); m->m_flags |= M_LOOP | keepflags; m->m_pkthdr.csum_flags = keepcksum; m->m_pkthdr.ph_ifidx = ifp->if_index; m->m_pkthdr.ph_rtableid = ifp->if_rdomain; + + if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) { + if (ifp->if_mtu > 0 && + ((af == AF_INET && + ISSET(ifp->if_capabilities, IFCAP_TSOv4)) || + (af == AF_INET6 && + ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) { + tcpstat_inc(tcps_inswlro); + tcpstat_add(tcps_inpktlro, + (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu); + } else { + tcpstat_inc(tcps_inbadlro); + m_freem(m); + return (EPROTONOSUPPORT); + } + } if (ISSET(keepcksum, M_TCP_CSUM_OUT)) m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; Index: sys/net/if_loop.c === RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v retrieving revision 1.94 diff -u -p -r1.94 if_loop.c --- sys/net/if_loop.c 5 Jun 2023 11:35:46 - 1.94 +++ sys/net/if_loop.c 23 
Jun 2023 15:48:27 - @@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, ifp->if_xflags = IFXF_CLONED; ifp->if_capabilities = IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | - IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; + IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 | + IFCAP_LRO; ifp->if_rtrequest = lortrequest; ifp->if_ioctl = loioctl; ifp->if_input = loinput; @@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, c switch (cmd) { case SIOCSIFFLAGS: + if (ISSET(ifp->if_xflags, IFXF_LRO)) + SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); + else + CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); break; case SIOCSIFADDR: Index: sys/netinet/tcp_usrreq.c === RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.219 diff -u -p -r1.219 tcp_usrreq.c --- sys/netinet/tcp_usrreq.c23 May 2023 09:16:16 - 1.219 +++ sys/netinet/tcp_usrreq.c23 Jun 2023 15:48:27 - @@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o ASSIGN(tcps_outhwtso); ASSIGN(tcps_outpkttso); ASSIGN(tcps_outbadtso); + ASSIGN(tcps_inswlro); ASSIGN(tcps_inhwlro); ASSIGN(tcps_inpktlro); ASSIGN(tcps_inbadlro); Index: sys/netinet/tcp_var.h === RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.167 diff -u -p -r1.167 tcp_var.h --- sys/netinet/tcp_var.h 23 May 2023 09:16:16 - 1.167 +++ sys/netinet/tcp_var.h 23 Jun 2023 15:48:27 - @@ -447,6 +447,7 @@ struct tcpstat { u_int32_t tcps_outhwtso;/* output tso processed by hardware */ u_int32_t tcps_outpkttso; /* packets generated by tso */ u_int32_t tcps_outbadtso; /* output tso failed, packet dropped */ + u_int32_t tcps_inswlro; /* input lro on pseudo device */ u_int32_t tcps_inhwlro; /* input lro from hardware */ u_int32_t tcps_inpktlro;/* packets coalesced by hardware lro */ u_int32_t tcps_inbadlro;/* input bad lro packets */ @@ -628,6 +629,7 @@ enum