Re: Unlock ip6_sysctl()
> On 18 May 2023, at 01:14, Alexander Bluhm wrote: > > On Wed, May 17, 2023 at 12:46:02PM +0300, Vitaliy Makkoveev wrote: >> Introduce `ip6_soiikey_lock' rwlock(9) to protect `ip6_soiikey'. It >> accessed only by ip6_sysctl_soiikey() and ip6_sysctl() is the only >> ip6_sysctl_soiikey() caller so context switch is allowed here. Also >> remove unused `oldkey' from ip6_sysctl_soiikey(). > > rwlock or mutex? As sysctl mlocks userland memory, copyin/copyout > cannot sleep here. So it should be safe to use a mutex. It is locked because context switch is strictly unwanted when you do copyin/copyout within foreach loops of kernel lock protected lists like ps_list. Really this lock is the kind of a hack. But since rwlocks already introduced context switch to sysctl() paths, we use sysctl_lock rwlock to prevent many processes to simultaneously lock large amount of memory. This is another hack in this layer. Guess, both of them will gone in the future, so copyin/copyout will sleep. So rwlock is better. > > And why is this ip6_soiikey in the kernel anyway? I guess it is > from a time when address configuration was done in the kernel. > Could slaacd(8) just read /etc/soii.key? It could, but this should be implemented in slaacd first. And this work is not related to (*pr_sysctl)() unlocking. > >> For IPV6CTL_MRTSTATS, IPV6CTL_MRTMIF and IPV6CTL_MRTMFC cases netlock >> could be relaxed to share netlock, but with separate diffs. >> >> ok? >> >> Index: sys/netinet6/in6_proto.c >> === >> RCS file: /cvs/src/sys/netinet6/in6_proto.c,v >> retrieving revision 1.112 >> diff -u -p -r1.112 in6_proto.c >> --- sys/netinet6/in6_proto.c 23 Nov 2022 14:48:28 - 1.112 >> +++ sys/netinet6/in6_proto.c 17 May 2023 09:37:05 - >> @@ -128,6 +128,7 @@ const struct protosw inet6sw[] = { >> { >> .pr_domain = , >> .pr_protocol = IPPROTO_IPV6, >> + .pr_flags = PR_MPSYSCTL, >> .pr_init = ip6_init, >> .pr_slowtimo = frag6_slowtimo, >> .pr_sysctl = ip6_sysctl >> Index: sys/netinet6/ip6_input.c >> === >> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v >> retrieving revision 1.254 >> diff -u -p -r1.254 ip6_input.c >> --- sys/netinet6/ip6_input.c 21 Aug 2022 14:15:55 - 1.254 >> +++ sys/netinet6/ip6_input.c 17 May 2023 09:37:05 - >> @@ -118,6 +118,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA >> struct cpumem *ip6counters; >> >> uint8_t ip6_soiikey[IP6_SOIIKEY_LEN]; >> +struct rwlock ip6_soiikey_lock = RWLOCK_INITIALIZER("soiikeylk"); >> >> int ip6_ours(struct mbuf **, int *, int, int); >> int ip6_check_rh0hdr(struct mbuf *, int *); >> @@ -1477,17 +1478,16 @@ ip6_sysctl_ip6stat(void *oldp, size_t *o >> int >> ip6_sysctl_soiikey(void *oldp, size_t *oldlenp, void *newp, size_t newlen) >> { >> -uint8_t oldkey[IP6_SOIIKEY_LEN]; >> int error; >> >> error = suser(curproc); >> if (error != 0) >> return (error); >> >> -memcpy(oldkey, ip6_soiikey, sizeof(oldkey)); >> - >> +rw_enter_write(_soiikey_lock); >> error = sysctl_struct(oldp, oldlenp, newp, newlen, ip6_soiikey, >> sizeof(ip6_soiikey)); >> +rw_exit_write(_soiikey_lock); >> >> return (error); >> }
Re: Unlock ip6_sysctl()
On Wed, May 17, 2023 at 12:46:02PM +0300, Vitaliy Makkoveev wrote: > Introduce `ip6_soiikey_lock' rwlock(9) to protect `ip6_soiikey'. It > accessed only by ip6_sysctl_soiikey() and ip6_sysctl() is the only > ip6_sysctl_soiikey() caller so context switch is allowed here. Also > remove unused `oldkey' from ip6_sysctl_soiikey(). rwlock or mutex? As sysctl mlocks userland memory, copyin/copyout cannot sleep here. So it should be safe to use a mutex. And why is this ip6_soiikey in the kernel anyway? I guess it is from a time when address configuration was done in the kernel. Could slaacd(8) just read /etc/soii.key? > For IPV6CTL_MRTSTATS, IPV6CTL_MRTMIF and IPV6CTL_MRTMFC cases netlock > could be relaxed to share netlock, but with separate diffs. > > ok? > > Index: sys/netinet6/in6_proto.c > === > RCS file: /cvs/src/sys/netinet6/in6_proto.c,v > retrieving revision 1.112 > diff -u -p -r1.112 in6_proto.c > --- sys/netinet6/in6_proto.c 23 Nov 2022 14:48:28 - 1.112 > +++ sys/netinet6/in6_proto.c 17 May 2023 09:37:05 - > @@ -128,6 +128,7 @@ const struct protosw inet6sw[] = { > { >.pr_domain = , >.pr_protocol = IPPROTO_IPV6, > + .pr_flags = PR_MPSYSCTL, >.pr_init = ip6_init, >.pr_slowtimo = frag6_slowtimo, >.pr_sysctl = ip6_sysctl > Index: sys/netinet6/ip6_input.c > === > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v > retrieving revision 1.254 > diff -u -p -r1.254 ip6_input.c > --- sys/netinet6/ip6_input.c 21 Aug 2022 14:15:55 - 1.254 > +++ sys/netinet6/ip6_input.c 17 May 2023 09:37:05 - > @@ -118,6 +118,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA > struct cpumem *ip6counters; > > uint8_t ip6_soiikey[IP6_SOIIKEY_LEN]; > +struct rwlock ip6_soiikey_lock = RWLOCK_INITIALIZER("soiikeylk"); > > int ip6_ours(struct mbuf **, int *, int, int); > int ip6_check_rh0hdr(struct mbuf *, int *); > @@ -1477,17 +1478,16 @@ ip6_sysctl_ip6stat(void *oldp, size_t *o > int > ip6_sysctl_soiikey(void *oldp, size_t *oldlenp, void *newp, size_t newlen) > { > - uint8_t oldkey[IP6_SOIIKEY_LEN]; > int error; > > error = suser(curproc); > if (error != 0) > return (error); > > - memcpy(oldkey, ip6_soiikey, sizeof(oldkey)); > - > + rw_enter_write(_soiikey_lock); > error = sysctl_struct(oldp, oldlenp, newp, newlen, ip6_soiikey, > sizeof(ip6_soiikey)); > + rw_exit_write(_soiikey_lock); > > return (error); > }
Re: Add LRO counter in ix(4)
On Tue, May 16, 2023 at 09:11:48PM +0200, Jan Klemkow wrote: > @@ -412,6 +412,10 @@ tcp_stats(char *name) > p(tcps_outhwtso, "\t\t%u output TSO packet%s hardware processed\n"); > p(tcps_outpkttso, "\t\t%u output TSO packet%s generated\n"); > p(tcps_outbadtso, "\t\t%u output TSO packet%s dropped\n"); > + p(tcps_inhwlro, "\t\t%u input LRO generated packet%s from hardware\n"); > + p(tcps_inpktlro, "\t\t%u input LRO coalesced packet%s from hardware\n"); ... coalesced packet%s by hardware > + p(tcps_inbadlro, "\t\t%u input bad LRO packet%s from hardware\n"); > + Move this down to the "packets received" section. You included it in "packets sent". > + /* > + * This function iterates over interleaved descriptors. > + * Thus, we reuse ph_mss as global segment counter per > + * TCP connection, insteat of introducing a new variable s/insteat/instead/ > +struct tdb; > + This will be in tcp_var.h when you commit the other diff.
Re: Add LRO counter in ix(4)
On 16.5.2023. 21:11, Jan Klemkow wrote: > Hi, > > This diff introduces new counters for LRO packets, we get from the > network interface. It shows, how many packets the network interface has > coalesced into LRO packets. > > In followup diff, this packet counter will also be used to set the > ph_mss variable to valid value. So, the stack is able to forward or > redirect this kind of packets. Hi, when LRO is enabled with "ifconfig ix0 tcprecvoffload" LRO counter is increasing if I'm sending or receiving tcp traffic with iperf. Is this ok? TSO counter is increasing only when I'm sending traffic. ix0 at pci5 dev 0 function 0 "Intel X552 SFP+" rev 0x00, msix, 4 queues smc4# ifconfig ix0 tcprecvoffload smc4# ifconfig ix0 ix0: flags=2008843 mtu 1500 smc4# ifconfig ix0 -tcprecvoffload smc4# ifconfig ix0 ix0: flags=8843 mtu 1500 smc4# netstat -sp tcp | egrep "TSO|LRO" 10843097 output TSO packets software chopped 0 output TSO packets hardware processed 136103193 output TSO packets generated 0 output TSO packets dropped 4940412 input LRO generated packets from hardware 74984192 input LRO coalesced packets from hardware 0 input bad LRO packets from hardware It is 0 on "TSO packets hardware processed" because I didn't applied latest bluhm@ "ix hardware tso" diff
Re: ix hardware tso
On Tue, May 16, 2023 at 12:35:06PM -0600, Todd C. Miller wrote: > On Tue, 16 May 2023 19:26:07 +0200, Alexander Bluhm wrote: > > > On Tue, May 16, 2023 at 11:15:31AM -0600, Todd C. Miller wrote: > > > Would it be possible to move the forward declaration of struct tdb > > > to netinet/tcp_var.h so it is not required in every driver? > > > > sure > > Thanks, that looks better to me. I have tested ix(4) "Intel X540T" manually, "Intel 82598AF" and "Intel 82599" are in my automatic setup. Hvroje also did a lot of testing. Diff survived a make release. I think jan@ should commit this now. OK bluhm@ Index: dev/pci/if_ix.c === RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.194 diff -u -p -r1.194 if_ix.c --- dev/pci/if_ix.c 16 May 2023 14:32:54 - 1.194 +++ dev/pci/if_ix.c 17 May 2023 08:53:07 - @@ -1924,6 +1924,7 @@ ixgbe_setup_interface(struct ix_softc *s ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; ifp->if_capabilities |= IFCAP_CSUM_IPv4; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; if (sc->hw.mac.type != ixgbe_mac_82598EB) ifp->if_capabilities |= IFCAP_LRO; @@ -2344,6 +2345,7 @@ ixgbe_initialize_transmit_units(struct i int i; uint64_t tdba; uint32_t txctrl; + uint32_t hlreg; /* Setup the Base and Length of the Tx Descriptor Ring */ @@ -2405,6 +2407,11 @@ ixgbe_initialize_transmit_units(struct i rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } + + /* Enable TCP/UDP padding when using TSO */ + hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + hlreg |= IXGBE_HLREG0_TXPADEN; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); } /* @@ -2473,16 +2480,18 @@ ixgbe_free_transmit_buffers(struct tx_ri **/ static inline int -ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, -uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) +ixgbe_tx_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, +uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status, uint32_t *cmd_type_len, +uint32_t *mss_l4len_idx) { struct ether_extracted ext; int offload = 0; - uint32_t iphlen; + uint32_t ethlen, iphlen; ether_extract_headers(mp, ); + ethlen = sizeof(*ext.eh); - *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + *vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT); if (ext.ip4) { iphlen = ext.ip4->ip_hl << 2; @@ -2500,6 +2509,8 @@ ixgbe_csum_offload(struct mbuf *mp, uint *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; #endif } else { + if (mp->m_pkthdr.csum_flags & M_TCP_TSO) + tcpstat_inc(tcps_outbadtso); return offload; } @@ -2519,6 +2530,31 @@ ixgbe_csum_offload(struct mbuf *mp, uint } } + if (mp->m_pkthdr.csum_flags & M_TCP_TSO) { + if (ext.tcp) { + uint32_t hdrlen, thlen, paylen, outlen; + + thlen = ext.tcp->th_off << 2; + + outlen = mp->m_pkthdr.ph_mss; + *mss_l4len_idx |= outlen << IXGBE_ADVTXD_MSS_SHIFT; + *mss_l4len_idx |= thlen << IXGBE_ADVTXD_L4LEN_SHIFT; + + hdrlen = ethlen + iphlen + thlen; + paylen = mp->m_pkthdr.len - hdrlen; + CLR(*olinfo_status, IXGBE_ADVTXD_PAYLEN_MASK + << IXGBE_ADVTXD_PAYLEN_SHIFT); + *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; + + *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; + offload = 1; + + tcpstat_add(tcps_outpkttso, + (paylen + outlen - 1) / outlen); + } else + tcpstat_inc(tcps_outbadtso); + } + return offload; } @@ -2529,6 +2565,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct ixgbe_adv_tx_context_desc *TXD; struct ixgbe_tx_buf *tx_buffer; uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0; + uint32_t mss_l4len_idx = 0; int ctxd = txr->next_avail_desc; int offload = 0; @@ -2544,8 +2581,8 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, } #endif - offload |= ixgbe_csum_offload(mp, _macip_lens, _tucmd_mlhl, - olinfo_status); + offload |= ixgbe_tx_offload(mp, _macip_lens, _tucmd_mlhl, + olinfo_status, cmd_type_len, _l4len_idx); if (!offload) return (0); @@ -2559,7
Relax netlock to shared netlock and push it down to mrt_sysctl_vif()
Also read-only access to netlock protected data. Index: sys/netinet/ip_input.c === RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.384 diff -u -p -r1.384 ip_input.c --- sys/netinet/ip_input.c 16 May 2023 19:36:00 - 1.384 +++ sys/netinet/ip_input.c 17 May 2023 10:06:49 - @@ -1719,10 +1719,7 @@ ip_sysctl(int *name, u_int namelen, void case IPCTL_MRTVIF: if (newp) return (EPERM); - NET_LOCK(); - error = mrt_sysctl_vif(oldp, oldlenp); - NET_UNLOCK(); - return (error); + return (mrt_sysctl_vif(oldp, oldlenp)); #else case IPCTL_MRTPROTO: case IPCTL_MRTSTATS: Index: sys/netinet/ip_mroute.c === RCS file: /cvs/src/sys/netinet/ip_mroute.c,v retrieving revision 1.138 diff -u -p -r1.138 ip_mroute.c --- sys/netinet/ip_mroute.c 19 Apr 2023 20:03:51 - 1.138 +++ sys/netinet/ip_mroute.c 17 May 2023 10:06:49 - @@ -356,6 +356,8 @@ mrt_sysctl_vif(void *oldp, size_t *oldle given = *oldlenp; needed = 0; memset(, 0, sizeof vinfo); + + NET_LOCK_SHARED(); TAILQ_FOREACH(ifp, , if_list) { if ((vifp = (struct vif *)ifp->if_mcast) == NULL) continue; @@ -375,11 +377,15 @@ mrt_sysctl_vif(void *oldp, size_t *oldle int error; error = copyout(, where, sizeof(vinfo)); - if (error) + if (error) { + NET_UNLOCK_SHARED(); return (error); + } where += sizeof(vinfo); } } + NET_UNLOCK_SHARED(); + if (where) { *oldlenp = needed; if (given < needed)
Relax netlock to shared netlock and push it down to mrt_sysctl_mfc()
mrt_rtwalk_mfcsysctl() performs read-only access to protected data, so rtable_walk() could be called with shared netlock. Index: sys/netinet/ip_input.c === RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.384 diff -u -p -r1.384 ip_input.c --- sys/netinet/ip_input.c 16 May 2023 19:36:00 - 1.384 +++ sys/netinet/ip_input.c 17 May 2023 09:59:16 - @@ -1712,10 +1712,7 @@ ip_sysctl(int *name, u_int namelen, void case IPCTL_MRTMFC: if (newp) return (EPERM); - NET_LOCK(); - error = mrt_sysctl_mfc(oldp, oldlenp); - NET_UNLOCK(); - return (error); + return (mrt_sysctl_mfc(oldp, oldlenp)); case IPCTL_MRTVIF: if (newp) return (EPERM); Index: sys/netinet/ip_mroute.c === RCS file: /cvs/src/sys/netinet/ip_mroute.c,v retrieving revision 1.138 diff -u -p -r1.138 ip_mroute.c --- sys/netinet/ip_mroute.c 19 Apr 2023 20:03:51 - 1.138 +++ sys/netinet/ip_mroute.c 17 May 2023 09:59:16 - @@ -479,10 +479,12 @@ mrt_sysctl_mfc(void *oldp, size_t *oldle msa.msa_len = *oldlenp; msa.msa_needed = 0; + NET_LOCK_SHARED(); for (rtableid = 0; rtableid <= RT_TABLEID_MAX; rtableid++) { rtable_walk(rtableid, AF_INET, NULL, mrt_rtwalk_mfcsysctl, ); } + NET_UNLOCK_SHARED(); if (msa.msa_minfos != NULL && msa.msa_needed > 0 && (error = copyout(msa.msa_minfos, oldp, msa.msa_needed)) != 0) {
Unlock ip6_sysctl()
Introduce `ip6_soiikey_lock' rwlock(9) to protect `ip6_soiikey'. It accessed only by ip6_sysctl_soiikey() and ip6_sysctl() is the only ip6_sysctl_soiikey() caller so context switch is allowed here. Also remove unused `oldkey' from ip6_sysctl_soiikey(). For IPV6CTL_MRTSTATS, IPV6CTL_MRTMIF and IPV6CTL_MRTMFC cases netlock could be relaxed to share netlock, but with separate diffs. ok? Index: sys/netinet6/in6_proto.c === RCS file: /cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.112 diff -u -p -r1.112 in6_proto.c --- sys/netinet6/in6_proto.c23 Nov 2022 14:48:28 - 1.112 +++ sys/netinet6/in6_proto.c17 May 2023 09:37:05 - @@ -128,6 +128,7 @@ const struct protosw inet6sw[] = { { .pr_domain = , .pr_protocol = IPPROTO_IPV6, + .pr_flags= PR_MPSYSCTL, .pr_init = ip6_init, .pr_slowtimo = frag6_slowtimo, .pr_sysctl = ip6_sysctl Index: sys/netinet6/ip6_input.c === RCS file: /cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.254 diff -u -p -r1.254 ip6_input.c --- sys/netinet6/ip6_input.c21 Aug 2022 14:15:55 - 1.254 +++ sys/netinet6/ip6_input.c17 May 2023 09:37:05 - @@ -118,6 +118,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA struct cpumem *ip6counters; uint8_t ip6_soiikey[IP6_SOIIKEY_LEN]; +struct rwlock ip6_soiikey_lock = RWLOCK_INITIALIZER("soiikeylk"); int ip6_ours(struct mbuf **, int *, int, int); int ip6_check_rh0hdr(struct mbuf *, int *); @@ -1477,17 +1478,16 @@ ip6_sysctl_ip6stat(void *oldp, size_t *o int ip6_sysctl_soiikey(void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - uint8_t oldkey[IP6_SOIIKEY_LEN]; int error; error = suser(curproc); if (error != 0) return (error); - memcpy(oldkey, ip6_soiikey, sizeof(oldkey)); - + rw_enter_write(_soiikey_lock); error = sysctl_struct(oldp, oldlenp, newp, newlen, ip6_soiikey, sizeof(ip6_soiikey)); + rw_exit_write(_soiikey_lock); return (error); }