Re: Unlock ip6_sysctl()

2023-05-17 Thread Vitaliy Makkoveev
> On 18 May 2023, at 01:14, Alexander Bluhm  wrote:
> 
> On Wed, May 17, 2023 at 12:46:02PM +0300, Vitaliy Makkoveev wrote:
>> Introduce `ip6_soiikey_lock' rwlock(9) to protect `ip6_soiikey'. It
>> accessed only by ip6_sysctl_soiikey() and ip6_sysctl() is the only
>> ip6_sysctl_soiikey() caller so context switch is allowed here. Also
>> remove unused `oldkey' from ip6_sysctl_soiikey().
> 
> rwlock or mutex?  As sysctl mlocks userland memory, copyin/copyout
> cannot sleep here.  So it should be safe to use a mutex.

It is locked because context switch is strictly unwanted when you
do copyin/copyout within foreach loops of kernel lock protected
lists like ps_list. Really this lock is the kind of a hack. But
since rwlocks already introduced context switch to sysctl() paths,
we use sysctl_lock rwlock to prevent many processes to
simultaneously lock large amount of memory. This is another hack
in this layer. Guess, both of them will gone in the future, so
copyin/copyout will sleep. So rwlock is better.

> 
> And why is this ip6_soiikey in the kernel anyway?  I guess it is
> from a time when address configuration was done in the kernel.
> Could slaacd(8) just read /etc/soii.key?

It could, but this should be implemented in slaacd first. And this
work is not related to (*pr_sysctl)() unlocking.

> 
>> For IPV6CTL_MRTSTATS, IPV6CTL_MRTMIF and IPV6CTL_MRTMFC cases netlock
>> could be relaxed to share netlock, but with separate diffs.
>> 
>> ok?
>> 
>> Index: sys/netinet6/in6_proto.c
>> ===
>> RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
>> retrieving revision 1.112
>> diff -u -p -r1.112 in6_proto.c
>> --- sys/netinet6/in6_proto.c 23 Nov 2022 14:48:28 -  1.112
>> +++ sys/netinet6/in6_proto.c 17 May 2023 09:37:05 -
>> @@ -128,6 +128,7 @@ const struct protosw inet6sw[] = {
>> {
>>   .pr_domain = ,
>>   .pr_protocol   = IPPROTO_IPV6,
>> +  .pr_flags = PR_MPSYSCTL,
>>   .pr_init   = ip6_init,
>>   .pr_slowtimo   = frag6_slowtimo,
>>   .pr_sysctl = ip6_sysctl
>> Index: sys/netinet6/ip6_input.c
>> ===
>> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
>> retrieving revision 1.254
>> diff -u -p -r1.254 ip6_input.c
>> --- sys/netinet6/ip6_input.c 21 Aug 2022 14:15:55 -  1.254
>> +++ sys/netinet6/ip6_input.c 17 May 2023 09:37:05 -
>> @@ -118,6 +118,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA
>> struct cpumem *ip6counters;
>> 
>> uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
>> +struct rwlock ip6_soiikey_lock = RWLOCK_INITIALIZER("soiikeylk");
>> 
>> int ip6_ours(struct mbuf **, int *, int, int);
>> int ip6_check_rh0hdr(struct mbuf *, int *);
>> @@ -1477,17 +1478,16 @@ ip6_sysctl_ip6stat(void *oldp, size_t *o
>> int
>> ip6_sysctl_soiikey(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
>> {
>> -uint8_t oldkey[IP6_SOIIKEY_LEN];
>>  int error;
>> 
>>  error = suser(curproc);
>>  if (error != 0)
>>  return (error);
>> 
>> -memcpy(oldkey, ip6_soiikey, sizeof(oldkey));
>> -
>> +rw_enter_write(_soiikey_lock);
>>  error = sysctl_struct(oldp, oldlenp, newp, newlen, ip6_soiikey,
>>  sizeof(ip6_soiikey));
>> +rw_exit_write(_soiikey_lock);
>> 
>>  return (error);
>> }



Re: Unlock ip6_sysctl()

2023-05-17 Thread Alexander Bluhm
On Wed, May 17, 2023 at 12:46:02PM +0300, Vitaliy Makkoveev wrote:
> Introduce `ip6_soiikey_lock' rwlock(9) to protect `ip6_soiikey'. It
> accessed only by ip6_sysctl_soiikey() and ip6_sysctl() is the only
> ip6_sysctl_soiikey() caller so context switch is allowed here. Also
> remove unused `oldkey' from ip6_sysctl_soiikey().

rwlock or mutex?  As sysctl mlocks userland memory, copyin/copyout
cannot sleep here.  So it should be safe to use a mutex.

And why is this ip6_soiikey in the kernel anyway?  I guess it is
from a time when address configuration was done in the kernel.
Could slaacd(8) just read /etc/soii.key?

> For IPV6CTL_MRTSTATS, IPV6CTL_MRTMIF and IPV6CTL_MRTMFC cases netlock
> could be relaxed to share netlock, but with separate diffs.
> 
> ok?
> 
> Index: sys/netinet6/in6_proto.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
> retrieving revision 1.112
> diff -u -p -r1.112 in6_proto.c
> --- sys/netinet6/in6_proto.c  23 Nov 2022 14:48:28 -  1.112
> +++ sys/netinet6/in6_proto.c  17 May 2023 09:37:05 -
> @@ -128,6 +128,7 @@ const struct protosw inet6sw[] = {
>  {
>.pr_domain = ,
>.pr_protocol   = IPPROTO_IPV6,
> +  .pr_flags  = PR_MPSYSCTL,
>.pr_init   = ip6_init,
>.pr_slowtimo   = frag6_slowtimo,
>.pr_sysctl = ip6_sysctl
> Index: sys/netinet6/ip6_input.c
> ===
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.254
> diff -u -p -r1.254 ip6_input.c
> --- sys/netinet6/ip6_input.c  21 Aug 2022 14:15:55 -  1.254
> +++ sys/netinet6/ip6_input.c  17 May 2023 09:37:05 -
> @@ -118,6 +118,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA
>  struct cpumem *ip6counters;
>  
>  uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
> +struct rwlock ip6_soiikey_lock = RWLOCK_INITIALIZER("soiikeylk");
>  
>  int ip6_ours(struct mbuf **, int *, int, int);
>  int ip6_check_rh0hdr(struct mbuf *, int *);
> @@ -1477,17 +1478,16 @@ ip6_sysctl_ip6stat(void *oldp, size_t *o
>  int
>  ip6_sysctl_soiikey(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
>  {
> - uint8_t oldkey[IP6_SOIIKEY_LEN];
>   int error;
>  
>   error = suser(curproc);
>   if (error != 0)
>   return (error);
>  
> - memcpy(oldkey, ip6_soiikey, sizeof(oldkey));
> -
> + rw_enter_write(_soiikey_lock);
>   error = sysctl_struct(oldp, oldlenp, newp, newlen, ip6_soiikey,
>   sizeof(ip6_soiikey));
> + rw_exit_write(_soiikey_lock);
>  
>   return (error);
>  }



Re: Add LRO counter in ix(4)

2023-05-17 Thread Alexander Bluhm
On Tue, May 16, 2023 at 09:11:48PM +0200, Jan Klemkow wrote:
> @@ -412,6 +412,10 @@ tcp_stats(char *name)
>   p(tcps_outhwtso, "\t\t%u output TSO packet%s hardware processed\n");
>   p(tcps_outpkttso, "\t\t%u output TSO packet%s generated\n");
>   p(tcps_outbadtso, "\t\t%u output TSO packet%s dropped\n");
> + p(tcps_inhwlro, "\t\t%u input LRO generated packet%s from hardware\n");
> + p(tcps_inpktlro, "\t\t%u input LRO coalesced packet%s from hardware\n");

... coalesced packet%s by hardware

> + p(tcps_inbadlro, "\t\t%u input bad LRO packet%s from hardware\n");
> +

Move this down to the "packets received" section.  You included it
in "packets sent".

> + /*
> +  * This function iterates over interleaved descriptors.
> +  * Thus, we reuse ph_mss as global segment counter per
> +  * TCP connection, insteat of introducing a new variable

s/insteat/instead/

> +struct tdb;
> +

This will be in tcp_var.h when you commit the other diff.



Re: Add LRO counter in ix(4)

2023-05-17 Thread Hrvoje Popovski
On 16.5.2023. 21:11, Jan Klemkow wrote:
> Hi,
> 
> This diff introduces new counters for LRO packets, we get from the
> network interface.  It shows, how many packets the network interface has
> coalesced into LRO packets.
> 
> In followup diff, this packet counter will also be used to set the
> ph_mss variable to valid value.  So, the stack is able to forward or
> redirect this kind of packets.

Hi,

when LRO is enabled with "ifconfig ix0 tcprecvoffload" LRO counter is
increasing if I'm sending or receiving tcp traffic with iperf.
Is this ok?
TSO counter is increasing only when I'm sending traffic.



ix0 at pci5 dev 0 function 0 "Intel X552 SFP+" rev 0x00, msix, 4 queues

smc4# ifconfig ix0 tcprecvoffload
smc4# ifconfig ix0
ix0: flags=2008843 mtu 1500

smc4# ifconfig ix0 -tcprecvoffload
smc4# ifconfig ix0
ix0: flags=8843 mtu 1500

smc4# netstat -sp tcp | egrep "TSO|LRO"
10843097 output TSO packets software chopped
0 output TSO packets hardware processed
136103193 output TSO packets generated
0 output TSO packets dropped
4940412 input LRO generated packets from hardware
74984192 input LRO coalesced packets from hardware
0 input bad LRO packets from hardware

It is 0 on "TSO packets hardware processed" because I didn't applied
latest bluhm@ "ix hardware tso" diff




Re: ix hardware tso

2023-05-17 Thread Alexander Bluhm
On Tue, May 16, 2023 at 12:35:06PM -0600, Todd C. Miller wrote:
> On Tue, 16 May 2023 19:26:07 +0200, Alexander Bluhm wrote:
> 
> > On Tue, May 16, 2023 at 11:15:31AM -0600, Todd C. Miller wrote:
> > > Would it be possible to move the forward declaration of struct tdb
> > > to netinet/tcp_var.h so it is not required in every driver?
> >
> > sure
> 
> Thanks, that looks better to me.

I have tested ix(4) "Intel X540T" manually, "Intel 82598AF" and
"Intel 82599" are in my automatic setup.  Hvroje also did a lot of
testing.

Diff survived a make release.

I think jan@ should commit this now.  OK bluhm@

Index: dev/pci/if_ix.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.194
diff -u -p -r1.194 if_ix.c
--- dev/pci/if_ix.c 16 May 2023 14:32:54 -  1.194
+++ dev/pci/if_ix.c 17 May 2023 08:53:07 -
@@ -1924,6 +1924,7 @@ ixgbe_setup_interface(struct ix_softc *s
ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
ifp->if_capabilities |= IFCAP_CSUM_IPv4;
 
+   ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
if (sc->hw.mac.type != ixgbe_mac_82598EB)
ifp->if_capabilities |= IFCAP_LRO;
 
@@ -2344,6 +2345,7 @@ ixgbe_initialize_transmit_units(struct i
int  i;
uint64_t tdba;
uint32_t txctrl;
+   uint32_t hlreg;
 
/* Setup the Base and Length of the Tx Descriptor Ring */
 
@@ -2405,6 +2407,11 @@ ixgbe_initialize_transmit_units(struct i
rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
}
+
+   /* Enable TCP/UDP padding when using TSO */
+   hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+   hlreg |= IXGBE_HLREG0_TXPADEN;
+   IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
 }
 
 /*
@@ -2473,16 +2480,18 @@ ixgbe_free_transmit_buffers(struct tx_ri
  **/
 
 static inline int
-ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
-uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
+ixgbe_tx_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
+uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status, uint32_t *cmd_type_len,
+uint32_t *mss_l4len_idx)
 {
struct ether_extracted ext;
int offload = 0;
-   uint32_t iphlen;
+   uint32_t ethlen, iphlen;
 
ether_extract_headers(mp, );
+   ethlen = sizeof(*ext.eh);
 
-   *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   *vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT);
 
if (ext.ip4) {
iphlen = ext.ip4->ip_hl << 2;
@@ -2500,6 +2509,8 @@ ixgbe_csum_offload(struct mbuf *mp, uint
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
 #endif
} else {
+   if (mp->m_pkthdr.csum_flags & M_TCP_TSO)
+   tcpstat_inc(tcps_outbadtso);
return offload;
}
 
@@ -2519,6 +2530,31 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
}
 
+   if (mp->m_pkthdr.csum_flags & M_TCP_TSO) {
+   if (ext.tcp) {
+   uint32_t hdrlen, thlen, paylen, outlen;
+
+   thlen = ext.tcp->th_off << 2;
+
+   outlen = mp->m_pkthdr.ph_mss;
+   *mss_l4len_idx |= outlen << IXGBE_ADVTXD_MSS_SHIFT;
+   *mss_l4len_idx |= thlen << IXGBE_ADVTXD_L4LEN_SHIFT;
+
+   hdrlen = ethlen + iphlen + thlen;
+   paylen = mp->m_pkthdr.len - hdrlen;
+   CLR(*olinfo_status, IXGBE_ADVTXD_PAYLEN_MASK
+   << IXGBE_ADVTXD_PAYLEN_SHIFT);
+   *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
+
+   *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
+   offload = 1;
+
+   tcpstat_add(tcps_outpkttso,
+   (paylen + outlen - 1) / outlen);
+   } else
+   tcpstat_inc(tcps_outbadtso);
+   }
+
return offload;
 }
 
@@ -2529,6 +2565,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
struct ixgbe_adv_tx_context_desc *TXD;
struct ixgbe_tx_buf *tx_buffer;
uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
+   uint32_t mss_l4len_idx = 0;
int ctxd = txr->next_avail_desc;
int offload = 0;
 
@@ -2544,8 +2581,8 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
}
 #endif
 
-   offload |= ixgbe_csum_offload(mp, _macip_lens, _tucmd_mlhl,
-   olinfo_status);
+   offload |= ixgbe_tx_offload(mp, _macip_lens, _tucmd_mlhl,
+   olinfo_status, cmd_type_len, _l4len_idx);
 
if (!offload)
return (0);
@@ -2559,7 

Relax netlock to shared netlock and push it down to mrt_sysctl_vif()

2023-05-17 Thread Vitaliy Makkoveev
Also read-only access to netlock protected data.

Index: sys/netinet/ip_input.c
===
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.384
diff -u -p -r1.384 ip_input.c
--- sys/netinet/ip_input.c  16 May 2023 19:36:00 -  1.384
+++ sys/netinet/ip_input.c  17 May 2023 10:06:49 -
@@ -1719,10 +1719,7 @@ ip_sysctl(int *name, u_int namelen, void
case IPCTL_MRTVIF:
if (newp)
return (EPERM);
-   NET_LOCK();
-   error = mrt_sysctl_vif(oldp, oldlenp);
-   NET_UNLOCK();
-   return (error);
+   return (mrt_sysctl_vif(oldp, oldlenp));
 #else
case IPCTL_MRTPROTO:
case IPCTL_MRTSTATS:
Index: sys/netinet/ip_mroute.c
===
RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.138
diff -u -p -r1.138 ip_mroute.c
--- sys/netinet/ip_mroute.c 19 Apr 2023 20:03:51 -  1.138
+++ sys/netinet/ip_mroute.c 17 May 2023 10:06:49 -
@@ -356,6 +356,8 @@ mrt_sysctl_vif(void *oldp, size_t *oldle
given = *oldlenp;
needed = 0;
memset(, 0, sizeof vinfo);
+
+   NET_LOCK_SHARED();
TAILQ_FOREACH(ifp, , if_list) {
if ((vifp = (struct vif *)ifp->if_mcast) == NULL)
continue;
@@ -375,11 +377,15 @@ mrt_sysctl_vif(void *oldp, size_t *oldle
int error;
 
error = copyout(, where, sizeof(vinfo));
-   if (error)
+   if (error) {
+   NET_UNLOCK_SHARED();
return (error);
+   }
where += sizeof(vinfo);
}
}
+   NET_UNLOCK_SHARED();
+
if (where) {
*oldlenp = needed;
if (given < needed)



Relax netlock to shared netlock and push it down to mrt_sysctl_mfc()

2023-05-17 Thread Vitaliy Makkoveev
mrt_rtwalk_mfcsysctl() performs read-only access to protected data, so
rtable_walk() could be called with shared netlock.

Index: sys/netinet/ip_input.c
===
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.384
diff -u -p -r1.384 ip_input.c
--- sys/netinet/ip_input.c  16 May 2023 19:36:00 -  1.384
+++ sys/netinet/ip_input.c  17 May 2023 09:59:16 -
@@ -1712,10 +1712,7 @@ ip_sysctl(int *name, u_int namelen, void
case IPCTL_MRTMFC:
if (newp)
return (EPERM);
-   NET_LOCK();
-   error = mrt_sysctl_mfc(oldp, oldlenp);
-   NET_UNLOCK();
-   return (error);
+   return (mrt_sysctl_mfc(oldp, oldlenp));
case IPCTL_MRTVIF:
if (newp)
return (EPERM);
Index: sys/netinet/ip_mroute.c
===
RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.138
diff -u -p -r1.138 ip_mroute.c
--- sys/netinet/ip_mroute.c 19 Apr 2023 20:03:51 -  1.138
+++ sys/netinet/ip_mroute.c 17 May 2023 09:59:16 -
@@ -479,10 +479,12 @@ mrt_sysctl_mfc(void *oldp, size_t *oldle
msa.msa_len = *oldlenp;
msa.msa_needed = 0;
 
+   NET_LOCK_SHARED();
for (rtableid = 0; rtableid <= RT_TABLEID_MAX; rtableid++) {
rtable_walk(rtableid, AF_INET, NULL, mrt_rtwalk_mfcsysctl,
);
}
+   NET_UNLOCK_SHARED();
 
if (msa.msa_minfos != NULL && msa.msa_needed > 0 &&
(error = copyout(msa.msa_minfos, oldp, msa.msa_needed)) != 0) {



Unlock ip6_sysctl()

2023-05-17 Thread Vitaliy Makkoveev
Introduce `ip6_soiikey_lock' rwlock(9) to protect `ip6_soiikey'. It
accessed only by ip6_sysctl_soiikey() and ip6_sysctl() is the only
ip6_sysctl_soiikey() caller so context switch is allowed here. Also
remove unused `oldkey' from ip6_sysctl_soiikey().

For IPV6CTL_MRTSTATS, IPV6CTL_MRTMIF and IPV6CTL_MRTMFC cases netlock
could be relaxed to share netlock, but with separate diffs.

ok?

Index: sys/netinet6/in6_proto.c
===
RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.112
diff -u -p -r1.112 in6_proto.c
--- sys/netinet6/in6_proto.c23 Nov 2022 14:48:28 -  1.112
+++ sys/netinet6/in6_proto.c17 May 2023 09:37:05 -
@@ -128,6 +128,7 @@ const struct protosw inet6sw[] = {
 {
   .pr_domain   = ,
   .pr_protocol = IPPROTO_IPV6,
+  .pr_flags= PR_MPSYSCTL,
   .pr_init = ip6_init,
   .pr_slowtimo = frag6_slowtimo,
   .pr_sysctl   = ip6_sysctl
Index: sys/netinet6/ip6_input.c
===
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.254
diff -u -p -r1.254 ip6_input.c
--- sys/netinet6/ip6_input.c21 Aug 2022 14:15:55 -  1.254
+++ sys/netinet6/ip6_input.c17 May 2023 09:37:05 -
@@ -118,6 +118,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA
 struct cpumem *ip6counters;
 
 uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
+struct rwlock ip6_soiikey_lock = RWLOCK_INITIALIZER("soiikeylk");
 
 int ip6_ours(struct mbuf **, int *, int, int);
 int ip6_check_rh0hdr(struct mbuf *, int *);
@@ -1477,17 +1478,16 @@ ip6_sysctl_ip6stat(void *oldp, size_t *o
 int
 ip6_sysctl_soiikey(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
-   uint8_t oldkey[IP6_SOIIKEY_LEN];
int error;
 
error = suser(curproc);
if (error != 0)
return (error);
 
-   memcpy(oldkey, ip6_soiikey, sizeof(oldkey));
-
+   rw_enter_write(_soiikey_lock);
error = sysctl_struct(oldp, oldlenp, newp, newlen, ip6_soiikey,
sizeof(ip6_soiikey));
+   rw_exit_write(_soiikey_lock);
 
return (error);
 }