ipv4 header checksum

2023-05-12 Thread Alexander Bluhm
Hi,

Instead of implementing IPv4 header checksum everywhere differently,
introduce in_hdr_cksum_out().  It is used like in_proto_cksum_out().

ok?

bluhm

Index: net/if_bridge.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.366
diff -u -p -r1.366 if_bridge.c
--- net/if_bridge.c 7 May 2023 16:23:23 -   1.366
+++ net/if_bridge.c 12 May 2023 20:43:06 -
@@ -1735,15 +1735,8 @@ bridge_ip(struct ifnet *brifp, int dir, 
return (NULL);
if (m->m_len < sizeof(struct ip))
goto dropit;
+   in_hdr_cksum_out(m, ifp);
in_proto_cksum_out(m, ifp);
-   ip = mtod(m, struct ip *);
-   ip->ip_sum = 0;
-   if (0 && (ifp->if_capabilities & IFCAP_CSUM_IPv4))
-   m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
-   else {
-   ipstat_inc(ips_outswcsum);
-   ip->ip_sum = in_cksum(m, hlen);
-   }
 
 #if NPF > 0
if (dir == BRIDGE_IN &&
@@ -1993,8 +1986,7 @@ bridge_send_icmp_err(struct ifnet *ifp,
ip->ip_off &= htons(IP_DF);
ip->ip_id = htons(ip_randomid());
ip->ip_ttl = MAXTTL;
-   ip->ip_sum = 0;
-   ip->ip_sum = in_cksum(m, hlen);
+   in_hdr_cksum_out(m, NULL);
 
/* Swap ethernet addresses */
bcopy(>ether_dhost, _tmp, sizeof(ether_tmp));
Index: net/if_gre.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_gre.c,v
retrieving revision 1.173
diff -u -p -r1.173 if_gre.c
--- net/if_gre.c13 Apr 2023 02:19:05 -  1.173
+++ net/if_gre.c12 May 2023 20:43:06 -
@@ -3028,8 +3028,7 @@ gre_keepalive_send(void *arg)
 
ip = mtod(m, struct ip *);
ip->ip_id = htons(ip_randomid());
-   ip->ip_sum = 0;
-   ip->ip_sum = in_cksum(m, sizeof(*ip));
+   in_hdr_cksum_out(m, NULL);
 
proto = htons(ETHERTYPE_IP);
break;
Index: net/pf.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1178
diff -u -p -r1.1178 pf.c
--- net/pf.c10 May 2023 12:07:16 -  1.1178
+++ net/pf.c12 May 2023 20:43:06 -
@@ -2868,7 +2868,7 @@ pf_change_icmp_af(struct mbuf *m, int ip
ip4->ip_p = pd2->proto;
ip4->ip_src = src->v4;
ip4->ip_dst = dst->v4;
-   ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
+   in_hdr_cksum_out(n, NULL);
break;
case AF_INET6:
ip6 = mtod(n, struct ip6_hdr *);
@@ -6549,13 +6549,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
}
 
if (ntohs(ip->ip_len) <= ifp->if_mtu) {
-   ip->ip_sum = 0;
-   if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
-   m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
-   else {
-   ipstat_inc(ips_outswcsum);
-   ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
-   }
+   in_hdr_cksum_out(m0, ifp);
in_proto_cksum_out(m0, ifp);
ifp->if_output(ifp, m0, sintosa(dst), rt);
goto done;
Index: netinet/in.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.h,v
retrieving revision 1.143
diff -u -p -r1.143 in.h
--- netinet/in.h10 May 2023 12:07:16 -  1.143
+++ netinet/in.h12 May 2023 20:43:06 -
@@ -779,6 +779,7 @@ intin_broadcast(struct in_addr, u_in
 int   in_canforward(struct in_addr);
 int   in_cksum(struct mbuf *, int);
 int   in4_cksum(struct mbuf *, u_int8_t, int, int);
+void  in_hdr_cksum_out(struct mbuf *, struct ifnet *);
 void  in_proto_cksum_out(struct mbuf *, struct ifnet *);
 int   in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
 void  in_ifdetach(struct ifnet *);
Index: netinet/ip_divert.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.90
diff -u -p -r1.90 ip_divert.c
--- netinet/ip_divert.c 4 Apr 2023 10:12:03 -   1.90
+++ netinet/ip_divert.c 12 May 2023 20:43:06 -
@@ -157,8 +157,7 @@ divert_output(struct inpcb *inp, struct 
 * since the userspace application may have modified the packet
 * prior to reinjection.
 */
-   ip->ip_sum = 0;
-   ip->ip_sum = in_cksum(m, off);
+   in_hdr_cksum_out(m, NULL);
in_proto_cksum_out(m, NULL);
 
ifp = if_get(m->m_pkthdr.ph_ifidx);
@@ -190,8 +189,6 @@ 

ix hardware tso

2023-05-12 Thread Alexander Bluhm
Hi,

I merged jan@'s ix(4) TSO diff with my software TSO diff.  It seems
to work.

# netstat -s -p tcp | grep TSO
1 output TSO packet software chopped
1824861 output TSO packets hardware processed
31668597 output TSO packets generated
83 output TSO packets dropped

Do not set ifconfig ix tso, this flag does not work correctly.
Hardware TSO is enabled if the hardware interface supports it.  To
disable TSO globally, use sysctl net.inet.tcp.tso=0.

I have tested it with this interface types.
ix0 at pci3 dev 0 function 0 "Intel 82598AF" rev 0x01, msix, 4 queues, address 
00:1b:21:0d:db:8f
ix1 at pci4 dev 0 function 0 "Intel 82599" rev 0x01, msix, 4 queues, address 
00:1b:21:c5:19:50
ix5 at pci13 dev 0 function 0 "Intel 82599" rev 0x01, msix, 4 queues, address 
90:e2:ba:d6:28:ac

Feel free to try other ix interfaces.  pf route-to should work, but
I have not tested it.

I have not yet investigated where the dropped counter 83 comes from.
If you see that also, please report what you did.

bluhm

Index: dev/pci/if_ix.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.193
diff -u -p -r1.193 if_ix.c
--- dev/pci/if_ix.c 28 Apr 2023 10:18:57 -  1.193
+++ dev/pci/if_ix.c 12 May 2023 22:43:04 -
@@ -1924,8 +1924,9 @@ ixgbe_setup_interface(struct ix_softc *s
ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
ifp->if_capabilities |= IFCAP_CSUM_IPv4;
 
+   ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
if (sc->hw.mac.type != ixgbe_mac_82598EB)
-   ifp->if_capabilities |= IFCAP_TSO;
+   ifp->if_capabilities |= IFCAP_LRO;
 
/*
 * Specify the media types supported by this sc and register
@@ -2344,6 +2345,7 @@ ixgbe_initialize_transmit_units(struct i
int  i;
uint64_t tdba;
uint32_t txctrl;
+   uint32_t hlreg;
 
/* Setup the Base and Length of the Tx Descriptor Ring */
 
@@ -2405,6 +2407,11 @@ ixgbe_initialize_transmit_units(struct i
rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
}
+
+   /* Enable TCP/UDP padding when using TSO */
+   hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+   hlreg |= IXGBE_HLREG0_TXPADEN;
+   IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
 }
 
 /*
@@ -2473,16 +2480,18 @@ ixgbe_free_transmit_buffers(struct tx_ri
  **/
 
 static inline int
-ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
-uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
+ixgbe_tx_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
+uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status, uint32_t *cmd_type_len,
+uint32_t *mss_l4len_idx)
 {
struct ether_extracted ext;
int offload = 0;
-   uint32_t iphlen;
+   uint32_t ethlen, iphlen;
 
ether_extract_headers(mp, );
+   ethlen = sizeof(*ext.eh);
 
-   *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   *vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT);
 
if (ext.ip4) {
iphlen = ext.ip4->ip_hl << 2;
@@ -2500,6 +2509,8 @@ ixgbe_csum_offload(struct mbuf *mp, uint
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
 #endif
} else {
+   if (mp->m_pkthdr.csum_flags & M_TCP_TSO)
+   tcpstat_inc(tcps_outbadtso);
return offload;
}
 
@@ -2519,6 +2530,32 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
}
 
+   if (mp->m_pkthdr.csum_flags & M_TCP_TSO) {
+   if (ext.tcp) {
+   uint32_t pktlen, hdrlen, thlen, outlen;
+
+   thlen = ext.tcp->th_off << 2;
+
+   *mss_l4len_idx |= (uint32_t)(mp->m_pkthdr.ph_mss
+   << IXGBE_ADVTXD_MSS_SHIFT);
+   *mss_l4len_idx |= thlen << IXGBE_ADVTXD_L4LEN_SHIFT;
+
+   hdrlen = ethlen + iphlen + thlen;
+   pktlen = mp->m_pkthdr.len - hdrlen;
+   CLR(*olinfo_status, IXGBE_ADVTXD_PAYLEN_MASK
+   << IXGBE_ADVTXD_PAYLEN_SHIFT);
+   *olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT;
+
+   *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
+   offload = 1;
+
+   outlen = hdrlen + mp->m_pkthdr.ph_mss;
+   tcpstat_add(tcps_outpkttso,
+   (pktlen + outlen - 1) / outlen);
+   } else
+   tcpstat_inc(tcps_outbadtso);
+   }
+
return offload;
 }
 
@@ -2529,6 +2566,7 @@ 

Re: ifconfig: SIOCSIFFLAGS: device not configured

2023-05-12 Thread Hrvoje Popovski
On 12.5.2023. 16:21, Jan Klemkow wrote:
> On Thu, May 11, 2023 at 09:17:37PM +0200, Hrvoje Popovski wrote:
>> is it possible to change "ifconfig: SIOCSIFFLAGS: device not configured"
>> message that it has an interface name in it, something like:
>> ifconfig pfsync0: SIOCSIFFLAGS: device not configured <- in my case.
>>
>> I have many vlans and static routes in my setup and while testing some
>> diffs, it took me a long time to figure out which interface the message
>> was coming from.
>>
>> starting network
>> add host 10.11.2.69: gateway 10.12.253.225
>> add host 10.250.184.36: gateway 10.12.253.225
>> add host 9.9.9.9: gateway 10.12.253.225
>> add host 10.11.1.234: gateway 10.12.253.225
>> add host 10.11.1.235: gateway 10.12.253.225
>> add host 10.11.255.123: gateway 10.12.253.225
>> add net 10.101/16: gateway 10.12.253.225
>> ifconfig: SIOCSIFFLAGS: Device not configured
>> add net 16/8: gateway 192.168.100.112
>> add net a192:a168:a100:a100::/64: gateway 192:168:1000:1000::112
>> add net 48/8: gateway 192.168.111.112
>> add net a192:a168:a111:a111::/64: gateway 192:168::::112
>> reordering: ld.so libc libcrypto sshd.
>>
>> or when I'm doing sh /etc/netstart and have aggr interface
>>
>> ifconfig: SIOCSTRUNKPORT: Device busy
>> ifconfig: SIOCSTRUNKPORT: Device busy
>>
>> to change
>> ifconfig ix0: SIOCSTRUNKPORT: Device busy
>> ifconfig ix1: SIOCSTRUNKPORT: Device busy
> I also run into this issue sometimes.  So, here is diff that prints the
> interface name in front of most of these anonym error messages.


That's it, thank you.
This will help me a lot when doing some network testing...


old
ifconfig: SIOCSIFFLAGS: Device not configured
ifconfig: SIOCSIFFLAGS: Inappropriate ioctl for device
reordering: ld.so libc libcrypto sshd.

new
ifconfig: pfsync0: SIOCSIFFLAGS: Device not configured
ifconfig: pfsync0: SIOCSIFFLAGS: Inappropriate ioctl for device
reordering: ld.so libc libcrypto sshd.




when doing sh /etc/netstart with aggr0

old
ifconfig: SIOCSTRUNKPORT: Device busy
ifconfig: SIOCSTRUNKPORT: Device busy

new
ifconfig: aggr0 ix0: SIOCSTRUNKPORT: Device busy
ifconfig: aggr0 ix1: SIOCSTRUNKPORT: Device busy



Re: ifconfig: SIOCSIFFLAGS: device not configured

2023-05-12 Thread Peter Hessler
On 2023 May 12 (Fri) at 16:21:10 +0200 (+0200), Jan Klemkow wrote:
:On Thu, May 11, 2023 at 09:17:37PM +0200, Hrvoje Popovski wrote:
:> ifconfig: SIOCSTRUNKPORT: Device busy
:> ifconfig: SIOCSTRUNKPORT: Device busy
:> 
:> to change
:> ifconfig ix0: SIOCSTRUNKPORT: Device busy
:> ifconfig ix1: SIOCSTRUNKPORT: Device busy
:
:I also run into this issue sometimes.  So, here is diff that prints the
:interface name in front of most of these anonym error messages.
:
:ok?
:
:Jan

OK


-- 
It is illegal to drive more than two thousand sheep down Hollywood
Boulevard at one time.



Re: ifconfig: SIOCSIFFLAGS: device not configured

2023-05-12 Thread Jan Klemkow
On Thu, May 11, 2023 at 09:17:37PM +0200, Hrvoje Popovski wrote:
> is it possible to change "ifconfig: SIOCSIFFLAGS: device not configured"
> message that it has an interface name in it, something like:
> ifconfig pfsync0: SIOCSIFFLAGS: device not configured <- in my case.
> 
> I have many vlans and static routes in my setup and while testing some
> diffs, it took me a long time to figure out which interface the message
> was coming from.
> 
> starting network
> add host 10.11.2.69: gateway 10.12.253.225
> add host 10.250.184.36: gateway 10.12.253.225
> add host 9.9.9.9: gateway 10.12.253.225
> add host 10.11.1.234: gateway 10.12.253.225
> add host 10.11.1.235: gateway 10.12.253.225
> add host 10.11.255.123: gateway 10.12.253.225
> add net 10.101/16: gateway 10.12.253.225
> ifconfig: SIOCSIFFLAGS: Device not configured
> add net 16/8: gateway 192.168.100.112
> add net a192:a168:a100:a100::/64: gateway 192:168:1000:1000::112
> add net 48/8: gateway 192.168.111.112
> add net a192:a168:a111:a111::/64: gateway 192:168::::112
> reordering: ld.so libc libcrypto sshd.
> 
> or when I'm doing sh /etc/netstart and have aggr interface
> 
> ifconfig: SIOCSTRUNKPORT: Device busy
> ifconfig: SIOCSTRUNKPORT: Device busy
> 
> to change
> ifconfig ix0: SIOCSTRUNKPORT: Device busy
> ifconfig ix1: SIOCSTRUNKPORT: Device busy

I also run into this issue sometimes.  So, here is diff that prints the
interface name in front of most of these anonym error messages.

ok?

Jan

Index: ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.462
diff -u -p -r1.462 ifconfig.c
--- ifconfig.c  8 Mar 2023 04:43:06 -   1.462
+++ ifconfig.c  12 May 2023 14:14:01 -
@@ -1070,14 +1070,14 @@ printgroup(char *groupname, int ifaliase
errno == ENOENT)
return (-1);
else
-   err(1, "SIOCGIFGMEMB");
+   err(1, "%s: SIOCGIFGMEMB", ifgr.ifgr_name);
}
 
len = ifgr.ifgr_len;
if ((ifgr.ifgr_groups = calloc(1, len)) == NULL)
err(1, "printgroup");
if (ioctl(sock, SIOCGIFGMEMB, (caddr_t)) == -1)
-   err(1, "SIOCGIFGMEMB");
+   err(1, "%s: SIOCGIFGMEMB", ifgr.ifgr_name);
 
for (ifg = ifgr.ifgr_groups; ifg && len >= sizeof(struct ifg_req);
ifg++) {
@@ -1099,7 +1099,7 @@ printgroupattribs(char *groupname)
bzero(, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, groupname, sizeof(ifgr.ifgr_name));
if (ioctl(sock, SIOCGIFGATTR, (caddr_t)) == -1)
-   err(1, "SIOCGIFGATTR");
+   err(1, "%s: SIOCGIFGATTR", ifgr.ifgr_name);
 
printf("%s:", groupname);
printf(" carp demote count %d", ifgr.ifgr_attrib.ifg_carp_demoted);
@@ -1122,7 +1122,8 @@ setgroupattribs(char *groupname, int arg
if (argc > 1) {
neg = strtonum(argv[1], 0, 128, );
if (errstr)
-   errx(1, "invalid carp demotion: %s", errstr);
+   errx(1, "%s: invalid carp demotion: %s", ifgr.ifgr_name,
+   errstr);
}
 
if (p[0] == '-') {
@@ -1135,7 +1136,7 @@ setgroupattribs(char *groupname, int arg
usage();
 
if (ioctl(sock, SIOCSIFGATTR, (caddr_t)) == -1)
-   err(1, "SIOCSIFGATTR");
+   err(1, "%s: SIOCSIFGATTR", ifgr.ifgr_name);
 }
 
 void
@@ -1249,7 +1250,7 @@ clone_create(const char *addr, int param
 
(void) strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
if (ioctl(sock, SIOCIFCREATE, ) == -1)
-   err(1, "SIOCIFCREATE");
+   err(1, "%s: SIOCIFCREATE", ifr.ifr_name);
 }
 
 void
@@ -1258,7 +1259,7 @@ clone_destroy(const char *addr, int para
 
(void) strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
if (ioctl(sock, SIOCIFDESTROY, ) == -1)
-   err(1, "SIOCIFDESTROY");
+   err(1, "%s: SIOCIFDESTROY", ifr.ifr_name);
 }
 
 struct if_clonereq *
@@ -1422,7 +1423,7 @@ setifflags(const char *vname, int value)
bcopy((char *), (char *)_ifr, sizeof(struct ifreq));
 
if (ioctl(sock, SIOCGIFFLAGS, (caddr_t)_ifr) == -1)
-   err(1, "SIOCGIFFLAGS");
+   err(1, "%s: SIOCGIFFLAGS", my_ifr.ifr_name);
(void) strlcpy(my_ifr.ifr_name, ifname, sizeof(my_ifr.ifr_name));
flags = my_ifr.ifr_flags;
 
@@ -1433,7 +1434,7 @@ setifflags(const char *vname, int value)
flags |= value;
my_ifr.ifr_flags = flags;
if (ioctl(sock, SIOCSIFFLAGS, (caddr_t)_ifr) == -1)
-   err(1, "SIOCSIFFLAGS");
+   err(1, "%s: SIOCSIFFLAGS", my_ifr.ifr_name);
 }
 
 void
@@ -1444,7 +1445,7 @@ setifxflags(const char *vname, int value
bcopy((char *), (char *)_ifr, sizeof(struct ifreq));
 
if (ioctl(sock, SIOCGIFXFLAGS, (caddr_t)_ifr) == -1)
-   

Re: give softnet threads their own names

2023-05-12 Thread Alexandr Nedvedicky
On Fri, May 12, 2023 at 10:34:00AM +1000, David Gwynne wrote:
> so in top you see softnet0, softnet1, etc.
> 
> the real change is putting a struct softnet in place to wrap the name
> and taskq up with.
> 
> ok?

sure, thanks
sashan



Re: nd6 remove kernel lock

2023-05-12 Thread Vitaliy Makkoveev
> On 12 May 2023, at 15:07, Alexander Bluhm  wrote:
> 
> On Fri, May 12, 2023 at 11:43:42AM +, Klemens Nanni wrote:
>>> Access rt_llinfo and check for NULL without checking RTF_LLINFO
>>> flag before.  They are changed togehter with the arp or nd6 mutex.
>> 
>> It is the same change, but I'd commit ARP separately (you don't change
>> any locking semantics there).
> 
> I had prepared a smaller diff already.  Here is the part that does
> not touch the locking.  Just some cleanup to get ARP and ND6 in
> sync.
> 
> Let's start with that and discuss locking separately.
> 
> ok?
> 

ok mvs

> bluhm
> 
> Index: netinet/if_ether.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> retrieving revision 1.264
> diff -u -p -r1.264 if_ether.c
> --- netinet/if_ether.c7 May 2023 16:23:23 -   1.264
> +++ netinet/if_ether.c12 May 2023 11:15:07 -
> @@ -388,10 +388,8 @@ arpresolve(struct ifnet *ifp, struct rte
>   rt->rt_expire - arpt_keep / 8 < uptime) {
> 
>   mtx_enter(_mtx);
> - if (ISSET(rt->rt_flags, RTF_LLINFO)) {
> - la = (struct llinfo_arp *)rt->rt_llinfo;
> - KASSERT(la != NULL);
> -
> + la = (struct llinfo_arp *)rt->rt_llinfo;
> + if (la != NULL) {
>   if (la->la_refreshed + 30 < uptime) {
>   la->la_refreshed = uptime;
>   refresh = 1;
> @@ -412,12 +410,11 @@ arpresolve(struct ifnet *ifp, struct rte
>   goto bad;
> 
>   mtx_enter(_mtx);
> - if (!ISSET(rt->rt_flags, RTF_LLINFO)) {
> + la = (struct llinfo_arp *)rt->rt_llinfo;
> + if (la == NULL) {
>   mtx_leave(_mtx);
>   goto bad;
>   }
> - la = (struct llinfo_arp *)rt->rt_llinfo;
> - KASSERT(la != NULL);
> 
>   /*
>* There is an arptab entry, but no ethernet address
> Index: netinet6/nd6.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v
> retrieving revision 1.278
> diff -u -p -r1.278 nd6.c
> --- netinet6/nd6.c8 May 2023 13:14:21 -   1.278
> +++ netinet6/nd6.c12 May 2023 11:58:54 -
> @@ -527,6 +527,7 @@ nd6_lookup(const struct in6_addr *addr6,
>   if (rt == NULL) {
>   if (create && ifp) {
>   struct rt_addrinfo info;
> + struct llinfo_nd6 *ln;
>   struct ifaddr *ifa;
>   int error;
> 
> @@ -556,11 +557,9 @@ nd6_lookup(const struct in6_addr *addr6,
>   rtableid);
>   if (error)
>   return (NULL);
> - if (rt->rt_llinfo != NULL) {
> - struct llinfo_nd6 *ln =
> - (struct llinfo_nd6 *)rt->rt_llinfo;
> + ln = (struct llinfo_nd6 *)rt->rt_llinfo;
> + if (ln != NULL)
>   ln->ln_state = ND6_LLINFO_NOSTATE;
> - }
>   } else
>   return (NULL);
>   }
> @@ -741,7 +740,7 @@ void
> nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
> {
>   struct sockaddr *gate = rt->rt_gateway;
> - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
> + struct llinfo_nd6 *ln;
>   struct ifaddr *ifa;
>   struct in6_ifaddr *ifa6;
> 
> @@ -1027,10 +1026,10 @@ void
> nd6_cache_lladdr(struct ifnet *ifp, const struct in6_addr *from, char *lladdr,
> int lladdrlen, int type, int code)
> {
> - struct rtentry *rt = NULL;
> - struct llinfo_nd6 *ln = NULL;
> + struct rtentry *rt;
> + struct llinfo_nd6 *ln;
>   int is_newentry;
> - struct sockaddr_dl *sdl = NULL;
> + struct sockaddr_dl *sdl;
>   int do_update;
>   int olladdr;
>   int llchange;
> @@ -1257,7 +1256,7 @@ nd6_resolve(struct ifnet *ifp, struct rt
> {
>   struct sockaddr_dl *sdl;
>   struct rtentry *rt;
> - struct llinfo_nd6 *ln = NULL;
> + struct llinfo_nd6 *ln;
>   struct in6_addr saddr6;
>   time_t uptime;
>   int solicit = 0;
> 



Re: nd6 remove kernel lock

2023-05-12 Thread Alexander Bluhm
On Fri, May 12, 2023 at 11:43:42AM +, Klemens Nanni wrote:
> > Access rt_llinfo and check for NULL without checking RTF_LLINFO
> > flag before.  They are changed togehter with the arp or nd6 mutex.
> 
> It is the same change, but I'd commit ARP separately (you don't change
> any locking semantics there).

I had prepared a smaller diff already.  Here is the part that does
not touch the locking.  Just some cleanup to get ARP and ND6 in
sync.

Let's start with that and discuss locking separately.

ok?

bluhm

Index: netinet/if_ether.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.264
diff -u -p -r1.264 if_ether.c
--- netinet/if_ether.c  7 May 2023 16:23:23 -   1.264
+++ netinet/if_ether.c  12 May 2023 11:15:07 -
@@ -388,10 +388,8 @@ arpresolve(struct ifnet *ifp, struct rte
rt->rt_expire - arpt_keep / 8 < uptime) {
 
mtx_enter(_mtx);
-   if (ISSET(rt->rt_flags, RTF_LLINFO)) {
-   la = (struct llinfo_arp *)rt->rt_llinfo;
-   KASSERT(la != NULL);
-
+   la = (struct llinfo_arp *)rt->rt_llinfo;
+   if (la != NULL) {
if (la->la_refreshed + 30 < uptime) {
la->la_refreshed = uptime;
refresh = 1;
@@ -412,12 +410,11 @@ arpresolve(struct ifnet *ifp, struct rte
goto bad;
 
mtx_enter(_mtx);
-   if (!ISSET(rt->rt_flags, RTF_LLINFO)) {
+   la = (struct llinfo_arp *)rt->rt_llinfo;
+   if (la == NULL) {
mtx_leave(_mtx);
goto bad;
}
-   la = (struct llinfo_arp *)rt->rt_llinfo;
-   KASSERT(la != NULL);
 
/*
 * There is an arptab entry, but no ethernet address
Index: netinet6/nd6.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.278
diff -u -p -r1.278 nd6.c
--- netinet6/nd6.c  8 May 2023 13:14:21 -   1.278
+++ netinet6/nd6.c  12 May 2023 11:58:54 -
@@ -527,6 +527,7 @@ nd6_lookup(const struct in6_addr *addr6,
if (rt == NULL) {
if (create && ifp) {
struct rt_addrinfo info;
+   struct llinfo_nd6 *ln;
struct ifaddr *ifa;
int error;
 
@@ -556,11 +557,9 @@ nd6_lookup(const struct in6_addr *addr6,
rtableid);
if (error)
return (NULL);
-   if (rt->rt_llinfo != NULL) {
-   struct llinfo_nd6 *ln =
-   (struct llinfo_nd6 *)rt->rt_llinfo;
+   ln = (struct llinfo_nd6 *)rt->rt_llinfo;
+   if (ln != NULL)
ln->ln_state = ND6_LLINFO_NOSTATE;
-   }
} else
return (NULL);
}
@@ -741,7 +740,7 @@ void
 nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
 {
struct sockaddr *gate = rt->rt_gateway;
-   struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
+   struct llinfo_nd6 *ln;
struct ifaddr *ifa;
struct in6_ifaddr *ifa6;
 
@@ -1027,10 +1026,10 @@ void
 nd6_cache_lladdr(struct ifnet *ifp, const struct in6_addr *from, char *lladdr,
 int lladdrlen, int type, int code)
 {
-   struct rtentry *rt = NULL;
-   struct llinfo_nd6 *ln = NULL;
+   struct rtentry *rt;
+   struct llinfo_nd6 *ln;
int is_newentry;
-   struct sockaddr_dl *sdl = NULL;
+   struct sockaddr_dl *sdl;
int do_update;
int olladdr;
int llchange;
@@ -1257,7 +1256,7 @@ nd6_resolve(struct ifnet *ifp, struct rt
 {
struct sockaddr_dl *sdl;
struct rtentry *rt;
-   struct llinfo_nd6 *ln = NULL;
+   struct llinfo_nd6 *ln;
struct in6_addr saddr6;
time_t uptime;
int solicit = 0;



Re: give softnet threads their own names

2023-05-12 Thread Vitaliy Makkoveev
> On 12 May 2023, at 03:34, David Gwynne  wrote:
> 
> so in top you see softnet0, softnet1, etc.
> 
> the real change is putting a struct softnet in place to wrap the name
> and taskq up with.
> 
> ok?

Makes sense. ok mvs

> 
> Index: if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.695
> diff -u -p -r1.695 if.c
> --- if.c  7 May 2023 16:23:23 -   1.695
> +++ if.c  12 May 2023 00:19:38 -
> @@ -243,8 +243,13 @@ int  ifq_congestion;
> 
> intnetisr;
> 
> +struct softnet {
> + char sn_name[16];
> + struct taskq*sn_taskq;
> +};
> +
> #define   NET_TASKQ   4
> -struct taskq *nettqmp[NET_TASKQ];
> +struct softnet   softnets[NET_TASKQ];
> 
> struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
> 
> @@ -269,8 +274,11 @@ ifinit(void)
>   if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
> 
>   for (i = 0; i < NET_TASKQ; i++) {
> - nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
> - if (nettqmp[i] == NULL)
> + struct softnet *sn = [i];
> + snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
> + sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
> + TASKQ_MPSAFE);
> + if (sn->sn_taskq == NULL)
>   panic("unable to create network taskq %d", i);
>   }
> }
> @@ -3463,13 +3471,13 @@ unhandled_af(int af)
> struct taskq *
> net_tq(unsigned int ifindex)
> {
> - struct taskq *t = NULL;
> + struct softnet *sn;
>   static int nettaskqs;
> 
>   if (nettaskqs == 0)
>   nettaskqs = min(NET_TASKQ, ncpus);
> 
> - t = nettqmp[ifindex % nettaskqs];
> + sn = [ifindex % nettaskqs];
> 
> - return (t);
> + return (sn->sn_taskq);
> }
> 



Re: nd6 remove kernel lock

2023-05-12 Thread Klemens Nanni
On Fri, May 12, 2023 at 12:18:12AM +0200, Alexander Bluhm wrote:
> Hi,
> 
> I would like to remove the kernel lock from nd6 resolve and use nd6
> mutex instead.
> 
> Access rt_llinfo and check for NULL without checking RTF_LLINFO
> flag before.  They are changed togehter with the arp or nd6 mutex.

It is the same change, but I'd commit ARP separately (you don't change
any locking semantics there).

> Access rt_llinfo either with nd6 mutex or exclusive netlock.

Can you leave a comment at the read-only ioctl wrt. exclusive net lock?

> Remove some needless NULL initializations.
> 
> In nd6 resolve replace kernel lock with nd6 mutex.

I prefert the direct llinfo_* check, simpler and more obvious.

> 
> ok?
> 
> bluhm
> 
> Index: netinet/if_ether.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> retrieving revision 1.264
> diff -u -p -r1.264 if_ether.c
> --- netinet/if_ether.c7 May 2023 16:23:23 -   1.264
> +++ netinet/if_ether.c11 May 2023 19:00:19 -
> @@ -388,10 +388,8 @@ arpresolve(struct ifnet *ifp, struct rte
>   rt->rt_expire - arpt_keep / 8 < uptime) {
>  
>   mtx_enter(_mtx);
> - if (ISSET(rt->rt_flags, RTF_LLINFO)) {
> - la = (struct llinfo_arp *)rt->rt_llinfo;
> - KASSERT(la != NULL);
> -
> + la = (struct llinfo_arp *)rt->rt_llinfo;
> + if (la != NULL) {
>   if (la->la_refreshed + 30 < uptime) {
>   la->la_refreshed = uptime;
>   refresh = 1;
> @@ -412,12 +410,11 @@ arpresolve(struct ifnet *ifp, struct rte
>   goto bad;
>  
>   mtx_enter(_mtx);
> - if (!ISSET(rt->rt_flags, RTF_LLINFO)) {
> + la = (struct llinfo_arp *)rt->rt_llinfo;
> + if (la == NULL) {
>   mtx_leave(_mtx);
>   goto bad;
>   }
> - la = (struct llinfo_arp *)rt->rt_llinfo;
> - KASSERT(la != NULL);
>  
>   /*
>* There is an arptab entry, but no ethernet address
> Index: netinet6/nd6.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v
> retrieving revision 1.278
> diff -u -p -r1.278 nd6.c
> --- netinet6/nd6.c8 May 2023 13:14:21 -   1.278
> +++ netinet6/nd6.c11 May 2023 20:39:58 -
> @@ -306,7 +306,7 @@ nd6_llinfo_timer(struct rtentry *rt)
>   struct sockaddr_in6 *dst = satosin6(rt_key(rt));
>   struct ifnet *ifp;
>  
> - NET_ASSERT_LOCKED();
> + NET_ASSERT_LOCKED_EXCLUSIVE();
>  
>   if ((ifp = if_get(rt->rt_ifidx)) == NULL)
>   return 1;
> @@ -527,6 +527,7 @@ nd6_lookup(const struct in6_addr *addr6,
>   if (rt == NULL) {
>   if (create && ifp) {
>   struct rt_addrinfo info;
> + struct llinfo_nd6 *ln;
>   struct ifaddr *ifa;
>   int error;
>  
> @@ -556,11 +557,11 @@ nd6_lookup(const struct in6_addr *addr6,
>   rtableid);
>   if (error)
>   return (NULL);
> - if (rt->rt_llinfo != NULL) {
> - struct llinfo_nd6 *ln =
> - (struct llinfo_nd6 *)rt->rt_llinfo;
> + mtx_enter(_mtx);
> + ln = (struct llinfo_nd6 *)rt->rt_llinfo;
> + if (ln != NULL)
>   ln->ln_state = ND6_LLINFO_NOSTATE;
> - }
> + mtx_leave(_mtx);

You now grab a mutex while holding the exclusive net lock in some callers,
e.g. nd6_na_input -> NET_ASSERT_LOCKED_EXCLUSIVE -> nd6_lookup -> nd6_mtx,
is that intended?


What about the end of nd6_lookup(), your diff does not add a mutex there:

/*
 * Validation for the entry.
 * Note that the check for rt_llinfo is necessary because a cloned
 * route from a parent route that has the L flag (e.g. the default
 * route to a p2p interface) may have the flag, too, while the
 * destination is not actually a neighbor.
 */
if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
(ifp != NULL && rt->rt_ifidx != ifp->if_index)) {
if (create) {
char addr[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG, "%s: failed to lookup %s (if=%s)\n",
__func__,
inet_ntop(AF_INET6, addr6, addr, sizeof(addr)),
ifp ? ifp->if_xname : "unspec"));
}
rtfree(rt);
return (NULL);
   

Re: give softnet threads their own names

2023-05-12 Thread Alexander Bluhm
On Fri, May 12, 2023 at 10:34:00AM +1000, David Gwynne wrote:
> so in top you see softnet0, softnet1, etc.
> 
> the real change is putting a struct softnet in place to wrap the name
> and taskq up with.
> 
> ok?

OK bluhm@

> Index: if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.695
> diff -u -p -r1.695 if.c
> --- if.c  7 May 2023 16:23:23 -   1.695
> +++ if.c  12 May 2023 00:19:38 -
> @@ -243,8 +243,13 @@ int  ifq_congestion;
>  
>  int   netisr;
>  
> +struct softnet {
> + char sn_name[16];
> + struct taskq*sn_taskq;
> +};
> +
>  #define  NET_TASKQ   4
> -struct taskq *nettqmp[NET_TASKQ];
> +struct softnet   softnets[NET_TASKQ];
>  
>  struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
>  
> @@ -269,8 +274,11 @@ ifinit(void)
>   if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
>  
>   for (i = 0; i < NET_TASKQ; i++) {
> - nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
> - if (nettqmp[i] == NULL)
> + struct softnet *sn = [i];
> + snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
> + sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
> + TASKQ_MPSAFE);
> + if (sn->sn_taskq == NULL)
>   panic("unable to create network taskq %d", i);
>   }
>  }
> @@ -3463,13 +3471,13 @@ unhandled_af(int af)
>  struct taskq *
>  net_tq(unsigned int ifindex)
>  {
> - struct taskq *t = NULL;
> + struct softnet *sn;
>   static int nettaskqs;
>  
>   if (nettaskqs == 0)
>   nettaskqs = min(NET_TASKQ, ncpus);
>  
> - t = nettqmp[ifindex % nettaskqs];
> + sn = [ifindex % nettaskqs];
>  
> - return (t);
> + return (sn->sn_taskq);
>  }