ola, i recently had a chance to deploy an openbsd box in a predominantly cisco shop to offload NAT from an aging 7200 router. it would have gone well except they'd allocated me /31 networks on ethernet ports for us to talk over. /31s currently don't work well because of how the kernel handles broadcast addresses on ethernet (well, broadcast) interfaces.
this diff effectively special-cases /31s so they don't get a broadcast address. this in turn allows us to configure /31s to squeeze a bit more out of our address space, and to interoperate with "carrier" gear including the aforementioned cisco. i have tested this with openbsd on both the 0th and 1st address in the /31 talking to another openbsd box, and talking to a cisco. in both cases i tried tcp, udp, icmp, and ospf traffic. it all works as expected now. this also includes tweaks from claudio@ to better special-case the changed broadcast address handling. if anyone is interested in the config i had between the cisco and openbsd, it follows. on the cisco side: interface Loopback0 ip address 192.168.1.1 255.255.255.128 interface GigabitEthernet1/0/1 no switchport ip address 192.168.0.4 255.255.255.254 router ospf 1 log-adjacency-changes passive-interface default no passive-interface GigabitEthernet1/0/1 network 192.168.0.4 0.0.0.1 area 0 network 192.168.1.0 0.0.0.127 area 0 and on the openbsd side: dlg@hotspare netinet$ ifconfig bnx2 bnx2: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500 lladdr f0:4d:a2:3f:94:7a priority: 0 media: Ethernet autoselect (1000baseT full-duplex) status: active inet6 fe80::f24d:a2ff:fe3f:947a%bnx2 prefixlen 64 scopeid 0x5 inet 192.168.0.5 netmask 0xfffffffe dlg@hotspare netinet$ ifconfig vether0 vether0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500 lladdr fe:e1:ba:d0:48:2d priority: 0 groups: vether media: Ethernet autoselect status: active inet 192.168.1.129 netmask 0xffffff80 broadcast 192.168.1.255 inet6 fe80::fce1:baff:fed0:482d%vether0 prefixlen 64 scopeid 0x9 dlg@hotspare netinet$ sysctl net.inet.ip.forwarding net.inet.ip.forwarding=1 dlg@hotspare netinet$ sudo cat /etc/ospfd.conf redistribute connected area 0.0.0.0 { interface bnx2 interface vether0 { passive } } produces this on the cisco: sw-mr-tmp#sh ip ospf ne Neighbor ID Pri State Dead Time Address Interface 130.102.64.2 1 FULL/DR 00:00:31 
192.168.0.5 GigabitEthernet1/0/1 sw-mr-tmp#sh ip route Codes: C - connected, S - static, R - RIP, M - mobile, B - BGP D - EIGRP, EX - EIGRP external, O - OSPF, IA - OSPF inter area N1 - OSPF NSSA external type 1, N2 - OSPF NSSA external type 2 E1 - OSPF external type 1, E2 - OSPF external type 2 i - IS-IS, su - IS-IS summary, L1 - IS-IS level-1, L2 - IS-IS level-2 ia - IS-IS inter area, * - candidate default, U - per-user static route o - ODR, P - periodic downloaded static route Gateway of last resort is not set 130.102.0.0/22 is subnetted, 1 subnets O E1 130.102.64.0 [110/101] via 192.168.0.5, 00:19:14, GigabitEthernet1/0/1 192.168.0.0/31 is subnetted, 2 subnets O E1 192.168.0.0 [110/101] via 192.168.0.5, 00:19:14, GigabitEthernet1/0/1 C 192.168.0.4 is directly connected, GigabitEthernet1/0/1 192.168.1.0/25 is subnetted, 2 subnets C 192.168.1.0 is directly connected, Loopback0 O 192.168.1.128 [110/11] via 192.168.0.5, 00:19:14, GigabitEthernet1/0/1 sw-mr-tmp#ping 192.168.1.129 Type escape sequence to abort. Sending 5, 100-byte ICMP Echos to 192.168.1.129, timeout is 2 seconds: !!!!! 
Success rate is 100 percent (5/5), round-trip min/avg/max = 1/1/1 ms and on openbsd: dlg@hotspare netinet$ sudo ospfctl sh ne ID Pri State DeadTime Address Iface Uptime 192.168.1.1 1 FULL/BCKUP 00:00:37 192.168.0.4 bnx2 00:20:03 dlg@hotspare netinet$ sudo ospfctl sh fib flags: * = valid, O = OSPF, C = Connected, S = Static Flags Prio Destination Nexthop *S 8 0.0.0.0/0 130.102.67.254 *C 0 127.0.0.0/8 link#0 *S 8 127.0.0.0/8 127.0.0.1 * 4 127.0.0.1/32 127.0.0.1 *C 4 130.102.64.0/22 link#3 *C 4 192.168.0.0/31 link#8 *C 4 192.168.0.4/31 link#5 *O 32 192.168.0.4/31 192.168.0.5 *O 32 192.168.1.1/32 192.168.0.4 *C 4 192.168.1.128/25 link#9 *S 8 224.0.0.0/4 127.0.0.1 dlg@hotspare netinet$ ping -c 5 192.168.1.1 PING 192.168.1.1 (192.168.1.1): 56 data bytes 64 bytes from 192.168.1.1: icmp_seq=0 ttl=255 time=0.477 ms 64 bytes from 192.168.1.1: icmp_seq=1 ttl=255 time=0.900 ms 64 bytes from 192.168.1.1: icmp_seq=2 ttl=255 time=0.477 ms 64 bytes from 192.168.1.1: icmp_seq=3 ttl=255 time=0.464 ms 64 bytes from 192.168.1.1: icmp_seq=4 ttl=255 time=2.578 ms --- 192.168.1.1 ping statistics --- 5 packets transmitted, 5 packets received, 0.0% packet loss round-trip min/avg/max/std-dev = 0.464/0.979/2.578/0.816 ms ok? 
Index: in.c =================================================================== RCS file: /cvs/src/sys/netinet/in.c,v retrieving revision 1.67 diff -u -p -r1.67 in.c --- in.c 3 Jul 2011 06:24:13 -0000 1.67 +++ in.c 5 Jul 2011 03:10:05 -0000 @@ -700,8 +700,12 @@ in_ifinit(struct ifnet *ifp, struct in_i */ ia->ia_ifa.ifa_metric = ifp->if_metric; if (ifp->if_flags & IFF_BROADCAST) { - ia->ia_broadaddr.sin_addr.s_addr = - ia->ia_net | ~ia->ia_netmask; + if (IN_RFC3021_SUBNET(ia->ia_netmask)) + ia->ia_broadaddr.sin_addr.s_addr = 0; + else { + ia->ia_broadaddr.sin_addr.s_addr = + ia->ia_net | ~ia->ia_netmask; + } } else if (ifp->if_flags & IFF_LOOPBACK) { ia->ia_dstaddr = ia->ia_addr; flags |= RTF_HOST; Index: in.h =================================================================== RCS file: /cvs/src/sys/netinet/in.h,v retrieving revision 1.89 diff -u -p -r1.89 in.h --- in.h 15 Jun 2011 09:11:01 -0000 1.89 +++ in.h 5 Jul 2011 03:10:05 -0000 @@ -187,6 +187,12 @@ struct in_addr { #define IN_CLASSD_HOST __IPADDR(0x0fffffff) #define IN_MULTICAST(i) IN_CLASSD(i) +#define IN_RFC3021_NET __IPADDR(0xfffffffe) +#define IN_RFC3021_NSHIFT 31 +#define IN_RFC3021_HOST __IPADDR(0x00000001) +#define IN_RFC3021_SUBNET(n) (((u_int32_t)(n) & IN_RFC3021_NET) == \ + IN_RFC3021_NET) + #define IN_EXPERIMENTAL(i) (((u_int32_t)(i) & __IPADDR(0xf0000000)) == \ __IPADDR(0xf0000000)) #define IN_BADCLASS(i) (((u_int32_t)(i) & __IPADDR(0xf0000000)) == \ Index: in_pcb.c =================================================================== RCS file: /cvs/src/sys/netinet/in_pcb.c,v retrieving revision 1.122 diff -u -p -r1.122 in_pcb.c --- in_pcb.c 4 Jul 2011 06:54:49 -0000 1.122 +++ in_pcb.c 5 Jul 2011 03:10:05 -0000 @@ -412,8 +412,10 @@ in_pcbconnect(v, nam) if (sin->sin_addr.s_addr == INADDR_ANY) sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr; else if (sin->sin_addr.s_addr == INADDR_BROADCAST && - (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST)) - sin->sin_addr = 
TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr; + (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST) && + TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr.s_addr) + sin->sin_addr = + TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr; } if (inp->inp_laddr.s_addr == INADDR_ANY) { int error; Index: ip_icmp.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_icmp.c,v retrieving revision 1.93 diff -u -p -r1.93 ip_icmp.c --- ip_icmp.c 4 Apr 2011 16:33:52 -0000 1.93 +++ ip_icmp.c 5 Jul 2011 03:10:05 -0000 @@ -533,8 +533,12 @@ icmp_input(struct mbuf *m, ...) icp->icmp_type = ICMP_MASKREPLY; icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; if (ip->ip_src.s_addr == 0) { - if (ia->ia_ifp->if_flags & IFF_BROADCAST) - ip->ip_src = ia->ia_broadaddr.sin_addr; + if (ia->ia_ifp->if_flags & IFF_BROADCAST) { + if (ia->ia_broadaddr.sin_addr.s_addr) + ip->ip_src = ia->ia_broadaddr.sin_addr; + else + ip->ip_src.s_addr = INADDR_BROADCAST; + } else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) ip->ip_src = ia->ia_dstaddr.sin_addr; } @@ -674,6 +678,7 @@ icmp_reflect(struct mbuf *m, struct mbuf if (t.s_addr == ia->ia_addr.sin_addr.s_addr) break; if ((ia->ia_ifp->if_flags & IFF_BROADCAST) && + ia->ia_broadaddr.sin_addr.s_addr != 0 && t.s_addr == ia->ia_broadaddr.sin_addr.s_addr) break; }