ola,

i recently had a chance to deploy an openbsd box in a predominantly
cisco shop to offload NAT from an aging 7200 router. it would have
gone well except they'd allocated me /31 networks on ethernet ports
for us to talk with. /31s currently don't work well because of how
the kernel handles broadcast addresses on ethernet (well, broadcast)
interfaces.

this diff effectively special cases /31s so they don't get a broadcast
address. this in turn allows us to configure /31s to squeeze a bit
more out of our address space, and to interoperate with "carrier"
gear including the aforementioned cisco.

i have tested this with openbsd on both the 0th and 1st address in
the /31 talking to another openbsd box, and talking to a cisco. in
both cases i tried tcp, udp, icmp, and ospf traffic. it all
works as expected now.

this also includes tweaks from claudio@ to better special case the
changed broadcast address handling.

if anyone is interested in the config i had between the cisco and
openbsd, it follows.

on the cisco side:

interface Loopback0
 ip address 192.168.1.1 255.255.255.128

interface GigabitEthernet1/0/1
 no switchport
 ip address 192.168.0.4 255.255.255.254

router ospf 1
 log-adjacency-changes
 passive-interface default
 no passive-interface GigabitEthernet1/0/1
 network 192.168.0.4 0.0.0.1 area 0
 network 192.168.1.0 0.0.0.127 area 0

and on the openbsd side:

dlg@hotspare netinet$ ifconfig bnx2
bnx2: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
       lladdr f0:4d:a2:3f:94:7a
       priority: 0
       media: Ethernet autoselect (1000baseT full-duplex)
       status: active
       inet6 fe80::f24d:a2ff:fe3f:947a%bnx2 prefixlen 64 scopeid 0x5
       inet 192.168.0.5 netmask 0xfffffffe
dlg@hotspare netinet$ ifconfig vether0
vether0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
       lladdr fe:e1:ba:d0:48:2d
       priority: 0
       groups: vether
       media: Ethernet autoselect
       status: active
       inet 192.168.1.129 netmask 0xffffff80 broadcast 192.168.1.255
       inet6 fe80::fce1:baff:fed0:482d%vether0 prefixlen 64 scopeid 0x9
dlg@hotspare netinet$ sysctl net.inet.ip.forwarding
net.inet.ip.forwarding=1

dlg@hotspare netinet$ sudo cat /etc/ospfd.conf
redistribute connected

area 0.0.0.0 {
       interface bnx2
       interface vether0 { passive }
}

produces this on the cisco:

sw-mr-tmp#sh ip ospf ne

Neighbor ID     Pri   State           Dead Time   Address         Interface
130.102.64.2      1   FULL/DR         00:00:31    192.168.0.5     GigabitEthernet1/0/1
sw-mr-tmp#sh ip route
Codes: C - connected, S - static, R - RIP, M - mobile, B - BGP
      D - EIGRP, EX - EIGRP external, O - OSPF, IA - OSPF inter area
      N1 - OSPF NSSA external type 1, N2 - OSPF NSSA external type 2
      E1 - OSPF external type 1, E2 - OSPF external type 2
      i - IS-IS, su - IS-IS summary, L1 - IS-IS level-1, L2 - IS-IS level-2
      ia - IS-IS inter area, * - candidate default, U - per-user static route
      o - ODR, P - periodic downloaded static route

Gateway of last resort is not set

    130.102.0.0/22 is subnetted, 1 subnets
O E1    130.102.64.0 [110/101] via 192.168.0.5, 00:19:14, GigabitEthernet1/0/1
    192.168.0.0/31 is subnetted, 2 subnets
O E1    192.168.0.0 [110/101] via 192.168.0.5, 00:19:14, GigabitEthernet1/0/1
C       192.168.0.4 is directly connected, GigabitEthernet1/0/1
    192.168.1.0/25 is subnetted, 2 subnets
C       192.168.1.0 is directly connected, Loopback0
O       192.168.1.128 [110/11] via 192.168.0.5, 00:19:14, GigabitEthernet1/0/1
sw-mr-tmp#ping 192.168.1.129

Type escape sequence to abort.
Sending 5, 100-byte ICMP Echos to 192.168.1.129, timeout is 2 seconds:
!!!!!
Success rate is 100 percent (5/5), round-trip min/avg/max = 1/1/1 ms

and on openbsd:
dlg@hotspare netinet$ sudo ospfctl sh ne
ID              Pri State        DeadTime Address         Iface     Uptime
192.168.1.1     1   FULL/BCKUP   00:00:37 192.168.0.4     bnx2      00:20:03

dlg@hotspare netinet$ sudo ospfctl sh fib
flags: * = valid, O = OSPF, C = Connected, S = Static
Flags  Prio Destination          Nexthop
*S        8 0.0.0.0/0            130.102.67.254
*C        0 127.0.0.0/8          link#0
*S        8 127.0.0.0/8          127.0.0.1
*         4 127.0.0.1/32         127.0.0.1
*C        4 130.102.64.0/22      link#3
*C        4 192.168.0.0/31       link#8
*C        4 192.168.0.4/31       link#5
*O       32 192.168.0.4/31       192.168.0.5
*O       32 192.168.1.1/32       192.168.0.4
*C        4 192.168.1.128/25     link#9
*S        8 224.0.0.0/4          127.0.0.1

dlg@hotspare netinet$ ping -c 5 192.168.1.1
PING 192.168.1.1 (192.168.1.1): 56 data bytes
64 bytes from 192.168.1.1: icmp_seq=0 ttl=255 time=0.477 ms
64 bytes from 192.168.1.1: icmp_seq=1 ttl=255 time=0.900 ms
64 bytes from 192.168.1.1: icmp_seq=2 ttl=255 time=0.477 ms
64 bytes from 192.168.1.1: icmp_seq=3 ttl=255 time=0.464 ms
64 bytes from 192.168.1.1: icmp_seq=4 ttl=255 time=2.578 ms
--- 192.168.1.1 ping statistics ---
5 packets transmitted, 5 packets received, 0.0% packet loss
round-trip min/avg/max/std-dev = 0.464/0.979/2.578/0.816 ms

ok?

Index: in.c
===================================================================
RCS file: /cvs/src/sys/netinet/in.c,v
retrieving revision 1.67
diff -u -p -r1.67 in.c
--- in.c        3 Jul 2011 06:24:13 -0000       1.67
+++ in.c        5 Jul 2011 03:10:05 -0000
@@ -700,8 +700,12 @@ in_ifinit(struct ifnet *ifp, struct in_i
         */
        ia->ia_ifa.ifa_metric = ifp->if_metric;
        if (ifp->if_flags & IFF_BROADCAST) {
-               ia->ia_broadaddr.sin_addr.s_addr =
-                       ia->ia_net | ~ia->ia_netmask;
+               if (IN_RFC3021_SUBNET(ia->ia_netmask))
+                       ia->ia_broadaddr.sin_addr.s_addr = 0;
+               else {
+                       ia->ia_broadaddr.sin_addr.s_addr =
+                           ia->ia_net | ~ia->ia_netmask;
+               }
        } else if (ifp->if_flags & IFF_LOOPBACK) {
                ia->ia_dstaddr = ia->ia_addr;
                flags |= RTF_HOST;
Index: in.h
===================================================================
RCS file: /cvs/src/sys/netinet/in.h,v
retrieving revision 1.89
diff -u -p -r1.89 in.h
--- in.h        15 Jun 2011 09:11:01 -0000      1.89
+++ in.h        5 Jul 2011 03:10:05 -0000
@@ -187,6 +187,12 @@ struct in_addr {
 #define        IN_CLASSD_HOST          __IPADDR(0x0fffffff)
 #define        IN_MULTICAST(i)         IN_CLASSD(i)
 
+#define        IN_RFC3021_NET          __IPADDR(0xfffffffe)
+#define        IN_RFC3021_NSHIFT       31
+#define        IN_RFC3021_HOST         __IPADDR(0x00000001)
+#define        IN_RFC3021_SUBNET(n)    (((u_int32_t)(n) & IN_RFC3021_NET) == \
+                                IN_RFC3021_NET)
+
 #define        IN_EXPERIMENTAL(i)      (((u_int32_t)(i) & __IPADDR(0xf0000000)) == \
                                 __IPADDR(0xf0000000))
 #define        IN_BADCLASS(i)          (((u_int32_t)(i) & __IPADDR(0xf0000000)) == \
Index: in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.122
diff -u -p -r1.122 in_pcb.c
--- in_pcb.c    4 Jul 2011 06:54:49 -0000       1.122
+++ in_pcb.c    5 Jul 2011 03:10:05 -0000
@@ -412,8 +412,10 @@ in_pcbconnect(v, nam)
                if (sin->sin_addr.s_addr == INADDR_ANY)
                        sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr;
                else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
-                 (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST))
-                       sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr;
+                 (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST) &&
+                 TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr.s_addr)
+                       sin->sin_addr =
+                           TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr;
        }
        if (inp->inp_laddr.s_addr == INADDR_ANY) {
                int error;
Index: ip_icmp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.93
diff -u -p -r1.93 ip_icmp.c
--- ip_icmp.c   4 Apr 2011 16:33:52 -0000       1.93
+++ ip_icmp.c   5 Jul 2011 03:10:05 -0000
@@ -533,8 +533,12 @@ icmp_input(struct mbuf *m, ...)
                icp->icmp_type = ICMP_MASKREPLY;
                icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
                if (ip->ip_src.s_addr == 0) {
-                       if (ia->ia_ifp->if_flags & IFF_BROADCAST)
-                               ip->ip_src = ia->ia_broadaddr.sin_addr;
+                       if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
+                               if (ia->ia_broadaddr.sin_addr.s_addr)
+                                       ip->ip_src = ia->ia_broadaddr.sin_addr;
+                               else
+                                       ip->ip_src.s_addr = INADDR_BROADCAST;
+                       }
                        else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
                                ip->ip_src = ia->ia_dstaddr.sin_addr;
                }
@@ -674,6 +678,7 @@ icmp_reflect(struct mbuf *m, struct mbuf
                        if (t.s_addr == ia->ia_addr.sin_addr.s_addr)
                                break;
                        if ((ia->ia_ifp->if_flags & IFF_BROADCAST) &&
+                           ia->ia_broadaddr.sin_addr.s_addr != 0 &&
                            t.s_addr == ia->ia_broadaddr.sin_addr.s_addr)
                                break;
                }

Reply via email to