On Sat, Feb 06, 2016 at 04:43:28PM -0500, Anthony Eden wrote:
> >Synopsis:  <alignment fault on armv7 (omap) using carp(4)>
> >Category:  arm
> >Environment:
>     System      : OpenBSD 5.9
>     Details     : OpenBSD 5.9 (DBGGENERIC) #0: Sat Feb  6 12:22:27 EST 2016
>              [email protected]:/usr/src/sys/arch/armv7/compile/DBGGENERIC
> 
>     Architecture: OpenBSD.armv7
>     Machine     : armv7
> >Description:
>     With two beaglebone black's running -current, an alignment fault is
>     encountered at ip_input.c:262 in ipv4_input() when they are
>     configured to use carp(4) to share the same IP address.
> 
>     Source context from ip_input.c (alignment fault occurs when
>     ip->ip_dst.s_addr is loaded at line 262):
> 
> 258:            ip = mtod(m, struct ip *);
> 259:    }
> 260:
> 261:    /* 127/8 must not appear on wire - RFC1122 */
> 262:    if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
> 263:       (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
> 264:            if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
> 265:                    ipstat.ips_badaddr++;
> 266:                    goto bad;
> 
>     ddb(4) output:
> 
> $ Fatal kernel mode data abort: 'Alignment Fault 1'
> trapframe: 0xcb2d8e40
> DFSR=00000001, DFAR=c4cb401e, spsr=80000013
> r0 =c924d400, r1 =00000003, r2 =00000045, r3 =00000038
> r4 =c4cb400e, r5 =c06f2ca4, r6 =00000014, r7 =c4d65800
> r8 =c0710e50, r9 =c069294c, r10=c0692918, r11=cb2d8eb8
> r12=60000093, ssp=cb2d8e8c, slr=c040bc88, pc =c04616ec
> 
> Stopped at      ipv4_input+0x9c:        ldrls   r3, [r4, #0x010]
> ddb> trace
> ipv4_input+0xc
>         scp=0xc046165c rlv=0xc0461ab4 (ipintr+0x24)
>         rsp=0xcb2d8ebc rfp=0xcb2d8ecc
>         r10=0xc0692918 r8=0xc0710e50 r7=0xc06edd88 r6=0xc06edd88
>         r5=0x00000000 r4=0x00000004
> ipintr+0xc
>         scp=0xc0461a9c rlv=0xc041b290 (netintr+0xa0)
>         rsp=0xcb2d8ed0 rfp=0xcb2d8ef0
> netintr+0xc
>         scp=0xc041b1fc rlv=0xc053f3d0 (softintr_dispatch+0x84)
>         rsp=0xcb2d8ef4 rfp=0xcb2d8f10
>         r7=0x00000000 r6=0xc0710eb4 r5=0xc0710ec0 r4=0xc89e13a0
> softintr_dispatch+0x18
>         scp=0xc053f364 rlv=0xc053eef8 (arm_do_pending_intr+0x110)
>         rsp=0xcb2d8f14 rfp=0xcb2d8f40
>         r6=0xc0710190 r5=0x20000013 r4=0x00000004
> arm_do_pending_intr+0x10
>         scp=0xc053edf8 rlv=0xc040d9a8 (if_input_process+0xcc)
>         rsp=0xcb2d8f44 rfp=0xcb2d8f78
>         r10=0xc0692918 r9=0x00000000 r8=0x00000000 r7=0xcb2d8f44
>         r6=0x00000000 r5=0xc4d65800 r4=0xc4d57480
> if_input_process+0xc
>         scp=0xc040d8e8 rlv=0xc03b5c2c (taskq_thread+0x90)
>         rsp=0xcb2d8f7c rfp=0xcb2d8fb0
>         r10=0xc06e643c r8=0xc06e65d8 r7=0xcb2d8f7c r6=0x00000001
>         r5=0xc89e2040 r4=0xc03b5b04
> taskq_thread+0xc
>         scp=0xc03b5ba8 rlv=0xc0536c10 (proc_trampoline+0x18)
>         rsp=0xcb2d8fb4 rfp=0xc07f3edc
>         r7=0x00000000 r6=0x00000000 r5=0xc89e2040 r4=0xc03b5b9c
> Bad frame pointer: 0xc07f3edc
> 
>     this problem has also been encountered with both BB's running -stable.
> 
> >How-To-Repeat:
>     Install either -current or -stable on two beaglebone black's, with names
>     beagle1 and beagle2. On a LAN 192.168.123.0/24 with default
>     gateway 192.168.123.2, set /etc/mygate to 192.168.123.2 on beagle1 and
>     beagle2, then set /etc/hostname.cpsw0 on beagle1 to be
> 
> inet 192.168.123.201 255.255.255.0 NONE
> 
>     and on beagle2
> 
> inet 192.168.123.202 255.255.255.0 NONE
> 
>     then run the following commands on both to use carp(4):
> 
> doas ifconfig carp0 create
> doas ifconfig carp0 vhid 1 pass tyrell carpdev cpsw0 192.168.123.222 netmask 255.255.255.0
> 
>     shortly thereafter a beaglebone will encounter an alignment fault.
> 
> >Fix:
>     The cause of this problem is unknown to me. I would speculate that the
>     issue lies in m_pullup mishandling alignment, given that networking on
>     the beaglebone black usually functions normally, and that there are
>     branches prior to the crash in which m_pullup is used in deriving a
>     pointer to ip, which when using carp(4) is apparently misaligned.
> 
>     In investigating this issue further, I replaced offending 32-bit loads
>     in the kernel with calls to get_unaligned_le32(), defined as (from
>     linux/unaligned/packed_struct.h):
> 
> struct __una_u32 { u32 x; } __packed;
> static inline u32 get_unaligned_le32(const void *p) {
>     const struct __una_u32 *ptr = (const struct __una_u32 *)p;
>     return ptr->x;
> }
> 
>     Other than replacements in ip_input.c, udp_usrreq.c was also changed as
>     well as the macros IN6_IS_ADDR_UNSPECIFIED, IN6_IS_ADDR_LOOPBACK,
>     IN6_IS_ADDR_V4COMPAT, and IN6_IS_ADDR_V4MAPPED in in6.h.
> 
>     This resulted in carp(4) appearing to function normally, but beagle1
>     and beagle2 repeatedly lost networking temporarily and recurrent
>     'device timeout's appeared in dmesg (as well as carp(4) messages
>     informing state changes from master to slave and vice versa).
> 
>     To me that behavior might suggest the problem is deeper than a
>     bookkeeping mistake of aligning memory in mbuf.

nope, you were right, it's a screwup with alignment.

the problem is that multicast packets that aren't addressed to a carp
interface's mac address have to be duplicated and sent to all carp
interfaces on a parent. the duplication is done with m_copym2, which
doesn't respect the alignment requirements of the ip header that
follows the 14 byte ethernet header.

the following dups the packet inside carp, and makes sure the
ethernet payload is aligned properly.

i was able to reproduce this on sparc64, and i believe this fixes
it. could you test it and see if it helps?

Index: ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.286
diff -u -p -r1.286 ip_carp.c
--- ip_carp.c   21 Jan 2016 11:23:48 -0000      1.286
+++ ip_carp.c   8 Feb 2016 12:56:27 -0000
@@ -210,6 +210,7 @@ int carp_input(struct ifnet *, struct mb
 void   carp_proto_input_c(struct ifnet *, struct mbuf *,
            struct carp_header *, int, sa_family_t);
 void   carp_proto_input_if(struct ifnet *, struct mbuf *, int);
+int    carp_input_mcast(struct carp_softc *, struct mbuf *);
 void   carpattach(int);
 void   carpdetach(struct carp_softc *);
 int    carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
@@ -1392,6 +1393,41 @@ carp_vhe_match(struct carp_softc *sc, ui
 }
 
 int
+carp_input_mcast(struct carp_softc *sc, struct mbuf *m)
+{
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+       struct mbuf *m0;
+       int len;
+
+       MGET(m0, M_DONTWAIT, m->m_type);
+       if (m0 == NULL)
+               return (-1);
+
+       if (m_dup_pkthdr(m0, m, M_DONTWAIT) != 0)
+               goto fail;
+
+       len = m->m_pkthdr.len + max_linkhdr + ETHER_ALIGN;
+
+       if (len > MHLEN) {
+               MCLGETI(m0, M_DONTWAIT, NULL, len);
+               if (!ISSET(m0->m_flags, M_EXT))
+                       goto fail;
+       }
+       m0->m_pkthdr.len = m0->m_len = len;
+       m_adj(m0, max_linkhdr + ETHER_ALIGN);
+       m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
+
+       ml_enqueue(&ml, m0);
+       if_input(&sc->sc_if, &ml);
+
+       return (0);
+
+fail:
+       m_freem(m0);
+       return (-1);
+}
+
+int
 carp_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
 {
        struct ether_header *eh;
@@ -1424,19 +1460,12 @@ carp_input(struct ifnet *ifp0, struct mb
                 * for each CARP interface _before_ copying.
                 */
                SRPL_FOREACH(sc, &cif->vhif_vrs, &i, sc_list) {
-                       struct mbuf *m0;
-
                        if (!(sc->sc_if.if_flags & IFF_UP))
                                continue;
 
-                       m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
-                       if (m0 == NULL)
-                               continue;
-
-                       ml_init(&ml);
-                       ml_enqueue(&ml, m0);
-
-                       if_input(&sc->sc_if, &ml);
+                       /* if we cant send one we probably cant send more */
+                       if (carp_input_mcast(sc, m) != 0)
+                               break;
                }
                SRPL_LEAVE(&i, sc);
 

Reply via email to