On Tue, Feb 18, 2020 at 12:37:10AM +0100, alexandre wrote:
>
> > On Mon, Feb 17, 2020 at 01:31:08AM +0100, Alexandre wrote:
> > > Hello,
> > >
> > > I am running an OpenBSD/armv7 guest on a QEMU 4.2.0 "virt" machine host;
> > > see the attached file (from_qemu_virt.dts) for the fdt of the guest
> > > machine, it has only a virtio-mmio bus with a virtio-net device attached.
> > >
> > > The QEMU command line is also attached, together with source and bin for
> > > the tiny bootloader for the bsd kernel (from the distfiles of 6.6 release)
> > > used as QEMU "bios".
> > >
> > > I used the netdev user backend.
> > >
> > > Boot is OK (see dmesg.txt).
> > >
> > > The vio network interface is configured by dhclient and we have this:
> > >
> > > my# ifconfig
> > > lo0: flags=8049 mtu 32768
> > > index 3 priority 0 llprio 3
> > > groups: lo
> > > inet6 ::1 prefixlen 128
> > > inet6 fe80::1%lo0 prefixlen 64 scopeid 0x3
> > > inet 127.0.0.1 netmask 0xff000000
> > > vio0: flags=808843 mtu
> > > 1500
> > > lladdr 52:54:00:12:34:56
> > > index 1 priority 0 llprio 3
> > > groups: egress
> > > media: Ethernet autoselect
> > > status: active
> > > inet 10.0.2.15 netmask 0xffffff00 broadcast 10.0.2.255
> > > enc0: flags=0<>
> > > index 2 priority 0 llprio 3
> > > groups: enc
> > > status: active
> > > pflog0: flags=141 mtu 33168
> > > index 4 priority 0 llprio 3
> > > groups: pflog
> > >
> > > my# route -n show
> > > Routing tables
> > >
> > > Internet:
> > > Destination Gateway Flags Refs Use Mtu Prio
> > > Iface
> > > default 10.0.2.2 UGS 0 0 - 8 vio0
> > > 224/4 127.0.0.1 URS 0 0 32768 8 lo0
> > > 10.0.2/24 10.0.2.15 UCn 1 0 - 4 vio0
> > > 10.0.2.2 52:55:0a:00:02:02 UHLch 1 4 - 3
> > > vio0
> > > 10.0.2.15 52:54:00:12:34:56 UHLl 0 46 - 1 vio0
> > > 10.0.2.255 10.0.2.15 UHb 0 0 - 1 vio0
> > > 127/8 127.0.0.1 UGRS 0 0 32768 8 lo0
> > > 127.0.0.1 127.0.0.1 UHhl 1 2 32768 1 lo0
> > > [[ edited Inet6 routes ..]]
> > >
> > > Ping works OK from the guest to the host: ICMP Echo requests are correctly
> > > sent and Echo replies correctly received. It does not work from the guest
> > > to a public IP (but that's fine, it is a known limitation of QEMU net
> > > user).
> > >
> > > UDP packets are OK in the direction guest --> host, but not in reverse
> > > host
> > > --> guest. This causes failure of DNS resolution, for instance. TCP packets
> > > have the same problem (the guest sends the SYN, which is received by the
> > > host who sends the SYN-ACK, but the SYN-ACK is not "seen" by the OBSD
> > > guest
> > > and connect times out).
> > >
> > > What's surprising (to me !) is that packets are visible on tcpdump on the
> > > guest (with 0 packets "dropped by kernel")
> > >
> > > Steps to reproduce:
> > >
> > > on guest:
> > >
> > > my# tcpdump -w test.dump -p&
> > > [1] 13288
> > > my# tcpdump: listening on vio0, link-type EN10MB
> > > my# nc -v -u 10.0.2.2 2222
> > > Connection to 10.0.2.2 2222 port [udp/*] succeeded!
> > > hello from guest (<<-- typed on the guest console)
> > >
> > > on host:
> > >
> > > $ nc -l -v -u -p 2222
> > > listening on 0.0.0.0:2222 ...
> > > connect to 127.0.0.1:2222 from localhost.localdomain:60487
> > > (127.0.0.1:60487)
> > > hello from guest (-->> got this from the guest)
> > > hello from host (<<-- typed on the host console, NOT shown on the guest
> > > console)
> > >
> > > Now the tcpdump -neX on the guest is attached, you can see that the reply
> > > packets are seen by the kernel but not forwarded "to user space". I
> > > also attached tcpdump on the guest, no difference is shown.
> > >
> > > I tried the 0x02 flags of vio (see dmesg) with no effect. The same with
> > > 0x100 or by putting the vio0 interface in promiscuous mode with tcpdump.
> > >
> > > pf has default rules (block return all, pass all flags S/SA, X11 and dpb
> > > builder blocking). Same problem with pf disabled. When appropriate log
> > > rules are configured, I see the faulty packets in pflogd journal as in the
> > > guest tcpdump.
> > >
> > >
> >
> > Maybe netstat -s -p ip and netstat -s -p udp help to find the cause of the
> > packet drops. Also check pfctl -si if one of those counters change when
> > you send UDP packets to the guest.
> >
> > I normally used qemu with the tap virtio option (using
> > -net nic,vlan=$id,macaddr=$mac,model=virtio -net tap,vlan=$id,fd=$fd),
> > never had issues with that.
>
>
> I confirm it is a problem with in4_cksum on ARM.
>
> Networking is OK when I either:
>
> * change
>
> --- sys/dev/pv/vio.c.old
> +++ sys/dev/pv/vio.c
> - m->m_pkthdr.csum_flags = 0;
> + m->m_pkthdr.csum_flags = M_UDP_CSUM_IN_OK | M_TCP_CSUM_IN_OK;
>
> so as to disable checksum verification by udp_input()
>
> OR:
>
> * use the portable C implementation of in4_cksum instead of the ARM one by
> doing
I can not reproduce this problem on a cubox with fec(4) but can on qemu
with vio(4) (by the way you can use qemu_arm/u-boot.bin for bios with
-M virt,highmem=off).
It turns out the armv5te/xscale path (which was removed) works.
It was removed in
revision 1.6
date: 2018/06/03 18:58:11; author: kettenis; state: Exp; lines: +1 -93;
commitid:
B56F7yb62qDTzHIJ;
Remove #ifdef __XSCALE__ bits. No binary change.
ok deraadt@
This diff is a revert and unifdef -D __XSCALE__
Index: sys/arch/arm/arm/in_cksum_arm.S
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/in_cksum_arm.S,v
retrieving revision 1.7
diff -u -p -r1.7 in_cksum_arm.S
--- sys/arch/arm/arm/in_cksum_arm.S 6 Aug 2018 18:39:13 -0000 1.7
+++ sys/arch/arm/arm/in_cksum_arm.S 21 Feb 2020 05:29:59 -0000
@@ -112,6 +112,7 @@ ENTRY(in4_cksum)
cmp r1, #0x00
beq .Lin4_cksum_skip_entry
+ pld [r6, #(IP_SRC)]
add r4, r6, #(IP_SRC)
ands r4, r4, #0x03
add r8, r1, r3 /* sum = nxt + len */
@@ -139,14 +140,14 @@ ENTRY(in4_cksum)
b .Lin4_cksum_add_ips
/* 0x02: Data 16-bit aligned */
- ldr r4, [r6, #(IP_SRC - 2)] /* r4 = 10xx */
- ldr r7, [r6, #(IP_DST - 2)] /* r7 = xx76 */
- ldr r5, [r6, #(IP_SRC + 2)] /* r5 = 5432 */
- mov r4, r4, lsr #16 /* r4 = ..10 */
- orr r4, r4, r7, lsl #16 /* r4 = 7610 */
+ ldrh r5, [r6, #(IP_SRC)] /* BE:r5 = ..01 LE:r5 = ..10 */
+ ldrh r7, [r6, #(IP_DST + 2)] /* BE:r7 = ..67 LE:r7 = ..76 */
+ ldr r4, [r6, #(IP_SRC + 2)] /* BE:r4 = 2345 LE:r4 = 5432 */
+ orr r5, r7, r5, lsl #16 /* BE:r5 = 0167 LE:r5 = 1076 */
b .Lin4_cksum_add_ips
nop
nop
+ nop
/* 0x03: Data 8-bit aligned */
ldrb r4, [r6, #(IP_SRC)] /* r4 = ...0 */
@@ -207,6 +208,7 @@ ENTRY(in4_cksum)
*/
/* LINTSTUB: Ignore */
ASENTRY_NP(L_cksumdata)
+ pld [r0] /* Pre-fetch the start of the buffer */
mov r2, #0
/* We first have to word-align the buffer. */
@@ -231,49 +233,78 @@ ASENTRY_NP(L_cksumdata)
/* Buffer is now word aligned */
.Lcksumdata_wordaligned:
+ cmp r1, #0x04 /* Less than 4 bytes left? */
+ blt .Lcksumdata_endgame /* Yup */
+
+ /* Now quad-align, if necessary */
+ ands r7, r0, #0x04
+ ldrne r7, [r0], #0x04
+ subne r1, r1, #0x04
subs r1, r1, #0x40
- blt .Lcksumdata_bigloop_end
+ blt .Lcksumdata_bigloop_end /* Note: C flag clear if branch taken */
+ /*
+ * Buffer is now quad aligned. Sum 64 bytes at a time.
+ * Note: First ldrd is hoisted above the loop, together with
+ * setting r6 to zero to avoid stalling for results in the
+ * loop. (r7 is live, from above).
+ */
+ ldrd r4, [r0], #0x08
+ mov r6, #0x00
.Lcksumdata_bigloop:
- ldmia r0!, {r3, r4, r5, r6}
- adds r2, r2, r3
+ pld [r0, #0x18]
+ adds r2, r2, r6
+ adcs r2, r2, r7
+ ldrd r6, [r0], #0x08
adcs r2, r2, r4
adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r7}
+ ldrd r4, [r0], #0x08
adcs r2, r2, r6
- adcs r2, r2, r3
+ adcs r2, r2, r7
+ ldrd r6, [r0], #0x08
adcs r2, r2, r4
adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r6}
+ ldrd r4, [r0], #0x08
+ adcs r2, r2, r6
adcs r2, r2, r7
- adcs r2, r2, r3
+ pld [r0, #0x18]
+ ldrd r6, [r0], #0x08
adcs r2, r2, r4
adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r7}
+ ldrd r4, [r0], #0x08
adcs r2, r2, r6
- adcs r2, r2, r3
+ adcs r2, r2, r7
+ ldrd r6, [r0], #0x08
adcs r2, r2, r4
adcs r2, r2, r5
- adcs r2, r2, r7
adc r2, r2, #0x00
subs r1, r1, #0x40
+ ldrdge r4, [r0], #0x08
bge .Lcksumdata_bigloop
+
+ adds r2, r2, r6 /* r6/r7 still need summing */
.Lcksumdata_bigloop_end:
+ adcs r2, r2, r7
+ adc r2, r2, #0x00
+
adds r1, r1, #0x40
moveq pc, lr
cmp r1, #0x20
+ ldrdge r4, [r0], #0x08 /* Avoid stalling pld and result */
blt .Lcksumdata_less_than_32
- ldmia r0!, {r3, r4, r5, r6}
- adds r2, r2, r3
- adcs r2, r2, r4
+ pld [r0, #0x18]
+ ldrd r6, [r0], #0x08
+ adds r2, r2, r4
adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r7}
+ ldrd r4, [r0], #0x08
adcs r2, r2, r6
- adcs r2, r2, r3
+ adcs r2, r2, r7
+ ldrd r6, [r0], #0x08
adcs r2, r2, r4
adcs r2, r2, r5
+ adcs r2, r2, r6 /* XXX: Unavoidable result stall */
adcs r2, r2, r7
adc r2, r2, #0x00
subs r1, r1, #0x20