Let me start by saying sorry for all the noise on this subject! This is a
wrap-up message, I promise. :)

With the help of Flavio L [fbl], I got better insight into what I did wrong.
When I set the type of the [tap1] interface to
internal, ovsdb was happy to accept it, but that -- of course -- turns out
to 1) be a stupid thing to do and 2) leave
things in a half-baked state.

There is not much in the logs, but as OVS fails to set the type, it frees
up the interface; however, that cleanup is not
completely done as far as the kernel datapath goes. It then becomes a booby
trap later, when a VM is created,
as something is still hanging on to its address and calling into
dev_get_stats() with a stale net_device pointer.

All in all, this is an artifact of a user error and, while the code could be
made more resilient, it gave me what I
deserved. ;)

Best,

-- flaviof

==

Example steps of my mistake:

# /bin/ovs-vsctl add-br br1
# ip tuntap add mode tap tap1
# /bin/ovs-vsctl  add-port br1 tap1
# /bin/ovs-vsctl set Interface tap1 type=internal    ;  # REALLY BAD, because tap1 already existed as a tun port!!!
# echo $?
0


Note this would have been 'okay' if tap1 did not exist prior to
"/bin/ovs-vsctl  add-port br1 tap1"
Ref: https://github.com/openvswitch/ovs/blame/master/FAQ.md#L968

==

crash> dmesg
...


*[ 3971.747070] device tap1 left promiscuous mode*
*[ 3971.760258] device br1 left promiscuous mode*
...

# NOTE that tap1 is not listed here anymore
crash> net
   NET_DEVICE     NAME   IP ADDRESS(ES)
ffff880853a4e000  lo     127.0.0.1
ffff88084f6e8000  eno1   192.168.2.233
ffff88084eabc000  eno2
ffff880035c8d000  ovs-system
ffff8808512ce000  docker0 172.17.0.1
*ffff880035e2b000*  br1    192.168.50.1
ffff88084666d000  vboxnet0
ffff88084666a000  vboxnet1
ffff88084666c000  vboxnet2
ffff88084e414000  vboxnet3
ffff88082b556000  vboxnet4

crash> dis -l ovs_internal_dev_get_vport
/usr/src/debug/kernel-3.10.0-327.3.1.el7/linux-3.10.0-327.3.1.el7.x86_64/net/openvswitch/vport-internal_dev.c:
261
0xffffffffa04ef5a0 <ovs_internal_dev_get_vport>:        data32 data32
data32 xchg %ax,%ax [FTRACE NOP]
/usr/src/debug/kernel-3.10.0-327.3.1.el7/linux-3.10.0-327.3.1.el7.x86_64/net/openvswitch/vport-internal_dev.c:
263
0xffffffffa04ef5a5 <ovs_internal_dev_get_vport+5>:      xor    %eax,%eax
/usr/src/debug/kernel-3.10.0-327.3.1.el7/linux-3.10.0-327.3.1.el7.x86_64/net/openvswitch/vport-internal_dev.c:
262
*0xffffffffa04ef5a7 <ovs_internal_dev_get_vport+7>:      cmpq
 $0xffffffffa04f0aa0,0x198(%rdi)*
/usr/src/debug/kernel-3.10.0-327.3.1.el7/linux-3.10.0-327.3.1.el7.x86_64/net/openvswitch/vport-internal_dev.c:
261
0xffffffffa04ef5b2 <ovs_internal_dev_get_vport+18>:     push   %rbp

crash> dis -l internal_dev_get_stats
/usr/src/debug/kernel-3.10.0-327.3.1.el7/linux-3.10.0-327.3.1.el7.x86_64/net/openvswitch/vport-internal_dev.c:
49
0xffffffffa04ef040 <internal_dev_get_stats>:    data32 data32 data32 xchg
%ax,%ax [FTRACE NOP]
0xffffffffa04ef045 <internal_dev_get_stats+0x5>:        push   %rbp
0xffffffffa04ef046 <internal_dev_get_stats+0x6>:        mov    %rsp,%rbp
0xffffffffa04ef049 <internal_dev_get_stats+0x9>:        push   %rbx
0xffffffffa04ef04a <internal_dev_get_stats+0xa>:        mov    %rsi,%rbx
0xffffffffa04ef04d <internal_dev_get_stats+0xd>:        sub    $0x48,%rsp
0xffffffffa04ef051 <internal_dev_get_stats+0x11>:       mov
%gs:0x28,%rax
0xffffffffa04ef05a <internal_dev_get_stats+0x1a>:       mov
%rax,-0x10(%rbp)
0xffffffffa04ef05e <internal_dev_get_stats+0x1e>:       xor    %eax,%eax
/usr/src/debug/kernel-3.10.0-327.3.1.el7/linux-3.10.0-327.3.1.el7.x86_64/net/openvswitch/vport-internal_dev.c:
262
*0xffffffffa04ef060 <internal_dev_get_stats+0x20>:       cmpq
 $0xffffffffa04f0aa0,0x198(%rdi)*
0xffffffffa04ef06b <internal_dev_get_stats+0x2b>:       je
 0xffffffffa04ef0d8 <internal_dev_get_stats+0x98>

crash> *net_device*
struct net_device {
    char name[16];
    struct hlist_node name_hlist;
    char *ifalias;
    unsigned long mem_end;
    unsigned long mem_start;
    unsigned long base_addr;
…

crash> whatis net_device.netdev_ops
struct net_device {
  *[0x198] const struct net_device_ops *netdev_ops;*
}

crash> bt
PID: 7368   TASK: ffff880850ee8000  CPU: 3   COMMAND: "EMT"
…
#8 [ffff88082c1df7c0] page_fault at ffffffff8163d388
    [exception RIP: ovs_vport_get_stats+106]
    RIP: ffffffffa04eeafa  RSP: ffff88082c1df870  RFLAGS: 00010246
    RAX: 0000000000000000  RBX: ffff88082c1df898  RCX: 0000000000000000
    RDX: 0000000000000000  RSI: ffff88082c1df898  *RDI: ffff88082c1df8d8
 <== net_device stored in rdi*
    RBP: ffff88082c1df888   R8: ffff88084d9ed010   R9: ffff880853324118
    R10: ffff88085f003400  R11: 0000000000000048  R12: 0000000000000000
    R13: 0000000000000008  R14: ffff880853324000  R15: ffff8808533240b8
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff88082c1df890] internal_dev_get_stats at ffffffffa04ef079
[openvswitch]

crash> hex
output radix: 16 (hex)

crash> net_device *0xffff88082c1df8d8    <== RDI*
struct net_device {
  name = "\240\212\302[\000\000\000\000X\372\035,\b\210\377\377",  <== garbage!
  name_hlist = {
    next = 0xffff88082c1df908,
    pprev = 0xffffffff8152582e <dev_get_stats+110>
  },
  ifalias = 0xffff8808527e4100 "",
 …

crash> net_device *0xffff880035e2b000*  <== br1, from the net command above
struct net_device {
  name = "br1\000\000\000\000\000\000\000\000\000\000\000\000",
  name_hlist = {
    next = 0x0,
    pprev = 0xffff880853a07878
  },
...
_______________________________________________
discuss mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/discuss

Reply via email to