I'm running OVS master (afc3987) with the net-next kernel module
(4e7a84b), and if I run ovs-dpctl I sometimes get the following error:

dpif|WARN|failed to enumerate system datapaths: Protocol error

I made some local changes to OVS lib/netlink* (diff below) to get a
little more useful information. The error seems to come from the
netlink deserialization of the kernel's reply when looking up the
vport genetlink family: the reply buffer that OVS userspace parses is
empty (0 bytes), whereas it expects at least 20 bytes of netlink
headers. I haven't had a chance to really dig into this yet.

The problem is intermittent, but it seems to happen more reliably when
the kernel module was recently loaded.

I thought it might be related to the recent vxlan changes, but it goes
back at least as far as the PACKET_ATTR_PROBE changes (1ba3980), and
likely further back than that. I'm sending this message out in case
it sounds familiar to anyone.


Reproduction
============

net-next # git lo -5
4e7a84b Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
2e62fa6 Merge branch 'vxlan_group_policy_extension'
1dd144c openvswitch: Support VXLAN Group Policy extension
81bfe3c openvswitch: Allow for any level of nesting in flow attributes
d91641d openvswitch: Rename GENEVE_TUN_OPTS() to TUN_METADATA_OPTS()

net-next # rmmod openvswitch ; make M=net/openvswitch modules &&
insmod net/openvswitch/openvswitch.ko && v kill run
  Building modules, stage 2.
  MODPOST 2 modules
------> sudo killall -q -2 ovs-vswitchd
------> sudo killall -q -2 ovsdb-server
------> sudo killall -q -2 ovs-vswitchd
------> sudo killall -q -2 ovsdb-server
------> ovsdb-server --remote=punix:/home/joe/root/run/db.sock
--remote=db:Open_vSwitch,Open_vSwitch,manager_options
--private-key=db:Open_vSwitch,SSL,private_key
--certificate=db:Open_vSwitch,SSL,certificate
--bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert --detach -vconsole:off
--pidfile --log-file
------> ovs-vsctl --no-wait --bootstrap set-ssl
/home/joe/root/pki/ovsclient-privkey.pem
/home/joe/root/pki/ovsclient-cert.pem
/home/joe/root/pki/vswitchd.cacert
------> ovs-vsctl --no-wait --version
------> ovs-vsctl --no-wait --bare list Open_vSwitch
------> ovs-vsctl --no-wait set Open_vSwitch
5a8e89bf-97b2-4f4f-8238-9377c3c78ee5 ovs_version=2.3.90
------> sudo /home/joe/ovs/_build-gcc/vswitchd/ovs-vswitchd --pidfile
--log-file -vconsole:off --detach --enable-dummy
2015-01-15T21:30:07Z|00001|vlog|INFO|opened log file
/home/joe/root/log/ovs-vswitchd.log

net-next # ovs-dpctl -v show 2>&1 | cut -d'|' -f3-
netlink_socket|DBG|nl_sock_send__ (Success): nl(len:17,
type=26(family-defined), flags=305[REQUEST][ACK][DUMP], seq=1,
pid=4294961345
netlink_socket|DBG|../lib/dpif-netlink.c:2308:
nl_lookup_genl_family(ovs_datapath, 0)
netlink_socket|DBG|../lib/netlink-socket.c:1534: into do_lookup_genl_family
netlink_socket|DBG|nl_sock_transact_multiple__ (Success): nl(len:40,
type=16(control), flags=1[REQUEST], seq=1,
pid=19580,genl(cmd=3,version=1)
netlink_socket|DBG|nl_sock_recv__ (Success): nl(len:192,
type=16(control), flags=0, seq=1, pid=19580,genl(cmd=1,version=2)
netlink_socket|DBG|../lib/dpif-netlink.c:2315:
nl_lookup_genl_family(ovs_vport, 0)
netlink_socket|DBG|../lib/netlink-socket.c:1534: into do_lookup_genl_family
netlink_socket|DBG|nl_sock_transact_multiple__ (Success): nl(len:36,
type=16(control), flags=1[REQUEST], seq=1,
pid=19580,genl(cmd=3,version=1)
netlink|DBG|../lib/netlink-socket.c:1361: missing headers in
nl_policy_parse (0, 20)
dpif|WARN|failed to enumerate system datapaths: Protocol error

ovs # ovs-vsctl show
5a8e89bf-97b2-4f4f-8238-9377c3c78ee5
    Bridge "br0"
        Port "br0"
            Interface "br0"
                type: internal
        Port "mz0"
            Interface "mz0"
                type: internal
        Port "em2"
            Interface "em2"
    ovs_version: "2.3.90"


Local changes:
==============

diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index e4f153f..e4a7704 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -1331,13 +1331,15 @@ genl_family_to_name(uint16_t id)

 #ifndef _WIN32
 static int
-do_lookup_genl_family(const char *name, struct nlattr **attrs,
-                      struct ofpbuf **replyp)
+do_lookup_genl_family__(const char *name, struct nlattr **attrs,
+                      struct ofpbuf **replyp, const char *where)
 {
     struct nl_sock *sock;
     struct ofpbuf request, *reply;
     int error;

+    VLOG_DBG("%s: into do_lookup_genl_family", where);
+
     *replyp = NULL;
     error = nl_sock_create(NETLINK_GENERIC, &sock);
     if (error) {
@@ -1367,6 +1369,9 @@ do_lookup_genl_family(const char *name, struct
nlattr **attrs,
     *replyp = reply;
     return 0;
 }
+
+#define do_lookup_genl_family(n, a, reply) \
+    do_lookup_genl_family__(n, a, reply, OVS_SOURCE_LOCATOR)
 #else
 static int
 do_lookup_genl_family(const char *name, struct nlattr **attrs,
@@ -1518,8 +1523,9 @@ exit:
  * may use '*number' as the family number.  On failure, returns a positive
  * errno value and '*number' caches the errno value. */
 int
-nl_lookup_genl_family(const char *name, int *number)
+nl_lookup_genl_family__(const char *name, int *number, const char *where)
 {
+    VLOG_DBG("%s: nl_lookup_genl_family(%s, %d)", where, name, *number);
     if (*number == 0) {
         struct nlattr *attrs[ARRAY_SIZE(family_policy)];
         struct ofpbuf *reply;
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index 7a63b71..1a61bc7 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -272,7 +272,9 @@ bool nl_dump_next(struct nl_dump *, struct ofpbuf
*reply, struct ofpbuf *buf);
 int nl_dump_done(struct nl_dump *);

 /* Miscellaneous */
-int nl_lookup_genl_family(const char *name, int *number);
+int nl_lookup_genl_family__(const char *name, int *number, const char *where);
+#define nl_lookup_genl_family(name, n) \
+    nl_lookup_genl_family__(name, n, OVS_SOURCE_LOCATOR)
 int nl_lookup_genl_mcgroup(const char *family_name, const char *group_name,
                            unsigned int *multicast_group);

diff --git a/lib/netlink.c b/lib/netlink.c
index 52fc7dd..cef12c1 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -719,9 +719,9 @@ nl_attr_validate(const struct nlattr *nla, const
struct nl_policy *policy)
  * If the Netlink attributes in 'msg' follow a Netlink header and a Generic
  * Netlink header, then 'nla_offset' should be NLMSG_HDRLEN + GENL_HDRLEN. */
 bool
-nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
+nl_policy_parse__(const struct ofpbuf *msg, size_t nla_offset,
                 const struct nl_policy policy[],
-                struct nlattr *attrs[], size_t n_attrs)
+                struct nlattr *attrs[], size_t n_attrs, const char *where)
 {
     struct nlattr *nla;
     size_t left;
@@ -730,7 +730,8 @@ nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
     memset(attrs, 0, n_attrs * sizeof *attrs);

     if (ofpbuf_size(msg) < nla_offset) {
-        VLOG_DBG_RL(&rl, "missing headers in nl_policy_parse");
+        VLOG_DBG_RL(&rl, "%s: missing headers in nl_policy_parse (%d,
%"PRIuSIZE")",
+                    where, ofpbuf_size(msg), nla_offset);
         return false;
     }

diff --git a/lib/netlink.h b/lib/netlink.h
index 6068f5d..b71fda6 100644
--- a/lib/netlink.h
+++ b/lib/netlink.h
@@ -209,9 +209,11 @@ struct nl_policy

 bool nl_attr_validate(const struct nlattr *, const struct nl_policy *);

-bool nl_policy_parse(const struct ofpbuf *, size_t offset,
+bool nl_policy_parse__(const struct ofpbuf *, size_t offset,
                      const struct nl_policy[],
-                     struct nlattr *[], size_t n_attrs);
+                     struct nlattr *[], size_t n_attrs, const char *where);
+#define nl_policy_parse(buf, off, pol, a, n)            \
+    nl_policy_parse__(buf, off, pol, a, n, OVS_SOURCE_LOCATOR)
 bool nl_parse_nested(const struct nlattr *, const struct nl_policy[],
                      struct nlattr *[], size_t n_attrs);
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to