ovn-controller abort was found in pinctrl_run() when debugging
an occasional test case failure of:
    ovn-controller.at: Chassis external-id

Backtrace:
(gdb) bt
0  0x00007fd0f84878d7 in raise () from /lib64/libc.so.6
1  0x00007fd0f848953a in abort () from /lib64/libc.so.6
2  0x00000000004a6c9e in ovs_abort_valist (err_no=err_no@entry=0, 
format=format@entry=0x55d050 "%s: assertion %s failed in %s()", 
args=args@entry=0x7fff24390158) at lib/util.c:360
3  0x00000000004ad8b0 in vlog_abort_valist (module_=<optimized out>, 
message=0x55d050 "%s: assertion %s failed in %s()", 
args=args@entry=0x7fff24390158) at lib/vlog.c:1219
4  0x00000000004ad937 in vlog_abort (module=module@entry=0x7f6b20 
<this_module>, message=message@entry=0x55d050 "%s: assertion %s failed in 
%s()") at lib/vlog.c:1233
5  0x00000000004a6a4e in ovs_assert_failure (where=where@entry=0x552ec3 
"lib/ofp-msgs.c:1062", function=function@entry=0x553a20 <__func__.9268> 
"raw_instance_get", condition=condition@entry=0x552c00 "version >= 
info->min_version && version <= info->max_version") at lib/util.c:80
6  0x0000000000471fb4 in raw_instance_get (info=<optimized out>, 
version=<optimized out>) at lib/ofp-msgs.c:1062
7  0x0000000000472533 in ofpraw_put__ 
(raw=raw@entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, version=version@entry=255 
'\377', xid=xid@entry=83886080, extra_tailroom=extra_tailroom@entry=0, 
buf=buf@entry=0x123b340) at lib/ofp-msgs.c:712
8  0x000000000047299c in ofpraw_alloc_xid (extra_tailroom=0, xid=83886080, 
version=255 '\377', raw=OFPRAW_NXT_SET_PACKET_IN_FORMAT) at lib/ofp-msgs.c:588
9  ofpraw_alloc (raw=raw@entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, 
version=<optimized out>, extra_tailroom=extra_tailroom@entry=0) at 
lib/ofp-msgs.c:579
10 0x000000000047383a in ofputil_encode_set_packet_in_format 
(ofp_version=<optimized out>, format=format@entry=OFPUTIL_PACKET_IN_NXT2) at 
lib/ofp-packet.c:70
11 0x00000000004145d3 in pinctrl_setup () at ovn/controller/pinctrl.c:134
12 pinctrl_run (ctx=ctx@entry=0x7fff243904f0, br_int=br_int@entry=0x1216a50, 
chassis=chassis@entry=0x1239f90, 
chassis_index=chassis_index@entry=0x7fff243a1c00, 
local_datapaths=local_datapaths@entry=0x7fff243a1c20, 
active_tunnels=active_tunnels@entry=0x7fff243a1c80)
    at ovn/controller/pinctrl.c:1258
13 0x00000000004076ce in main (argc=6, argv=0x7fff243a3e38) at 
ovn/controller/ovn-controller.c:1177

Root cause:
When entering pinctrl_setup() the rconn got Broken pipe error:

2018-05-19T06:10:06.697Z|00105|stream_fd|DBG|send: Broken pipe
2018-05-19T06:10:06.697Z|00106|vconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt:
 sent (Broken pipe): OFPT_GET_CONFIG_REQUEST (OF1.3) (xid=0x4):
2018-05-19T06:10:06.697Z|00107|rconn|WARN|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt:
 connection dropped (Broken pipe)
2018-05-19T06:10:06.697Z|00109|rconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt:
 entering BACKOFF
2018-05-19T06:10:06.698Z|00110|util|EMER|lib/ofp-msgs.c:1062: assertion version 
>= info->min_version && version <= info->max_version failed in 
raw_instance_get()

(The connection is closed because the test case set datapath_type to
foo, which is invalid for OVS but the test case doesn't care.)

There are two message sendings in pinctrl_setup(). The first message
sending detected the connection lost and triggered disconnect()
which set rconn status to BACKOFF. However, before sending the second
message, it calls rconn_get_version() again. When rconn is not
connected, the rconn_get_version() returns -1, which is used when
calling ofputil_encode_set_packet_in_format(), which finally triggered
the abort().

This problem exists not only in pinctrl_setup(), but many other places
in pinctrl and ofctrl modules. In those places when calling
rconn_get_version(), the connection status may not be connected, and
rconn_get_version() may return -1, but the return value is not
checked and directly used for following function calls, so potentially
ovn-controller can crash in these places. This patch fixes these
problems.

Signed-off-by: Han Zhou <[email protected]>
---
 ovn/controller/ofctrl.c  |  6 ++++++
 ovn/controller/pinctrl.c | 38 +++++++++++++++++++++++++++++++++-----
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index 349de3a..861547d 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -180,6 +180,9 @@ ofctrl_init(struct ovn_extend_table *group_table,
 static void
 run_S_NEW(void)
 {
+    if (rconn_get_version(swconn) < 0) {
+        return;
+    }
     struct ofpbuf *buf = ofpraw_alloc(OFPRAW_NXT_TLV_TABLE_REQUEST,
                                       rconn_get_version(swconn), 0);
     xid = queue_msg(buf);
@@ -804,6 +807,9 @@ add_meter_mod(const struct ofputil_meter_mod *mm, struct 
ovs_list *msgs)
 static void
 add_ct_flush_zone(uint16_t zone_id, struct ovs_list *msgs)
 {
+    if (rconn_get_version(swconn) < 0) {
+        return;
+    }
     struct ofpbuf *msg = ofpraw_alloc(OFPRAW_NXT_CT_FLUSH_ZONE,
                                       rconn_get_version(swconn), 0);
     struct nx_zone_id *nzi = ofpbuf_put_zeros(msg, sizeof *nzi);
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 305f206..b06b4ef 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -127,11 +127,15 @@ pinctrl_setup(void)
     /* Fetch the switch configuration.  The response later will allow us to
      * change the miss_send_len to UINT16_MAX, so that we can enable
      * asynchronous messages. */
+    enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
-                           rconn_get_version(swconn), 0));
+                           version, 0));
 
     /* Set a packet-in format that supports userdata.  */
-    queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
+    queue_msg(ofputil_encode_set_packet_in_format(version,
                                                   OFPUTIL_PACKET_IN_NXT2));
 }
 
@@ -140,6 +144,9 @@ set_switch_config(struct rconn *swconn_,
                   const struct ofputil_switch_config *config)
 {
     enum ofp_version version = rconn_get_version(swconn_);
+    if (version < 0) {
+        return;
+    }
     struct ofpbuf *request = ofputil_encode_set_config(config, version);
     queue_msg(request);
 }
@@ -152,9 +159,12 @@ set_actions_and_enqueue_msg(const struct dp_packet *packet,
     /* Copy metadata from 'md' into the packet-out via "set_field"
      * actions, then add actions from 'userdata'.
      */
+    enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     uint64_t ofpacts_stub[4096 / 8];
     struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
-    enum ofp_version version = rconn_get_version(swconn);
 
     reload_metadata(&ofpacts, md);
     enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
@@ -377,6 +387,9 @@ pinctrl_handle_put_dhcp_opts(
     struct ofpbuf *userdata, struct ofpbuf *continuation)
 {
     enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
@@ -668,6 +681,9 @@ pinctrl_handle_put_dhcpv6_opts(
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
     enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
@@ -862,6 +878,9 @@ pinctrl_handle_dns_lookup(
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
     enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
@@ -1431,6 +1450,10 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
     if (time_msec() < ra->next_announce) {
         return ra->next_announce;
     }
+    enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return ra->next_announce;
+    }
 
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
@@ -1471,7 +1494,6 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
     };
 
     match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
-    enum ofp_version version = rconn_get_version(swconn);
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     queue_msg(ofputil_encode_packet_out(&po, proto));
     dp_packet_uninit(&packet);
@@ -1897,6 +1919,10 @@ send_garp(struct garp_data *garp, long long int 
current_time)
         return garp->announce_time;
     }
 
+    enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return garp->announce_time;
+    }
     /* Compose a GARP request packet. */
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
@@ -1912,7 +1938,6 @@ send_garp(struct garp_data *garp, long long int 
current_time)
     /* Compose actions.  The garp request is output on localnet ofport. */
     uint64_t ofpacts_stub[4096 / 8];
     struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
-    enum ofp_version version = rconn_get_version(swconn);
     ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
 
     struct ofputil_packet_out po = {
@@ -2373,6 +2398,9 @@ pinctrl_handle_put_nd_ra_opts(
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
     enum ofp_version version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
-- 
2.1.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to