On Wed, May 23, 2018 at 5:14 PM, Ben Pfaff <[email protected]> wrote:
>
> Until now, rconn_get_version() has only reported the OpenFlow version in
> use when the rconn is actually connected.  This makes sense, but it has a
> harsh consequence.  Consider code like this:
>
>     if (rconn_is_connected(rconn) && rconn_get_version(rconn) >= 0) {
>         for (int i = 0; i < 2; i++) {
>             struct ofpbuf *b = ofputil_encode_echo_request(
>                 rconn_get_version(rconn));
>             rconn_send(rconn, b, NULL);
>         }
>     }
>
> Maybe not the smartest code in the world, and probably no one would write
> this exact code in any case, but it doesn't look too risky or crazy.
>
> But it is.  The second trip through the loop can assert-fail inside
> ofputil_encode_echo_request() because rconn_get_version(rconn) returns -1
> instead of a valid OpenFlow version.  That happens if the first call to
> rconn_send() encounters an error while sending the message and therefore
> destroys the underlying vconn and disconnects so that rconn_get_version()
> doesn't have a vconn to query for its version.
>
> In a case like this where all the code to send the messages is close by,
we
> could just check rconn_get_version() in each loop iteration.  We could
even
> go through the tree and convince ourselves that individual bits of code
are
> safe, or be conservative and check rconn_get_version() >= 0 in the iffy
> cases.  But this seems to me like an ongoing source of risk and a way to
> get things wrong in corner cases.
>
> This commit takes a different approach.  It introduces a new invariant: if
> an rconn has ever been connected, then it returns a valid OpenFlow version
> from rconn_get_version().  In addition, if an rconn is currently
connected,
> then the OpenFlow version it returns is the correct one (that may be
> obvious, but there were corner cases before where it returned -1 even
> though rconn_is_connected() returned true).
>
> With this commit, the code above would work OK.  If the first call to
> rconn_send() encounters an error sending the message, then
> rconn_get_version() in the second iteration will return the same value as
> in the first iteration.  The message passed to rconn_send() will end up
> being discarded, but that's much better than either an assertion failure
or
> having to carefully analyze a lot of our code to deal with one unusual
> corner case.
>
> Reported-by: Han Zhou <[email protected]>
> Signed-off-by: Ben Pfaff <[email protected]>
> ---
>  lib/learning-switch.c |  2 +-
>  lib/rconn.c           | 41 ++++++++++++++++-------------------------
>  lib/vconn.c           |  1 +
>  3 files changed, 18 insertions(+), 26 deletions(-)
>
> diff --git a/lib/learning-switch.c b/lib/learning-switch.c
> index 04eaa6e8e73f..8102475cae52 100644
> --- a/lib/learning-switch.c
> +++ b/lib/learning-switch.c
> @@ -305,7 +305,7 @@ lswitch_run(struct lswitch *sw)
>      rconn_run(sw->rconn);
>
>      if (sw->state == S_CONNECTING) {
> -        if (rconn_get_version(sw->rconn) != -1) {
> +        if (rconn_is_connected(sw->rconn)) {
>              lswitch_handshake(sw);
>              sw->state = S_FEATURES_REPLY;
>          }
> diff --git a/lib/rconn.c b/lib/rconn.c
> index 3cad259d0178..a3f811fedbe3 100644
> --- a/lib/rconn.c
> +++ b/lib/rconn.c
> @@ -141,7 +141,8 @@ struct rconn {
>      struct vconn *monitors[MAXIMUM_MONITORS];
>      size_t n_monitors;
>
> -    uint32_t allowed_versions;
> +    uint32_t allowed_versions;  /* Acceptable OpenFlow versions. */
> +    int version;                /* Current or most recent version. */
>  };
>
>  /* Counts packets and bytes queued into an rconn by a given source. */
> @@ -182,8 +183,6 @@ static bool is_connected_state(enum state);
>  static bool is_admitted_msg(const struct ofpbuf *);
>  static bool rconn_logging_connection_attempts__(const struct rconn *rc)
>      OVS_REQUIRES(rc->mutex);
> -static int rconn_get_version__(const struct rconn *rconn)
> -    OVS_REQUIRES(rconn->mutex);
>
>  /* The following prototypes duplicate those in rconn.h, but there we
weren't
>   * able to add the OVS_EXCLUDED annotations because the definition of
struct
> @@ -284,7 +283,9 @@ rconn_create(int probe_interval, int max_backoff,
uint8_t dscp,
>      rconn_set_dscp(rc, dscp);
>
>      rc->n_monitors = 0;
> +
>      rc->allowed_versions = allowed_versions;
> +    rc->version = -1;
>
>      return rc;
>  }
> @@ -376,8 +377,7 @@ rconn_connect_unreliably(struct rconn *rc,
>      rconn_set_target__(rc, vconn_get_name(vconn), name);
>      rc->reliable = false;
>      rc->vconn = vconn;
> -    rc->last_connected = time_now();
> -    state_transition(rc, S_ACTIVE);
> +    state_transition(rc, S_CONNECTING);
>      ovs_mutex_unlock(&rc->mutex);
>  }
>
> @@ -514,6 +514,7 @@ run_CONNECTING(struct rconn *rc)
>          VLOG_INFO("%s: connected", rc->name);
>          rc->n_successful_connections++;
>          state_transition(rc, S_ACTIVE);
> +        rc->version = vconn_get_version(rc->vconn);
>          rc->last_connected = rc->state_entered;
>      } else if (retval != EAGAIN) {
>          if (rconn_logging_connection_attempts__(rc)) {
> @@ -575,14 +576,8 @@ run_ACTIVE(struct rconn *rc)
>           * can end up queuing a packet with vconn == NULL and then
*boom*. */
>          state_transition(rc, S_IDLE);
>
> -        /* Send an echo request if we can.  (If version negotiation is
not
> -         * complete, that is, if we did not yet receive a "hello"
message from
> -         * the peer, we do not know the version to use, so we don't send
> -         * anything.) */
> -        int version = rconn_get_version__(rc);
> -        if (version >= 0 && version <= 0xff) {
> -            rconn_send__(rc, ofputil_encode_echo_request(version), NULL);
> -        }
> +        /* Send an echo request. */
> +        rconn_send__(rc, ofputil_encode_echo_request(rc->version), NULL);
>
>          return;
>      }
> @@ -944,23 +939,19 @@ rconn_failure_duration(const struct rconn *rconn)
>      return duration;
>  }
>
> -static int
> -rconn_get_version__(const struct rconn *rconn)
> -    OVS_REQUIRES(rconn->mutex)
> -{
> -    return rconn->vconn ? vconn_get_version(rconn->vconn) : -1;
> -}
> -
> -/* Returns the OpenFlow version negotiated with the peer, or -1 if there
is
> - * currently no connection or if version negotiation is not yet
complete. */
> +/* Returns the OpenFlow version most recently negotiated with a peer, or
-1 if
> + * no version has ever been negotiated.
> + *
> + * If 'rconn' is connected (that is, if 'rconn_is_connected(rconn)' would
> + * return true), then the return value is guaranteed to be the OpenFlow
version
> + * in use for the connection.  The converse is not true: when the return
value
> + * is not -1, 'rconn' might be disconnected. */
>  int
>  rconn_get_version(const struct rconn *rconn)
>      OVS_EXCLUDED(rconn->mutex)
>  {
> -    int version;
> -
>      ovs_mutex_lock(&rconn->mutex);
> -    version = rconn_get_version__(rconn);
> +    int version = rconn->version;
>      ovs_mutex_unlock(&rconn->mutex);
>
>      return version;
> diff --git a/lib/vconn.c b/lib/vconn.c
> index aeb2cd7dd826..224e8a4c4fbf 100644
> --- a/lib/vconn.c
> +++ b/lib/vconn.c
> @@ -571,6 +571,7 @@ vconn_connect(struct vconn *vconn)
>              break;
>
>          case VCS_DISCONNECTED:
> +            ovs_assert(vconn->error != 0);
>              return vconn->error;
>
>          default:
> --
> 2.16.1
>

Acked-by: Han Zhou <[email protected]>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to