Hi Felix,

Felix Huettner via dev <[email protected]> writes:

> When "commit" is false then conn_not_found will never write to
> ct->conns. This means that the guarantee the mutex provides for us is not
> necessary.
> If "commit" is true then the mutex is still necessary. We need to be sure
> that we do not insert a connection that is added in parallel somewhere
> else.
>
> When doing a loadtest with OVN and natting across two LRs we increased
> our packet rate from 1 Mio pps to 2.5 Mio pps. This mainly comes from
> the UNSNAT and UNDNAT stages of OVN that generally do not match anything
> as long as we are in not specific cases.
>
> Co-Authored-by: Florian Werner <[email protected]>
> Signed-off-by: Florian Werner <[email protected]>
> Co-Authored-by: Sebastian Riese <[email protected]>
> Signed-off-by: Sebastian Riese <[email protected]>
> Signed-off-by: Felix Huettner <[email protected]>
> ---

Some quick comments for the series.

>  lib/conntrack.c | 225 ++++++++++++++++++++++++++++--------------------
>  1 file changed, 132 insertions(+), 93 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index f84cdd216..bcce71808 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -1022,115 +1022,102 @@ ct_verify_helper(const char *helper, enum 
> ct_alg_ctl_type ct_alg_ctl)
>  }
>  
>  static struct conn *
> -conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> -               struct conn_lookup_ctx *ctx, bool commit, long long now,
> -               const struct nat_action_info_t *nat_action_info,
> -               const char *helper, const struct alg_exp_node *alg_exp,
> -               enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
> +conn_insert(struct conntrack *ct, struct dp_packet *pkt,
> +            struct conn_lookup_ctx *ctx, long long now,
> +            const struct nat_action_info_t *nat_action_info,
> +            const char *helper, const struct alg_exp_node *alg_exp,
> +            enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
>      OVS_REQUIRES(ct->ct_lock)
>  {
>      struct conn *nc = NULL;
>  
> -    if (!valid_new(pkt, &ctx->key)) {
> -        pkt->md.ct_state = CS_INVALID;
> +    int64_t czl_limit;
> +    struct conn_key_node *fwd_key_node, *rev_key_node;
> +    struct zone_limit *zl = zone_limit_lookup_or_default(ct,
> +                                                         ctx->key.zone);
> +    if (zl) {
> +        czl_limit = zone_limit_get_limit(ct, &zl->czl);
> +        if (czl_limit >= 0 &&
> +            atomic_count_get(&zl->czl.count) >= czl_limit) {
> +            COVERAGE_INC(conntrack_zone_full);
> +            return nc;
> +        }
> +    }
> +
> +    unsigned int n_conn_limit;
> +    atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
> +    if (atomic_count_get(&ct->n_conn) >= n_conn_limit) {
> +        COVERAGE_INC(conntrack_full);
>          return nc;
>      }
>  
> -    pkt->md.ct_state = CS_NEW;
> +    nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
> +    fwd_key_node = &nc->key_node[CT_DIR_FWD];
> +    rev_key_node = &nc->key_node[CT_DIR_REV];
> +    memcpy(&fwd_key_node->key, &ctx->key, sizeof fwd_key_node->key);
> +    memcpy(&rev_key_node->key, &fwd_key_node->key,
> +           sizeof rev_key_node->key);
> +    conn_key_reverse(&rev_key_node->key);
> +
> +    if (ct_verify_helper(helper, ct_alg_ctl)) {
> +        nc->alg = nullable_xstrdup(helper);
> +    }
>  
>      if (alg_exp) {
> -        pkt->md.ct_state |= CS_RELATED;
> +        nc->alg_related = true;
> +        nc->mark = alg_exp->parent_mark;
> +        nc->label = alg_exp->parent_label;
> +        nc->parent_key = alg_exp->parent_key;
>      }
>  
> -    if (commit) {
> -        int64_t czl_limit;
> -        struct conn_key_node *fwd_key_node, *rev_key_node;
> -        struct zone_limit *zl = zone_limit_lookup_or_default(ct,
> -                                                             ctx->key.zone);
> -        if (zl) {
> -            czl_limit = zone_limit_get_limit(ct, &zl->czl);
> -            if (czl_limit >= 0 &&
> -                atomic_count_get(&zl->czl.count) >= czl_limit) {
> -                COVERAGE_INC(conntrack_zone_full);
> -                return nc;
> -            }
> -        }
> +    ovs_mutex_init_adaptive(&nc->lock);
> +    atomic_flag_clear(&nc->reclaimed);
> +    fwd_key_node->dir = CT_DIR_FWD;
> +    rev_key_node->dir = CT_DIR_REV;
>  
> -        unsigned int n_conn_limit;
> -        atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
> -        if (atomic_count_get(&ct->n_conn) >= n_conn_limit) {
> -            COVERAGE_INC(conntrack_full);
> -            return nc;
> -        }
> +    if (zl) {
> +        nc->admit_zone = zl->czl.zone;
> +        nc->zone_limit_seq = zl->czl.zone_limit_seq;
> +    } else {
> +        nc->admit_zone = INVALID_ZONE;
> +    }
>  
> -        nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
> -        fwd_key_node = &nc->key_node[CT_DIR_FWD];
> -        rev_key_node = &nc->key_node[CT_DIR_REV];
> -        memcpy(&fwd_key_node->key, &ctx->key, sizeof fwd_key_node->key);
> -        memcpy(&rev_key_node->key, &fwd_key_node->key,
> -               sizeof rev_key_node->key);
> -        conn_key_reverse(&rev_key_node->key);
> -
> -        if (ct_verify_helper(helper, ct_alg_ctl)) {
> -            nc->alg = nullable_xstrdup(helper);
> -        }
> +    if (nat_action_info) {
> +        nc->nat_action = nat_action_info->nat_action;
>  
>          if (alg_exp) {
> -            nc->alg_related = true;
> -            nc->mark = alg_exp->parent_mark;
> -            nc->label = alg_exp->parent_label;
> -            nc->parent_key = alg_exp->parent_key;
> -        }
> -
> -        ovs_mutex_init_adaptive(&nc->lock);
> -        atomic_flag_clear(&nc->reclaimed);
> -        fwd_key_node->dir = CT_DIR_FWD;
> -        rev_key_node->dir = CT_DIR_REV;
> -
> -        if (zl) {
> -            nc->admit_zone = zl->czl.zone;
> -            nc->zone_limit_seq = zl->czl.zone_limit_seq;
> -        } else {
> -            nc->admit_zone = INVALID_ZONE;
> -        }
> -
> -        if (nat_action_info) {
> -            nc->nat_action = nat_action_info->nat_action;
> -
> -            if (alg_exp) {
> -                if (alg_exp->nat_rpl_dst) {
> -                    rev_key_node->key.dst.addr = alg_exp->alg_nat_repl_addr;
> -                    nc->nat_action = NAT_ACTION_SRC;
> -                } else {
> -                    rev_key_node->key.src.addr = alg_exp->alg_nat_repl_addr;
> -                    nc->nat_action = NAT_ACTION_DST;
> -                }
> +            if (alg_exp->nat_rpl_dst) {
> +                rev_key_node->key.dst.addr = alg_exp->alg_nat_repl_addr;
> +                nc->nat_action = NAT_ACTION_SRC;
>              } else {
> -                bool nat_res = nat_get_unique_tuple(ct, nc, nat_action_info);
> -                if (!nat_res) {
> -                    goto nat_res_exhaustion;
> -                }
> +                rev_key_node->key.src.addr = alg_exp->alg_nat_repl_addr;
> +                nc->nat_action = NAT_ACTION_DST;
> +            }
> +        } else {
> +            bool nat_res = nat_get_unique_tuple(ct, nc, nat_action_info);
> +            if (!nat_res) {
> +                goto nat_res_exhaustion;
>              }
> -
> -            nat_packet(pkt, nc, false, ctx->icmp_related);
> -            uint32_t rev_hash = conn_key_hash(&rev_key_node->key,
> -                                              ct->hash_basis);
> -            cmap_insert(&ct->conns[ctx->key.zone],
> -                        &rev_key_node->cm_node, rev_hash);
>          }
>  
> +        nat_packet(pkt, nc, false, ctx->icmp_related);
> +        uint32_t rev_hash = conn_key_hash(&rev_key_node->key,
> +                                          ct->hash_basis);
>          cmap_insert(&ct->conns[ctx->key.zone],
> -                    &fwd_key_node->cm_node, ctx->hash);
> -        conn_expire_push_front(ct, nc);
> -        atomic_count_inc(&ct->n_conn);
> -
> -        if (zl) {
> -            atomic_count_inc(&zl->czl.count);
> -        }
> -
> -        ctx->conn = nc; /* For completeness. */
> +                    &rev_key_node->cm_node, rev_hash);
>      }
>  
> +    cmap_insert(&ct->conns[ctx->key.zone],
> +                &fwd_key_node->cm_node, ctx->hash);
> +    conn_expire_push_front(ct, nc);
> +    atomic_count_inc(&ct->n_conn);
> +
> +    if (zl) {
> +        atomic_count_inc(&zl->czl.count);
> +    }
> +
> +    ctx->conn = nc; /* For completeness. */
> +
>      return nc;
>  
>      /* This would be a user error or a DOS attack.  A user error is prevented
> @@ -1146,6 +1133,62 @@ nat_res_exhaustion:
>      return NULL;
>  }
>  
> +static bool
> +pkt_set_new_ct_state(struct dp_packet *pkt, struct conn_lookup_ctx *ctx,
> +                     const struct alg_exp_node *alg_exp)
> +{
> +    if (!valid_new(pkt, &ctx->key)) {
> +        pkt->md.ct_state = CS_INVALID;
> +        return false;
> +    }
> +
> +    pkt->md.ct_state = CS_NEW;
> +
> +    if (alg_exp) {
> +        pkt->md.ct_state |= CS_RELATED;
> +    }
> +
> +    return true;
> +}
> +
> +static struct conn *
> +conn_maybe_not_found(struct conntrack *ct, struct dp_packet *pkt,
> +                     struct conn_lookup_ctx *ctx, bool commit, long long now,
> +                     const struct nat_action_info_t *nat_action_info,
> +                     const char *helper, const struct alg_exp_node *alg_exp,
> +                     enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
> +    {

^ Whitespace

> +    struct conn *nc = NULL;
> +
> +    /* Note that we only insert a connection during if commit=true. In this

omit the 'during' I think.

> +     * case we must ensure that the connection is not already part of
> +     * ct->conns. This means the lock needs to be held across the loockup and

lookup

> +     * insert.
> +     * Since we do not need this sequencing if we do not insert we can skip 
> the
> +     * lock completely.
> +     * We do not use multiple small if's as this confused clangs locking
> +     * analysis. */
> +    if (commit) {
> +        ovs_mutex_lock(&ct->ct_lock);
> +        bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL);
> +        if (!found) {
> +            if (!pkt_set_new_ct_state(pkt, ctx, alg_exp)) {
> +                ovs_mutex_unlock(&ct->ct_lock);
> +                return nc;
> +            }
> +            nc = conn_insert(ct, pkt, ctx, now, nat_action_info,
> +                             helper, alg_exp, ct_alg_ctl, tp_id);
> +        }
> +        ovs_mutex_unlock(&ct->ct_lock);
> +    } else {
> +        bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL);
> +        if (!found) {
> +            pkt_set_new_ct_state(pkt, ctx, alg_exp);
> +        }
> +    }
> +    return nc;
> +}
> +
>  static bool
>  conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
>                    struct conn_lookup_ctx *ctx, struct conn *conn,
> @@ -1440,12 +1483,8 @@ process_one(struct conntrack *ct, struct dp_packet 
> *pkt,
>          }
>          ovs_rwlock_unlock(&ct->resources_lock);
>  
> -        ovs_mutex_lock(&ct->ct_lock);
> -        if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
> -            conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
> -                                  helper, alg_exp, ct_alg_ctl, tp_id);
> -        }
> -        ovs_mutex_unlock(&ct->ct_lock);
> +        conn = conn_maybe_not_found(ct, pkt, ctx, commit, now, 
> nat_action_info,
> +                                    helper, alg_exp, ct_alg_ctl, tp_id);
>      }
>  
>      write_ct_md(pkt, zone, conn, &ctx->key, alg_exp);
>
> base-commit: 57a9016751af1f9bf9af36d57c978838c00c0948

Not sure what this line is (I guess git generated it?)

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to