"hepeng.0320" <[email protected]> writes:

> From: hepeng <[email protected]>
>
> This patch tries to use ovs_spin_lock as an alternative to reduce the
> size of each conn. The ovs_mutex_lock consumes 48 bytes while the
> ovs_spin_lock only uses 16 bytes. Also, we remove the alg info into an
> extra space as alg is not common in the real world userspace ct use case.
> (mostly used as security group and nat in the cloud).
>
> The size of conn_tcp: 312 -> 240.

I didn't do a thorough review of either this patch or 3/4.

> Signed-off-by: Peng He <[email protected]>
> ---
>  lib/conntrack-private.h |  86 ++++++++++++++++++++++++--------
>  lib/conntrack-tp.c      |  18 +++----
>  lib/conntrack.c         | 108 +++++++++++++++++++++-------------------
>  3 files changed, 131 insertions(+), 81 deletions(-)
>
> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 2aa828674..51a3f5347 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -93,48 +93,92 @@ enum ct_alg_ctl_type {
>      CT_ALG_CTL_MAX,
>  };
>  
> -#define CONN_FLAG_NAT_MASK 0xf
> -#define CONN_FLAG_CTL_FTP  (CT_ALG_CTL_FTP  << 4)
> -#define CONN_FLAG_CTL_TFTP (CT_ALG_CTL_TFTP << 4)
> -#define CONN_FLAG_CTL_SIP  (CT_ALG_CTL_SIP  << 4)
> +#define CONN_FLAG_NAT_MASK    0xf
> +#define CONN_FLAG_CTL_FTP     (CT_ALG_CTL_FTP  << 4)
> +#define CONN_FLAG_CTL_TFTP    (CT_ALG_CTL_TFTP << 4)
> +#define CONN_FLAG_CTL_SIP     (CT_ALG_CTL_SIP  << 4)
>  /* currently only 3 algs supported */
> -#define CONN_FLAG_ALG_MASK 0x70
> +#define CONN_FLAG_ALG_MASK    0x70
> +#define CONN_FLAG_ALG_RELATED 0x80
> +#define CONN_FLAG_CLEANED     0x100 /* indicate if it is removed from timer 
> */
> + 
>  
>  struct conn_dir {
>      struct cmap_node cm_node;
>      bool orig;
>  };
>  
> +#if HAVE_PTHREAD_SPIN_LOCK == 0
> +static inline void conn_lock(const struct ovs_mutex *lock) {
> +    ovs_mutex_lock(lock);
> +}
> +#else
> +static inline void conn_lock(const struct ovs_spin *lock) {
> +    ovs_spin_lock(lock);
> +}
> +#endif
> +
> +#if HAVE_PTHREAD_SPIN_LOCK == 0
> +static inline void conn_unlock(const struct ovs_mutex *lock) {
> +    ovs_mutex_unlock(lock);
> +}
> +#else
> +static inline void conn_unlock(const struct ovs_spin *lock) {
> +    ovs_spin_unlock(lock);
> +}
> +#endif
> +
> +#if HAVE_PTHREAD_SPIN_LOCK == 0
> +static inline void conn_lock_init(struct ovs_mutex *lock) {
> +    ovs_mutex_init_adaptive(lock);
> +}
> +#else
> +static inline void conn_lock_init(struct ovs_spin *lock) {
> +    ovs_spin_init(lock);
> +}
> +#endif
> +
> +#if HAVE_PTHREAD_SPIN_LOCK == 0
> +static inline void conn_lock_destroy(struct ovs_mutex *lock) {
> +    ovs_mutex_destroy(lock);
> +}
> +#else
> +static inline void conn_lock_destroy(struct ovs_spin *lock) {
> +    (void)lock;

Please use OVS_UNUSED instead of this hack.

> +}
> +#endif
> +
> +struct conn_alg {
> +    struct conn_key parent_key; /* Only used for orig_tuple support. */
> +    int seq_skew;
> +    bool seq_skew_dir; /* TCP sequence skew direction due to NATTing of FTP
> +                        * control messages; true if reply direction. */
> +};
> + 
>  struct conn {
>      /* Immutable data. */
>      struct conn_key key;
>      struct conn_dir orig;
>      struct conn_key rev_key;
>      struct conn_dir rev;
> -    struct conn_key parent_key; /* Only used for orig_tuple support. */
>      struct ovs_list exp_node;
>      uint64_t conn_flags;
>  
> +    /* Immutable data. */
> +    int32_t admit_zone; /* The zone for managing zone limit counts. */
> +    uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
> +    uint32_t tp_id; /* Timeout policy ID. */
> +    
>      /* Mutable data. */
> +#if HAVE_PTHREAD_SPIN_LOCK == 0
>      struct ovs_mutex lock; /* Guards all mutable fields. */
> +#else
> +    struct ovs_spin  lock; /* Guards all mutable fields. */
> +#endif
>      ovs_u128 label;
>      long long expiration;
> +    struct conn_alg *alg;
>      uint32_t mark;
> -    int seq_skew;
> -
> -    /* Immutable data. */
> -    int32_t admit_zone; /* The zone for managing zone limit counts. */
> -    uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
> -
> -    /* Mutable data. */
> -    bool seq_skew_dir; /* TCP sequence skew direction due to NATTing of FTP
> -                        * control messages; true if reply direction. */
> -    bool cleaned; /* True if cleaned from expiry lists. */
> -
> -    /* Immutable data. */
> -    bool alg_related; /* True if alg data connection. */
> -
> -    uint32_t tp_id; /* Timeout policy ID. */
>  };
>  
>  static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
> diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
> index a586d3a8d..549ec7632 100644
> --- a/lib/conntrack-tp.c
> +++ b/lib/conntrack-tp.c
> @@ -236,19 +236,19 @@ conn_update_expiration__(struct conntrack *ct, struct 
> conn *conn,
>                           uint32_t tp_value)
>      OVS_REQUIRES(conn->lock)
>  {
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>  
>      ovs_mutex_lock(&ct->ct_lock);
> -    ovs_mutex_lock(&conn->lock);
> -    if (!conn->cleaned) {
> +    conn_lock(&conn->lock);
> +    if (!(conn->conn_flags & CONN_FLAG_CLEANED)) {
>          conn->expiration = now + tp_value * 1000;
>          ovs_list_remove(&conn->exp_node);
>          ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
>      }
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>      ovs_mutex_unlock(&ct->ct_lock);
>  
> -    ovs_mutex_lock(&conn->lock);
> +    conn_lock(&conn->lock);
>  }
>  
>  /* The conn entry lock must be held on entry and exit. */
> @@ -260,20 +260,20 @@ conn_update_expiration(struct conntrack *ct, struct 
> conn *conn,
>      struct timeout_policy *tp;
>      uint32_t val;
>  
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>  
>      ovs_mutex_lock(&ct->ct_lock);
> -    ovs_mutex_lock(&conn->lock);
> +    conn_lock(&conn->lock);
>      tp = timeout_policy_lookup(ct, conn->tp_id);
>      if (tp) {
>          val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
>      } else {
>          val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
>      }
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>      ovs_mutex_unlock(&ct->ct_lock);
>  
> -    ovs_mutex_lock(&conn->lock);
> +    conn_lock(&conn->lock);
>      VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d "
>                  "val=%u sec.",
>                  ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index fffc617fb..f8c6900ef 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -177,7 +177,6 @@ typedef void (*after_helper)(struct conntrack *ct,
>                               struct conn *conn_for_expectation,
>                               long long now, bool nat, bool create_new_conn);
>  
> -
>  struct alg_helper_hook {
>      before_helper before_conn_update_hook;
>      after_helper  after_conn_update_hook;
> @@ -192,11 +191,11 @@ static void ftp_ctl_before_hook(struct conntrack *ct, 
> const struct conn_lookup_c
>      if (pkt_type == CT_FTP_CTL_INTEREST)
>          return;
>  
> -    ovs_mutex_lock(&ec->lock);
> -    if (ctx->reply != ec->seq_skew_dir) {
> +    conn_lock(&ec->lock);
> +    if (ctx->reply != ec->alg->seq_skew_dir) {
>          handle_ftp_ctl_other(ct, ctx, pkt, ec, now, nat);
>      }
> -    ovs_mutex_unlock(&ec->lock);
> +    conn_unlock(&ec->lock);
>  }
>  
>  static void ftp_ctl_after_hook(struct conntrack *ct, const struct 
> conn_lookup_ctx *ctx,
> @@ -208,11 +207,11 @@ static void ftp_ctl_after_hook(struct conntrack *ct, 
> const struct conn_lookup_ct
>          return;
>  
>      if (create_new_conn == false) {
> -        ovs_mutex_lock(&ec->lock);
> -        if (ctx->reply == ec->seq_skew_dir) {
> +        conn_lock(&ec->lock);
> +        if (ctx->reply == ec->alg->seq_skew_dir) {
>              handle_ftp_ctl_other(ct, ctx, pkt, ec, now, nat);
>          }
> -        ovs_mutex_unlock(&ec->lock);
> +        conn_unlock(&ec->lock);
>      }
>  }
>  
> @@ -223,9 +222,9 @@ static void handle_ftp_ctl(struct conntrack *ct, const 
> struct conn_lookup_ctx *c
>      if (detect_ftp_ctl_type(ctx, pkt) != CT_FTP_CTL_INTEREST)
>          return;
>  
> -    ovs_mutex_lock(&ec->lock);
> +    conn_lock(&ec->lock);
>      handle_ftp_ctl_interest(ct, ctx, pkt, ec, now, nat);
> -    ovs_mutex_unlock(&ec->lock);
> +    conn_unlock(&ec->lock);
>  }
>  
>  static struct alg_helper_hook alg_helper_hooks[] = {
> @@ -564,7 +563,7 @@ conn_clean(struct conntrack *ct, struct conn *conn)
>          cmap_remove(&ct->conns, &conn->rev.cm_node, hash);
>      }
>      ovs_list_remove(&conn->exp_node);
> -    conn->cleaned = true;
> +    conn->conn_flags |= CONN_FLAG_CLEANED;
>      ovsrcu_postpone(delete_conn, conn);
>      atomic_count_dec(&ct->n_conn);
>  }
> @@ -672,10 +671,10 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const 
> struct conn *conn,
>      pkt->md.ct_zone = zone;
>  
>      if (conn) {
> -        ovs_mutex_lock(&conn->lock);
> +        conn_lock(&conn->lock);
>          pkt->md.ct_mark = conn->mark;
>          pkt->md.ct_label = conn->label;
> -        ovs_mutex_unlock(&conn->lock);
> +        conn_unlock(&conn->lock);
>      } else {
>          pkt->md.ct_mark = 0;
>          pkt->md.ct_label = OVS_U128_ZERO;
> @@ -683,8 +682,8 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const 
> struct conn *conn,
>  
>      /* Use the original direction tuple if we have it. */
>      if (conn) {
> -        if (conn->alg_related) {
> -            key = &conn->parent_key;
> +        if (conn->conn_flags & CONN_FLAG_ALG_RELATED) {
> +            key = &conn->alg->parent_key;
>          } else {
>              key = &conn->key;
>          }
> @@ -1073,19 +1072,23 @@ conn_seq_skew_set(struct conntrack *ct, const struct 
> conn *conn_in,
>      OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      struct conn *conn;
> -    ovs_mutex_unlock(&conn_in->lock);
> +    conn_unlock(&conn_in->lock);
>      conn_lookup(ct, &conn_in->key, now, &conn, NULL);
> -    ovs_mutex_lock(&conn_in->lock);
> +    conn_lock(&conn_in->lock);
>  
>      if (conn && seq_skew) {
> -        conn->seq_skew = seq_skew;
> -        conn->seq_skew_dir = seq_skew_dir;
> +        conn->alg->seq_skew = seq_skew;
> +        conn->alg->seq_skew_dir = seq_skew_dir;
>      }
>  }
>  
>  static void
>  ct_set_alg(struct conn *conn , enum ct_alg_ctl_type ct_alg_ctl)
>  {
> +    if (ct_alg_ctl != CT_ALG_CTL_NONE) {
> +        conn->alg = xzalloc(sizeof(*conn->alg));
> +    }
> +
>      if (ct_alg_ctl == CT_ALG_CTL_FTP) {
>          conn->conn_flags |= CONN_FLAG_CTL_FTP;
>      } else if (ct_alg_ctl == CT_ALG_CTL_TFTP) {
> @@ -1149,10 +1152,12 @@ conn_not_found(struct conntrack *ct, struct dp_packet 
> *pkt,
>          ct_set_alg(nc, ct_alg_ctl);
>  
>          if (alg_exp) {
> -            nc->alg_related = true;
> +            nc->conn_flags |= CONN_FLAG_ALG_RELATED;
> +            if (!nc->alg)
> +                nc->alg = xzalloc(sizeof(*nc->alg));
>              nc->mark = alg_exp->parent_mark;
>              nc->label = alg_exp->parent_label;
> -            nc->parent_key = alg_exp->parent_key;
> +            nc->alg->parent_key = alg_exp->parent_key;
>          }
>  
>          if (nat_action_info) {
> @@ -1177,7 +1182,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet 
> *pkt,
>              cmap_insert(&ct->conns, &nc->rev.cm_node, nat_hash);
>          }
>  
> -        ovs_mutex_init_adaptive(&nc->lock);
> +        conn_lock_init(&nc->lock);
>          cmap_insert(&ct->conns, &nc->orig.cm_node, ctx->hash);
>          atomic_count_inc(&ct->n_conn);
>          ctx->conn = nc; /* For completeness. */
> @@ -1219,7 +1224,7 @@ conn_update_state(struct conntrack *ct, struct 
> dp_packet *pkt,
>              pkt->md.ct_state |= CS_REPLY_DIR;
>          }
>      } else {
> -        if (conn->alg_related) {
> +        if (conn->conn_flags & CONN_FLAG_ALG_RELATED) {
>              pkt->md.ct_state |= CS_RELATED;
>          }
>  
> @@ -1356,10 +1361,10 @@ process_one_fast(uint16_t zone, const uint32_t 
> *setmark,
>      }
>  
>      pkt->md.ct_zone = zone;
> -    ovs_mutex_lock(&conn->lock);
> +    conn_lock(&conn->lock);
>      pkt->md.ct_mark = conn->mark;
>      pkt->md.ct_label = conn->label;
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>  
>      if (setmark) {
>          set_mark(pkt, conn, setmark[0], setmark[1]);
> @@ -1519,14 +1524,14 @@ conntrack_clear(struct dp_packet *packet)
>  static void
>  set_mark(struct dp_packet *pkt, struct conn *conn, uint32_t val, uint32_t 
> mask)
>  {
> -    ovs_mutex_lock(&conn->lock);
> -    if (conn->alg_related) {
> +    conn_lock(&conn->lock);
> +    if (conn->conn_flags & CONN_FLAG_ALG_RELATED) {
>          pkt->md.ct_mark = conn->mark;
>      } else {
>          pkt->md.ct_mark = val | (pkt->md.ct_mark & ~(mask));
>          conn->mark = pkt->md.ct_mark;
>      }
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>  }
>  
>  static void
> @@ -1534,8 +1539,8 @@ set_label(struct dp_packet *pkt, struct conn *conn,
>            const struct ovs_key_ct_labels *val,
>            const struct ovs_key_ct_labels *mask)
>  {
> -    ovs_mutex_lock(&conn->lock);
> -    if (conn->alg_related) {
> +    conn_lock(&conn->lock);
> +    if (conn->conn_flags & CONN_FLAG_ALG_RELATED) {
>          pkt->md.ct_label = conn->label;
>      } else {
>          ovs_u128 v, m;
> @@ -1549,7 +1554,7 @@ set_label(struct dp_packet *pkt, struct conn *conn,
>                                | (pkt->md.ct_label.u64.hi & ~(m.u64.hi));
>          conn->label = pkt->md.ct_label;
>      }
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>  }
>  
>  
> @@ -1568,10 +1573,10 @@ ct_sweep(struct conntrack *ct, long long now, size_t 
> limit)
>  
>      for (unsigned i = 0; i < N_CT_TM; i++) {
>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ct->exp_lists[i]) {
> -            ovs_mutex_lock(&conn->lock);
> +            conn_lock(&conn->lock);
>              if (now < conn->expiration || count >= limit) {
>                  min_expiration = MIN(min_expiration, conn->expiration);
> -                ovs_mutex_unlock(&conn->lock);
> +                conn_unlock(&conn->lock);
>                  if (count >= limit) {
>                      /* Do not check other lists. */
>                      COVERAGE_INC(conntrack_long_cleanup);
> @@ -1579,7 +1584,7 @@ ct_sweep(struct conntrack *ct, long long now, size_t 
> limit)
>                  }
>                  break;
>              } else {
> -                ovs_mutex_unlock(&conn->lock);
> +                conn_unlock(&conn->lock);
>                  conn_clean(ct, conn);
>              }
>              count++;
> @@ -2411,20 +2416,20 @@ static enum ct_update_res
>  conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
>              struct conn_lookup_ctx *ctx, long long now)
>  {
> -    ovs_mutex_lock(&conn->lock);
> +    conn_lock(&conn->lock);
>      enum ct_update_res update_res =
>          l4_protos[conn->key.nw_proto]->conn_update(ct, conn, pkt, ctx->reply,
>                                                     now);
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>      return update_res;
>  }
>  
>  static bool
>  conn_expired(struct conn *conn, long long now)
>  {
> -    ovs_mutex_lock(&conn->lock);
> +    conn_lock(&conn->lock);
>      bool expired = now >= conn->expiration ? true : false;
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>      return expired;
>  }
>  
> @@ -2444,13 +2449,14 @@ new_conn(struct conntrack *ct, struct dp_packet *pkt, 
> struct conn_key *key,
>  static void
>  delete_conn_cmn(struct conn *conn)
>  {
> +    free(conn->alg);
>      free(conn);
>  }
>  
>  static void
>  delete_conn(struct conn *conn)
>  {
> -    ovs_mutex_destroy(&conn->lock);
> +    conn_lock_destroy(&conn->lock);
>      delete_conn_cmn(conn);
>  }
>  
> @@ -2558,7 +2564,7 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct 
> ct_dpif_entry *entry,
>  
>      entry->zone = conn->key.zone;
>  
> -    ovs_mutex_lock(&conn->lock);
> +    conn_lock(&conn->lock);
>      entry->mark = conn->mark;
>      memcpy(&entry->labels, &conn->label, sizeof entry->labels);
>  
> @@ -2568,7 +2574,7 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct 
> ct_dpif_entry *entry,
>      if (class->conn_get_protoinfo) {
>          class->conn_get_protoinfo(conn, &entry->protoinfo);
>      }
> -    ovs_mutex_unlock(&conn->lock);
> +    conn_unlock(&conn->lock);
>  
>      entry->timeout = (expiration > 0) ? expiration / 1000 : 0;
>  
> @@ -3349,10 +3355,10 @@ handle_ftp_ctl_interest(struct conntrack *ct, const 
> struct conn_lookup_ctx *ctx,
>  
>      struct tcp_header *th = dp_packet_l4(pkt);
>  
> -    if (nat && ec->seq_skew != 0) {
> -        ctx->reply != ec->seq_skew_dir ?
> -            adj_seqnum(&th->tcp_ack, -ec->seq_skew) :
> -            adj_seqnum(&th->tcp_seq, ec->seq_skew);
> +    if (nat && ec->alg->seq_skew != 0) {
> +        ctx->reply != ec->alg->seq_skew_dir ?
> +            adj_seqnum(&th->tcp_ack, -ec->alg->seq_skew) :
> +            adj_seqnum(&th->tcp_seq, ec->alg->seq_skew);
>      }
>  
>      th->tcp_csum = 0;
> @@ -3368,7 +3374,7 @@ handle_ftp_ctl_interest(struct conntrack *ct, const 
> struct conn_lookup_ctx *ctx,
>      }
>  
>      if (seq_skew) {
> -        conn_seq_skew_set(ct, ec, now, seq_skew + ec->seq_skew,
> +        conn_seq_skew_set(ct, ec, now, seq_skew + ec->alg->seq_skew,
>                            ctx->reply);
>      }
>  }
> @@ -3382,10 +3388,10 @@ handle_ftp_ctl_other(struct conntrack *ct OVS_UNUSED, 
> const struct conn_lookup_c
>      struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>      struct ip_header *l3_hdr = dp_packet_l3(pkt);
>  
> -    if (nat && ec->seq_skew != 0) {
> -        ctx->reply != ec->seq_skew_dir ?
> -            adj_seqnum(&th->tcp_ack, -ec->seq_skew) :
> -            adj_seqnum(&th->tcp_seq, ec->seq_skew);
> +    if (nat && ec->alg->seq_skew != 0) {
> +        ctx->reply != ec->alg->seq_skew_dir ?
> +            adj_seqnum(&th->tcp_ack, -ec->alg->seq_skew) :
> +            adj_seqnum(&th->tcp_seq, ec->alg->seq_skew);
>          th->tcp_csum = 0;
>          if (!dp_packet_hwol_tx_l4_checksum(pkt)) {
>              if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
> @@ -3406,9 +3412,9 @@ handle_tftp_ctl(struct conntrack *ct,
>                  struct dp_packet *pkt, struct conn *conn_for_expectation,
>                  long long now OVS_UNUSED, bool nat OVS_UNUSED)
>  {
> -    ovs_mutex_lock(&conn_for_expectation->lock);
> +    conn_lock(&conn_for_expectation->lock);
>      expectation_create(ct, conn_for_expectation->key.src.port,
>                         conn_for_expectation,
>                         !!(pkt->md.ct_state & CS_REPLY_DIR), false, false);
> -    ovs_mutex_unlock(&conn_for_expectation->lock);
> +    conn_unlock(&conn_for_expectation->lock);
>  }

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to