Hi Gaetan, Thanks for the patch, looks very useful. I haven't tested it yet. Minor question/comments inline.
On Wed, Feb 17, 2021 at 8:34 AM Gaetan Rivet <[email protected]> wrote: > > Change the connection expiration lists from ovs_list to rculist. > This is a pre-step towards reducing the granularity of 'ct_lock'. > > Signed-off-by: Gaetan Rivet <[email protected]> > Reviewed-by: Eli Britstein <[email protected]> > --- > lib/conntrack-private.h | 76 +++++++++++++++++++++++++++-------------- > lib/conntrack-tp.c | 60 +++++++++++++++++++++++++------- > lib/conntrack.c | 14 ++++---- > 3 files changed, 105 insertions(+), 45 deletions(-) > > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h > index e8332bdba..4b6f9eae3 100644 > --- a/lib/conntrack-private.h > +++ b/lib/conntrack-private.h > @@ -29,6 +29,7 @@ > #include "openvswitch/list.h" > #include "openvswitch/types.h" > #include "packets.h" > +#include "rculist.h" > #include "unaligned.h" > #include "dp-packet.h" > > @@ -86,17 +87,55 @@ struct alg_exp_node { > bool nat_rpl_dst; > }; > > +/* Timeouts: all the possible timeout states passed to update_expiration() > + * are listed here. The name will be prefix by CT_TM_ and the value is in > + * milliseconds */ > +#define CT_TIMEOUTS \ > + CT_TIMEOUT(TCP_FIRST_PACKET) \ > + CT_TIMEOUT(TCP_OPENING) \ > + CT_TIMEOUT(TCP_ESTABLISHED) \ > + CT_TIMEOUT(TCP_CLOSING) \ > + CT_TIMEOUT(TCP_FIN_WAIT) \ > + CT_TIMEOUT(TCP_CLOSED) \ > + CT_TIMEOUT(OTHER_FIRST) \ > + CT_TIMEOUT(OTHER_MULTIPLE) \ > + CT_TIMEOUT(OTHER_BIDIR) \ > + CT_TIMEOUT(ICMP_FIRST) \ > + CT_TIMEOUT(ICMP_REPLY) > + > +enum ct_timeout { > +#define CT_TIMEOUT(NAME) CT_TM_##NAME, > + CT_TIMEOUTS > +#undef CT_TIMEOUT > + N_CT_TM > +}; > + any reason for moving this macro to here? > enum OVS_PACKED_ENUM ct_conn_type { > CT_CONN_TYPE_DEFAULT, > CT_CONN_TYPE_UN_NAT, > }; > > +struct conn_expire { > + /* Set once when initializing the expiration node. */ > + struct conntrack *ct; > + /* Timeout state of the connection. > + * It follows the connection state updates. > + */ > + enum ct_timeout tm; > + /* Insert and remove the expiration node only once per RCU syncs. > + * If multiple threads update the connection, its expiration should > + * be removed only once and added only once to timeout lists. > + */ > + atomic_flag insert_once; > + atomic_flag remove_once; > + struct rculist node; > +}; > + > struct conn { > /* Immutable data. */ > struct conn_key key; > struct conn_key rev_key; > struct conn_key parent_key; /* Only used for orig_tuple support. */ > - struct ovs_list exp_node; > struct cmap_node cm_node; > struct nat_action_info_t *nat_info; > char *alg; > @@ -104,6 +143,7 @@ struct conn { > > /* Mutable data. */ > struct ovs_mutex lock; /* Guards all mutable fields. */ > + struct conn_expire exp; > ovs_u128 label; > long long expiration; > uint32_t mark; > @@ -132,33 +172,10 @@ enum ct_update_res { > CT_UPDATE_VALID_NEW, > }; > > -/* Timeouts: all the possible timeout states passed to update_expiration() > - * are listed here. The name will be prefix by CT_TM_ and the value is in > - * milliseconds */ > -#define CT_TIMEOUTS \ > - CT_TIMEOUT(TCP_FIRST_PACKET) \ > - CT_TIMEOUT(TCP_OPENING) \ > - CT_TIMEOUT(TCP_ESTABLISHED) \ > - CT_TIMEOUT(TCP_CLOSING) \ > - CT_TIMEOUT(TCP_FIN_WAIT) \ > - CT_TIMEOUT(TCP_CLOSED) \ > - CT_TIMEOUT(OTHER_FIRST) \ > - CT_TIMEOUT(OTHER_MULTIPLE) \ > - CT_TIMEOUT(OTHER_BIDIR) \ > - CT_TIMEOUT(ICMP_FIRST) \ > - CT_TIMEOUT(ICMP_REPLY) > - > -enum ct_timeout { > -#define CT_TIMEOUT(NAME) CT_TM_##NAME, > - CT_TIMEOUTS > -#undef CT_TIMEOUT > - N_CT_TM > -}; > - > struct conntrack { > struct ovs_mutex ct_lock; /* Protects 2 following fields. */ > struct cmap conns OVS_GUARDED; > - struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED; > + struct rculist exp_lists[N_CT_TM] OVS_GUARDED; > struct hmap zone_limits OVS_GUARDED; > struct hmap timeout_policies OVS_GUARDED; > uint32_t hash_basis; /* Salt for hashing a connection key. */ > @@ -204,4 +221,13 @@ struct ct_l4_proto { > struct ct_dpif_protoinfo *); > }; > > +static inline void > +conn_expire_remove(struct conn_expire *exp) > +{ > + if (!atomic_flag_test_and_set(&exp->remove_once) > + && rculist_next(&exp->node)) { > + rculist_remove(&exp->node); > + } > +} > + > #endif /* conntrack-private.h */ > diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c > index a586d3a8d..30ba4bda8 100644 > --- a/lib/conntrack-tp.c > +++ b/lib/conntrack-tp.c > @@ -230,6 +230,50 @@ tm_to_ct_dpif_tp(enum ct_timeout tm) > return CT_DPIF_TP_ATTR_MAX; > } > > +static void > +conn_expire_init(struct conn *conn, struct conntrack *ct) > +{ > + struct conn_expire *exp = &conn->exp; > + > + if (exp->ct != NULL) { > + return; > + } > + > + exp->ct = ct; > + atomic_flag_clear(&exp->insert_once); > + atomic_flag_clear(&exp->remove_once); > + /* The expiration is initially unscheduled, flag it as 'removed'. */ > + atomic_flag_test_and_set(&exp->remove_once); > +} > + > +static void > +conn_expire_insert(struct conn *conn) > +{ > + struct conn_expire *exp = &conn->exp; > + > + ovs_mutex_lock(&exp->ct->ct_lock); > + ovs_mutex_lock(&conn->lock); > + > + rculist_push_back(&exp->ct->exp_lists[exp->tm], &exp->node); > + atomic_flag_clear(&exp->insert_once); > + atomic_flag_clear(&exp->remove_once); > + > + ovs_mutex_unlock(&conn->lock); > + ovs_mutex_unlock(&exp->ct->ct_lock); > +} > + > +static void > +conn_schedule_expiration(struct conntrack *ct, struct conn *conn, > + enum ct_timeout tm, long long now, uint32_t > tp_value) > +{ > + conn_expire_init(conn, ct); > + conn->expiration = now + tp_value * 1000; > + conn->exp.tm = tm; > + if (!atomic_flag_test_and_set(&conn->exp.insert_once)) { Does this mean we only insert the oldest one, not the latest one? So pkt1 arrives at t1, pkt2 arrives later at t2, then we update using t1 > + ovsrcu_postpone(conn_expire_insert, conn); > + } > +} > + rest looks good to me William _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
