Signed-off-by: Darrell Ball <[email protected]>
---
Documentation/faq/releases.rst | 4 +-
NEWS | 1 +
lib/conntrack-private.h | 1 +
lib/conntrack.c | 142 +++++++++++++++++++++++++++++++++++++++
lib/conntrack.h | 17 +++++
lib/dpif-netdev.c | 88 +++++++++++++++++++++++-
tests/system-kmod-macros.at | 7 --
tests/system-traffic.at | 1 -
tests/system-userspace-macros.at | 9 ---
9 files changed, 248 insertions(+), 22 deletions(-)
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index e02dda1..6702c58 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -116,9 +116,9 @@ Q: Are all features available with all datapaths?
Feature Linux upstream Linux OVS tree Userspace Hyper-V
========================== ============== ============== ========= =======
Connection tracking 4.3 2.5 2.6 YES
- Conntrack Fragment Reass. 4.3 2.6 2.10 YES
+ Conntrack Fragment Reass. 4.3 2.6 2.12 YES
Conntrack Timeout Policies 5.2 2.12 NO NO
- Conntrack Zone Limit 4.18 2.10 NO YES
+ Conntrack Zone Limit 4.18 2.10 2.13 YES
Conntrack NAT 4.6 2.6 2.8 YES
Tunnel - LISP NO 2.11 NO NO
Tunnel - STT NO 2.4 NO YES
diff --git a/NEWS b/NEWS
index 222280b..17f92ba 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ Post-v2.12.0
- Userspace datapath:
* Add option to enable, disable and query TCP sequence checking in
conntrack.
+ * Add support for conntrack zone limits.
- AF_XDP:
* New option 'use-need-wakeup' for netdev-afxdp to control enabling
of corresponding 'need_wakeup' flag in AF_XDP rings. Enabled by default
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 590f139..22823cb 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -155,6 +155,7 @@ struct conntrack {
struct ovs_mutex ct_lock; /* Protects 2 following fields. */
struct cmap conns OVS_GUARDED;
struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
+ struct hmap zone_limits OVS_GUARDED;
uint32_t hash_basis; /* Salt for hashing a connection key. */
pthread_t clean_thread; /* Periodically cleans up connection tracker. */
struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
diff --git a/lib/conntrack.c b/lib/conntrack.c
index df7b9fa..59e1c51 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -76,6 +76,13 @@ enum ct_alg_ctl_type {
CT_ALG_CTL_SIP,
};
+/* One per-zone connection limit, stored in the 'zone_limits' map of
+ * struct conntrack. */
+struct zone_limit {
+ struct hmap_node node; /* Links the entry into 'zone_limits'. */
+ int32_t zone; /* Zone id; the key the entry is hashed on. */
+ uint32_t limit; /* New connections are refused once count >= limit. */
+ uint32_t count; /* Connections currently charged to this entry. */
+};
+
static bool conn_key_extract(struct conntrack *, struct dp_packet *,
ovs_be16 dl_type, struct conn_lookup_ctx *,
uint16_t zone);
@@ -305,6 +312,7 @@ conntrack_init(void)
for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) {
ovs_list_init(&ct->exp_lists[i]);
}
+ hmap_init(&ct->zone_limits);
ovs_mutex_unlock(&ct->ct_lock);
ct->hash_basis = random_uint32();
@@ -318,6 +326,111 @@ conntrack_init(void)
return ct;
}
+/* Returns the hash of 'zone' salted with 'basis'.  This is the hash under
+ * which a zone's entry is inserted into, and looked up in, the conntrack
+ * 'zone_limits' map. */
+static uint32_t
+zone_key_hash(int32_t zone, uint32_t basis)
+{
+    return hash_int((OVS_FORCE uint32_t) zone, basis);
+}
+
+/* Finds the limit entry configured for exactly 'zone' in 'ct'.  Returns the
+ * entry, or NULL if 'zone' has none.  No fallback to DEFAULT_ZONE happens
+ * here; callers that want one perform a second lookup themselves. */
+static struct zone_limit *
+zone_limit_lookup(struct conntrack *ct, int32_t zone)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct zone_limit *entry;
+    uint32_t bucket = zone_key_hash(zone, ct->hash_basis);
+
+    HMAP_FOR_EACH_WITH_HASH (entry, node, bucket, &ct->zone_limits) {
+        if (entry->zone != zone) {
+            continue;
+        }
+        return entry;
+    }
+    return NULL;
+}
+
+/* Reports the limit that governs 'zone': the zone's own entry when it has
+ * one, otherwise the DEFAULT_ZONE entry.  The returned copy carries the zone,
+ * limit and count of whichever entry was used, so the 'zone' member tells the
+ * caller which of the two it got.  When neither entry exists the result is
+ * {INVALID_ZONE, 0, 0}.  Takes and releases 'ct->ct_lock' itself. */
+struct conntrack_zone_limit
+zone_limit_get(struct conntrack *ct, int32_t zone)
+{
+    struct conntrack_zone_limit result = {INVALID_ZONE, 0, 0};
+
+    ovs_mutex_lock(&ct->ct_lock);
+    struct zone_limit *found = zone_limit_lookup(ct, zone);
+    if (!found) {
+        found = zone_limit_lookup(ct, DEFAULT_ZONE);
+    }
+    if (found) {
+        result.zone = found->zone;
+        result.limit = found->limit;
+        result.count = found->count;
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+
+    return result;
+}
+
+/* Adds a limit entry for 'zone' with the given 'limit'.  The entry comes
+ * from xzalloc(), so every member not set here, including 'count', starts
+ * out zeroed.
+ *
+ * 'zone' must lie in [DEFAULT_ZONE, MAX_ZONE]; otherwise nothing is added and
+ * EINVAL is returned.  Returns 0 on success.
+ *
+ * Does not check for an existing entry: zone_limit_update() only calls this
+ * after its own lookup has failed. */
+static int
+zone_limit_create(struct conntrack *ct, int32_t zone, uint32_t limit)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    if (zone < DEFAULT_ZONE || zone > MAX_ZONE) {
+        return EINVAL;
+    }
+
+    struct zone_limit *entry = xzalloc(sizeof *entry);
+    entry->zone = zone;
+    entry->limit = limit;
+    hmap_insert(&ct->zone_limits, &entry->node,
+                zone_key_hash(zone, ct->hash_basis));
+    return 0;
+}
+
+/* Sets the connection limit of 'zone' to 'limit', creating the entry when
+ * 'zone' does not have one yet.  An existing entry keeps its current count.
+ * Returns 0 on success, or the error reported by zone_limit_create() (EINVAL
+ * for a zone outside the accepted range).  Logs the outcome and takes
+ * 'ct->ct_lock' itself. */
+int
+zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit)
+{
+    int error = 0;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    struct zone_limit *existing = zone_limit_lookup(ct, zone);
+    if (existing) {
+        existing->limit = limit;
+        VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone);
+    } else {
+        error = zone_limit_create(ct, zone, limit);
+        if (error) {
+            VLOG_WARN("Request to create zone limit for invalid zone %d",
+                      zone);
+        } else {
+            VLOG_INFO("Created zone limit of %u for zone %d", limit, zone);
+        }
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+
+    return error;
+}
+
+/* Unlinks 'zl' from the zone limit map of 'ct' and frees it.  'zl' is
+ * dangling once this returns. */
+static void
+zone_limit_clean(struct conntrack *ct, struct zone_limit *zl)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct hmap *limits = &ct->zone_limits;
+
+    hmap_remove(limits, &zl->node);
+    free(zl);
+}
+
+/* Removes the limit entry of 'zone', if there is one.  A zone without an
+ * entry is only logged, not treated as an error, so this always returns 0.
+ * Takes 'ct->ct_lock' itself. */
+int
+zone_limit_delete(struct conntrack *ct, int32_t zone)
+{
+    ovs_mutex_lock(&ct->ct_lock);
+    struct zone_limit *victim = zone_limit_lookup(ct, zone);
+    if (!victim) {
+        VLOG_INFO("Attempted delete of non-existent zone limit: zone %d",
+                  zone);
+    } else {
+        zone_limit_clean(ct, victim);
+        VLOG_INFO("Deleted zone limit for zone %d", zone);
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+
+    return 0;
+}
+
static void
conn_clean_cmn(struct conntrack *ct, struct conn *conn)
OVS_REQUIRES(ct->ct_lock)
@@ -328,6 +441,11 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn)
uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
cmap_remove(&ct->conns, &conn->cm_node, hash);
+
+    /* Give back the slot this connection was charged to.  conn_not_found()
+     * charges the zone's own limit entry when there is one and the
+     * DEFAULT_ZONE entry otherwise, so the same order is followed here;
+     * looking only at the connection's zone would leave the default count
+     * growing forever.
+     *
+     * The count is never taken below zero: the entry found now may have
+     * been created after the connection was admitted, in which case it was
+     * never charged for it.
+     *
+     * NOTE(review): this is still approximate when limits are created or
+     * deleted during a connection's lifetime.  Exact accounting needs the
+     * admitting zone recorded in struct conn. */
+    struct zone_limit *zl = zone_limit_lookup(ct, conn->key.zone);
+    if (!zl) {
+        zl = zone_limit_lookup(ct, DEFAULT_ZONE);
+    }
+    if (zl && zl->count > 0) {
+        zl->count--;
+    }
}
/* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT. Also
@@ -378,6 +496,13 @@ conntrack_destroy(struct conntrack *ct)
conn_clean_one(ct, conn);
}
cmap_destroy(&ct->conns);
+
+ /* Free every remaining zone limit entry, then the map itself. */
+ struct zone_limit *zl;
+ HMAP_FOR_EACH_POP (zl, node, &ct->zone_limits) {
+ free(zl);
+ }
+ hmap_destroy(&ct->zone_limits);
+
ovs_mutex_unlock(&ct->ct_lock);
ovs_mutex_destroy(&ct->ct_lock);
@@ -843,6 +968,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
return nc;
}
+
+
pkt->md.ct_state = CS_NEW;
if (alg_exp) {
@@ -857,6 +984,18 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
return nc;
}
+        /* Zone limit admission check.  The zone's own limit entry takes
+         * precedence; the DEFAULT_ZONE entry applies only when the zone has
+         * none.  The connection is refused once the chosen entry's count has
+         * reached its limit.  'zl' stays set to the chosen entry (or NULL)
+         * for the accounting done after the connection is inserted. */
+        struct zone_limit *zl = zone_limit_lookup(ct, ctx->key.zone);
+        if (!zl) {
+            zl = zone_limit_lookup(ct, DEFAULT_ZONE);
+        }
+        if (zl && zl->count >= zl->limit) {
+            return nc;
+        }
+
nc = new_conn(ct, pkt, &ctx->key, now);
memcpy(&nc->key, &ctx->key, sizeof nc->key);
memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
@@ -915,6 +1054,9 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
atomic_count_inc(&ct->n_conn);
ctx->conn = nc; /* For completeness. */
+ /* Charge the new connection to the limit entry chosen by the admission
+ * check earlier in this function, if any entry applied. */
+ if (zl) {
+ zl->count++;
+ }
}
return nc;
diff --git a/lib/conntrack.h b/lib/conntrack.h
index 75409ba..e407228 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -104,6 +104,19 @@ struct conntrack_dump {
uint16_t zone;
};
+/* Copy of one zone limit entry, as returned by zone_limit_get(). */
+struct conntrack_zone_limit {
+ int32_t zone; /* Zone of the entry copied, DEFAULT_ZONE or INVALID_ZONE. */
+ uint32_t limit; /* Configured connection limit. */
+ uint32_t count; /* Connections counted against the limit. */
+};
+
+/* Special and boundary zone ids used by the zone limit functions. */
+enum zone_limits_e {
+ DEFAULT_ZONE = -1, /* Key of the fallback limit for zones without one. */
+ INVALID_ZONE = -2, /* Reported by zone_limit_get() when no limit applies. */
+ MIN_ZONE = 0, /* Lowest real zone id. */
+ MAX_ZONE = 0xFFFF, /* Highest real zone id. */
+};
+
struct ct_dpif_entry;
struct ct_dpif_tuple;
@@ -121,5 +134,9 @@ int conntrack_get_nconns(struct conntrack *ct, uint32_t
*nconns);
int conntrack_set_tcp_seq_chk(struct conntrack *ct, bool enabled);
bool conntrack_get_tcp_seq_chk(struct conntrack *ct);
struct ipf *conntrack_ipf_ctx(struct conntrack *ct);
+/* Conntrack zone limit query, configuration and removal.  'zone' is a real
+ * zone id or DEFAULT_ZONE. */
+struct conntrack_zone_limit zone_limit_get(struct conntrack *ct,
+ int32_t zone);
+int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
+int zone_limit_delete(struct conntrack *ct, int32_t zone);
#endif /* conntrack.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 885a4df..1e54936 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7471,6 +7471,88 @@ dpif_netdev_ct_get_tcp_seq_chk(struct dpif *dpif, bool
*enabled)
}
+/* Applies conntrack zone limits to the userspace datapath behind 'dpif'.
+ *
+ * If 'default_limits' is nonnull, '*default_limits' becomes the limit of
+ * DEFAULT_ZONE.  Each struct ct_dpif_zone_limit on 'zone_limits' is then
+ * applied in list order.
+ *
+ * Stops at the first zone_limit_update() failure and returns its error;
+ * limits applied before the failure stay in place.  Returns 0 when every
+ * update succeeded. */
static int
+dpif_netdev_ct_set_limits(struct dpif *dpif,
+                          const uint32_t *default_limits,
+                          const struct ovs_list *zone_limits)
+{
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+
+    if (default_limits) {
+        int err = zone_limit_update(dp->conntrack, DEFAULT_ZONE,
+                                    *default_limits);
+        if (err) {
+            return err;
+        }
+    }
+
+    struct ct_dpif_zone_limit *zone_limit;
+    LIST_FOR_EACH (zone_limit, node, zone_limits) {
+        int err = zone_limit_update(dp->conntrack, zone_limit->zone,
+                                    zone_limit->limit);
+        if (err) {
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+/* Reports the conntrack zone limits of the userspace datapath behind 'dpif'.
+ *
+ * '*default_limit' receives the limit of DEFAULT_ZONE, or 0 when no default
+ * limit has been configured.
+ *
+ * If 'zone_limits_request' is nonempty, one reply is appended to
+ * 'zone_limits_reply' for every requested zone, carrying the limit and count
+ * that zone_limit_get() reports for it: those of the zone's own entry, else
+ * those of the default entry, else zeros.
+ *
+ * If 'zone_limits_request' is empty, a reply is appended for every zone in
+ * [MIN_ZONE, MAX_ZONE] that has an entry of its own.
+ *
+ * Always returns 0: querying a datapath on which no limit has been
+ * configured is not an error. */
+static int
+dpif_netdev_ct_get_limits(struct dpif *dpif,
+                          uint32_t *default_limit,
+                          const struct ovs_list *zone_limits_request,
+                          struct ovs_list *zone_limits_reply)
+{
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct conntrack_zone_limit czl;
+
+    czl = zone_limit_get(dp->conntrack, DEFAULT_ZONE);
+    *default_limit = czl.zone == DEFAULT_ZONE ? czl.limit : 0;
+
+    if (!ovs_list_is_empty(zone_limits_request)) {
+        struct ct_dpif_zone_limit *zone_limit;
+        LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
+            /* When neither the zone nor DEFAULT_ZONE has an entry,
+             * zone_limit_get() hands back zero limit and count, which is
+             * what gets reported for the requested zone. */
+            czl = zone_limit_get(dp->conntrack, zone_limit->zone);
+            ct_dpif_push_zone_limit(zone_limits_reply, zone_limit->zone,
+                                    czl.limit, czl.count);
+        }
+    } else {
+        for (int z = MIN_ZONE; z <= MAX_ZONE; z++) {
+            /* A result whose zone differs from 'z' came from the default
+             * entry or from no entry at all, so 'z' is not listed. */
+            czl = zone_limit_get(dp->conntrack, z);
+            if (czl.zone == z) {
+                ct_dpif_push_zone_limit(zone_limits_reply, z, czl.limit,
+                                        czl.count);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* Removes the conntrack limit entries of the zones named by the
+ * struct ct_dpif_zone_limit elements on 'zone_limits', in list order, from
+ * the userspace datapath behind 'dpif'.  Stops at the first nonzero result
+ * from zone_limit_delete() and returns it; returns 0 otherwise. */
+static int
+dpif_netdev_ct_del_limits(struct dpif *dpif,
+                          const struct ovs_list *zone_limits)
+{
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct ct_dpif_zone_limit *zone_limit;
+
+    LIST_FOR_EACH (zone_limit, node, zone_limits) {
+        int err = zone_limit_delete(dp->conntrack, zone_limit->zone);
+        if (err) {
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+static int
dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -7576,9 +7658,9 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_ct_get_nconns,
dpif_netdev_ct_set_tcp_seq_chk,
dpif_netdev_ct_get_tcp_seq_chk,
- NULL, /* ct_set_limits */
- NULL, /* ct_get_limits */
- NULL, /* ct_del_limits */
+ dpif_netdev_ct_set_limits,
+ dpif_netdev_ct_get_limits,
+ dpif_netdev_ct_del_limits,
NULL, /* ct_set_timeout_policy */
NULL, /* ct_get_timeout_policy */
NULL, /* ct_del_timeout_policy */
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
index 9e89aec..daf66bd 100644
--- a/tests/system-kmod-macros.at
+++ b/tests/system-kmod-macros.at
@@ -110,13 +110,6 @@ m4_define([CHECK_CONNTRACK_TIMEOUT],
on_exit 'modprobe -r nfnetlink_cttimeout'
])
-# CHECK_CT_DPIF_PER_ZONE_LIMIT()
-#
-# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
-# zone. The kernel datapath does support this feature. Will remove this check
-# after both kernel and userspace datapath support it.
-m4_define([CHECK_CT_DPIF_PER_ZONE_LIMIT])
-
# CHECK_CT_DPIF_SET_GET_MAXCONNS()
#
# Perform requirements checks for running ovs-dpctl ct-set-maxconns or
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index cde7429..0fb7aac 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3574,7 +3574,6 @@ AT_CLEANUP
AT_SETUP([conntrack - limit by zone])
CHECK_CONNTRACK()
-CHECK_CT_DPIF_PER_ZONE_LIMIT()
OVS_TRAFFIC_VSWITCHD_START()
ADD_NAMESPACES(at_ns0, at_ns1)
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index a419f30..ba7f410 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -106,15 +106,6 @@ m4_define([CHECK_CONNTRACK_TIMEOUT],
AT_SKIP_IF([:])
])
-# CHECK_CT_DPIF_PER_ZONE_LIMIT()
-#
-# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
-# zone. The userspace datapath does not support this feature yet.
-m4_define([CHECK_CT_DPIF_PER_ZONE_LIMIT],
-[
- AT_SKIP_IF([:])
-])
-
# CHECK_CT_DPIF_SET_GET_MAXCONNS()
#
# Perform requirements checks for running ovs-dpctl ct-set-maxconns or
--
1.9.1
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev