This patch backports several critical bug fixes related to
locking and data consistency in nf_conncount code.
This backport is based on the following upstream net-next upstream commits.
4cd273b ("netfilter: nf_conncount: don't skip eviction when age is negative")
d4e7df1 ("netfilter: nf_conncount: use rb_link_node_rcu() instead of
rb_link_node()")
53ca0f2 ("netfilter: nf_conncount: remove wrong condition check routine")
3c5cdb1 ("netfilter: nf_conncount: fix unexpected permanent node of list.")
31568ec ("netfilter: nf_conncount: fix list_del corruption in conn_free")
fd3e71a ("netfilter: nf_conncount: use spin_lock_bh instead of spin_lock")
This patch also added additional compat code so that it can build on
all supported kernel versions.
Travis tests are at
https://travis-ci.org/yifsun/ovs-travis/builds/568603796
VMware-BZ: #2396471
CC: Taehee Yoo <[email protected]>
Signed-off-by: Yifeng Sun <[email protected]>
---
v1->v2: Add fixes to support old kernel versions. Thanks YiHung for reviewing.
acinclude.m4 | 2 ++
datapath/linux/Modules.mk | 3 +-
datapath/linux/compat/include/linux/rbtree.h | 19 ++++++++++++
datapath/linux/compat/nf_conncount.c | 46 ++++++++++++++++++----------
4 files changed, 52 insertions(+), 18 deletions(-)
create mode 100644 datapath/linux/compat/include/linux/rbtree.h
diff --git a/acinclude.m4 b/acinclude.m4
index 116ffcf9096d..f8e856d3303f 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -1012,6 +1012,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
[OVS_DEFINE([HAVE_GRE_CALC_HLEN])])
OVS_GREP_IFELSE([$KSRC/include/net/gre.h], [ip_gre_calc_hlen],
[OVS_DEFINE([HAVE_IP_GRE_CALC_HLEN])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/rbtree.h], [rb_link_node_rcu],
+ [OVS_DEFINE([HAVE_RBTREE_RB_LINK_NODE_RCU])])
if cmp -s datapath/linux/kcompat.h.new \
datapath/linux/kcompat.h >/dev/null 2>&1; then
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index cbb29f1c69d0..69d7faeac414 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -116,5 +116,6 @@ openvswitch_headers += \
linux/compat/include/uapi/linux/netfilter.h \
linux/compat/include/linux/mm.h \
linux/compat/include/linux/netfilter.h \
- linux/compat/include/linux/overflow.h
+ linux/compat/include/linux/overflow.h \
+ linux/compat/include/linux/rbtree.h
EXTRA_DIST += linux/compat/build-aux/export-check-whitelist
diff --git a/datapath/linux/compat/include/linux/rbtree.h
b/datapath/linux/compat/include/linux/rbtree.h
new file mode 100644
index 000000000000..dbf20ff0e0b8
--- /dev/null
+++ b/datapath/linux/compat/include/linux/rbtree.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_RBTREE_WRAPPER_H
+#define __LINUX_RBTREE_WRAPPER_H 1
+
+#include_next <linux/rbtree.h>
+
+#ifndef HAVE_RBTREE_RB_LINK_NODE_RCU
+#include <linux/rcupdate.h>
+
+static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node
*parent,
+ struct rb_node **rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ rcu_assign_pointer(*rb_link, node);
+}
+#endif
+
+#endif /* __LINUX_RBTREE_WRAPPER_H */
diff --git a/datapath/linux/compat/nf_conncount.c
b/datapath/linux/compat/nf_conncount.c
index eeae440f872d..6a4d058e7fac 100644
--- a/datapath/linux/compat/nf_conncount.c
+++ b/datapath/linux/compat/nf_conncount.c
@@ -54,6 +54,7 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
+ bool dead;
struct rcu_head rcu_head;
};
@@ -111,15 +112,16 @@ nf_conncount_add(struct nf_conncount_list *list,
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
- spin_lock(&list->list_lock);
+ conn->dead = false;
+ spin_lock_bh(&list->list_lock);
if (list->dead == true) {
kmem_cache_free(conncount_conn_cachep, conn);
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_SKIP;
}
list_add_tail(&conn->node, &list->head);
list->count++;
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_ADDED;
}
@@ -136,19 +138,22 @@ static bool conn_free(struct nf_conncount_list *list,
{
bool free_entry = false;
- spin_lock(&list->list_lock);
+ spin_lock_bh(&list->list_lock);
- if (list->count == 0) {
- spin_unlock(&list->list_lock);
+ if (conn->dead) {
+ spin_unlock_bh(&list->list_lock);
return free_entry;
}
list->count--;
+ conn->dead = true;
list_del_rcu(&conn->node);
- if (list->count == 0)
+ if (list->count == 0) {
+ list->dead = true;
free_entry = true;
+ }
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
call_rcu(&conn->rcu_head, __conn_free);
return free_entry;
}
@@ -160,7 +165,7 @@ find_or_evict(struct net *net, struct nf_conncount_list
*list,
const struct nf_conntrack_tuple_hash *found;
unsigned long a, b;
int cpu = raw_smp_processor_id();
- __s32 age;
+ u32 age;
found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
if (found)
@@ -248,7 +253,7 @@ static void nf_conncount_list_init(struct nf_conncount_list
*list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
- list->count = 1;
+ list->count = 0;
list->dead = false;
}
@@ -261,6 +266,7 @@ static bool nf_conncount_gc_list(struct net *net,
struct nf_conn *found_ct;
unsigned int collected = 0;
bool free_entry = false;
+ bool ret = false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry);
@@ -290,7 +296,15 @@ static bool nf_conncount_gc_list(struct net *net,
if (collected > CONNCOUNT_GC_MAX_NODES)
return false;
}
- return false;
+
+ spin_lock_bh(&list->list_lock);
+ if (!list->count) {
+ list->dead = true;
+ ret = true;
+ }
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
}
static void __tree_nodes_free(struct rcu_head *h)
@@ -310,11 +324,8 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
- if (rbconn->list.count == 0 && rbconn->list.dead == false) {
- rbconn->list.dead = true;
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
- }
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
spin_unlock(&rbconn->list.list_lock);
}
}
@@ -415,8 +426,9 @@ insert_tree(struct net *net,
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
count = 1;
+ rbconn->list.count = count;
- rb_link_node(&rbconn->node, parent, rbnode);
+ rb_link_node_rcu(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
out_unlock:
spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
--
2.7.4
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev