This patch backports several critical bug fixes related to
locking and data consistency in nf_conncount code.

This backport is based on the following upstream net-next upstream commits.
4cd273b ("netfilter: nf_conncount: don't skip eviction when age is negative")
d4e7df1 ("netfilter: nf_conncount: use rb_link_node_rcu() instead of 
rb_link_node()")
53ca0f2 ("netfilter: nf_conncount: remove wrong condition check routine")
3c5cdb1 ("netfilter: nf_conncount: fix unexpected permanent node of list.")
31568ec ("netfilter: nf_conncount: fix list_del corruption in conn_free")
fd3e71a ("netfilter: nf_conncount: use spin_lock_bh instead of spin_lock")

This patch also added additional compat code so that it can build on
all supported kernel versions.

Travis tests are at
https://travis-ci.org/yifsun/ovs-travis/builds/568603796

VMware-BZ: #2396471

CC: Taehee Yoo <ap420...@gmail.com>
Signed-off-by: Yifeng Sun <pkusunyif...@gmail.com>
---
v1->v2: Add fixes to support old kernel versions. Thanks YiHung for reviewing.

 acinclude.m4                                 |  2 ++
 datapath/linux/Modules.mk                    |  3 +-
 datapath/linux/compat/include/linux/rbtree.h | 19 ++++++++++++
 datapath/linux/compat/nf_conncount.c         | 46 ++++++++++++++++++----------
 4 files changed, 52 insertions(+), 18 deletions(-)
 create mode 100644 datapath/linux/compat/include/linux/rbtree.h

diff --git a/acinclude.m4 b/acinclude.m4
index 116ffcf9096d..f8e856d3303f 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -1012,6 +1012,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
                   [OVS_DEFINE([HAVE_GRE_CALC_HLEN])])
   OVS_GREP_IFELSE([$KSRC/include/net/gre.h], [ip_gre_calc_hlen],
                   [OVS_DEFINE([HAVE_IP_GRE_CALC_HLEN])])
+  OVS_GREP_IFELSE([$KSRC/include/linux/rbtree.h], [rb_link_node_rcu],
+                  [OVS_DEFINE([HAVE_RBTREE_RB_LINK_NODE_RCU])])
 
   if cmp -s datapath/linux/kcompat.h.new \
             datapath/linux/kcompat.h >/dev/null 2>&1; then
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index cbb29f1c69d0..69d7faeac414 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -116,5 +116,6 @@ openvswitch_headers += \
        linux/compat/include/uapi/linux/netfilter.h \
        linux/compat/include/linux/mm.h \
        linux/compat/include/linux/netfilter.h \
-       linux/compat/include/linux/overflow.h
+       linux/compat/include/linux/overflow.h \
+       linux/compat/include/linux/rbtree.h
 EXTRA_DIST += linux/compat/build-aux/export-check-whitelist
diff --git a/datapath/linux/compat/include/linux/rbtree.h 
b/datapath/linux/compat/include/linux/rbtree.h
new file mode 100644
index 000000000000..dbf20ff0e0b8
--- /dev/null
+++ b/datapath/linux/compat/include/linux/rbtree.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_RBTREE_WRAPPER_H
+#define __LINUX_RBTREE_WRAPPER_H 1
+
+#include_next <linux/rbtree.h>
+
+#ifndef HAVE_RBTREE_RB_LINK_NODE_RCU
+#include <linux/rcupdate.h>
+
+static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node 
*parent,
+                                   struct rb_node **rb_link)
+{
+       node->__rb_parent_color = (unsigned long)parent;
+       node->rb_left = node->rb_right = NULL;
+
+       rcu_assign_pointer(*rb_link, node);
+}
+#endif
+
+#endif /* __LINUX_RBTREE_WRAPPER_H */
diff --git a/datapath/linux/compat/nf_conncount.c 
b/datapath/linux/compat/nf_conncount.c
index eeae440f872d..6a4d058e7fac 100644
--- a/datapath/linux/compat/nf_conncount.c
+++ b/datapath/linux/compat/nf_conncount.c
@@ -54,6 +54,7 @@ struct nf_conncount_tuple {
        struct nf_conntrack_zone        zone;
        int                             cpu;
        u32                             jiffies32;
+       bool                            dead;
        struct rcu_head                 rcu_head;
 };
 
@@ -111,15 +112,16 @@ nf_conncount_add(struct nf_conncount_list *list,
        conn->zone = *zone;
        conn->cpu = raw_smp_processor_id();
        conn->jiffies32 = (u32)jiffies;
-       spin_lock(&list->list_lock);
+       conn->dead = false;
+       spin_lock_bh(&list->list_lock);
        if (list->dead == true) {
                kmem_cache_free(conncount_conn_cachep, conn);
-               spin_unlock(&list->list_lock);
+               spin_unlock_bh(&list->list_lock);
                return NF_CONNCOUNT_SKIP;
        }
        list_add_tail(&conn->node, &list->head);
        list->count++;
-       spin_unlock(&list->list_lock);
+       spin_unlock_bh(&list->list_lock);
        return NF_CONNCOUNT_ADDED;
 }
 
@@ -136,19 +138,22 @@ static bool conn_free(struct nf_conncount_list *list,
 {
        bool free_entry = false;
 
-       spin_lock(&list->list_lock);
+       spin_lock_bh(&list->list_lock);
 
-       if (list->count == 0) {
-               spin_unlock(&list->list_lock);
+       if (conn->dead) {
+               spin_unlock_bh(&list->list_lock);
                return free_entry;
        }
 
        list->count--;
+       conn->dead = true;
        list_del_rcu(&conn->node);
-       if (list->count == 0)
+       if (list->count == 0) {
+               list->dead = true;
                free_entry = true;
+       }
 
-       spin_unlock(&list->list_lock);
+       spin_unlock_bh(&list->list_lock);
        call_rcu(&conn->rcu_head, __conn_free);
        return free_entry;
 }
@@ -160,7 +165,7 @@ find_or_evict(struct net *net, struct nf_conncount_list 
*list,
        const struct nf_conntrack_tuple_hash *found;
        unsigned long a, b;
        int cpu = raw_smp_processor_id();
-       __s32 age;
+       u32 age;
 
        found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
        if (found)
@@ -248,7 +253,7 @@ static void nf_conncount_list_init(struct nf_conncount_list 
*list)
 {
        spin_lock_init(&list->list_lock);
        INIT_LIST_HEAD(&list->head);
-       list->count = 1;
+       list->count = 0;
        list->dead = false;
 }
 
@@ -261,6 +266,7 @@ static bool nf_conncount_gc_list(struct net *net,
        struct nf_conn *found_ct;
        unsigned int collected = 0;
        bool free_entry = false;
+       bool ret = false;
 
        list_for_each_entry_safe(conn, conn_n, &list->head, node) {
                found = find_or_evict(net, list, conn, &free_entry);
@@ -290,7 +296,15 @@ static bool nf_conncount_gc_list(struct net *net,
                if (collected > CONNCOUNT_GC_MAX_NODES)
                        return false;
        }
-       return false;
+
+       spin_lock_bh(&list->list_lock);
+       if (!list->count) {
+               list->dead = true;
+               ret = true;
+       }
+       spin_unlock_bh(&list->list_lock);
+
+       return ret;
 }
 
 static void __tree_nodes_free(struct rcu_head *h)
@@ -310,11 +324,8 @@ static void tree_nodes_free(struct rb_root *root,
        while (gc_count) {
                rbconn = gc_nodes[--gc_count];
                spin_lock(&rbconn->list.list_lock);
-               if (rbconn->list.count == 0 && rbconn->list.dead == false) {
-                       rbconn->list.dead = true;
-                       rb_erase(&rbconn->node, root);
-                       call_rcu(&rbconn->rcu_head, __tree_nodes_free);
-               }
+               rb_erase(&rbconn->node, root);
+               call_rcu(&rbconn->rcu_head, __tree_nodes_free);
                spin_unlock(&rbconn->list.list_lock);
        }
 }
@@ -415,8 +426,9 @@ insert_tree(struct net *net,
        nf_conncount_list_init(&rbconn->list);
        list_add(&conn->node, &rbconn->list.head);
        count = 1;
+       rbconn->list.count = count;
 
-       rb_link_node(&rbconn->node, parent, rbnode);
+       rb_link_node_rcu(&rbconn->node, parent, rbnode);
        rb_insert_color(&rbconn->node, root);
 out_unlock:
        spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-- 
2.7.4

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to