The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear 
at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.32
------>
commit 0eea24f6f2cbcd40d90a611e0f528bcfad337125
Author: Konstantin Khorenko <[email protected]>
Date:   Mon May 24 15:58:49 2021 +0300

    ve/netfilter: Implement pernet net->ct.max / virtualize "nf_conntrack_max" sysctl
    
    Rebasing and splitting the netfilter subsystem
    (port 66-diff-ve-net-netfilter-combined).
    Part 1.
    
    https://jira.sw.ru/browse/PSBM-18322
    
    Signed-off-by: Kirill Tkhai <[email protected]>
    
    (cherry picked from vz7 commit c34a99c00f9d ("ve/netfilter: Implement
    pernet net->ct.max / virtualize "nf_conntrack_max" sysctl"))
    
    VZ 8 rebase part https://jira.sw.ru/browse/PSBM-127783
    
    Signed-off-by: Alexander Mikhalitsyn <[email protected]>
    
    +++
    ve/nf_conntrack: expose "nf_conntrack_max" in containers
    
    Series:
    This series brings to vz7 all the nf_conntrack sysctls
    which are available in vz6.
    
    https://jira.sw.ru/browse/PSBM-40044
    
    This sysctl table contains only one entry: "/proc/sys/net/nf_conntrack_max".
    This is now visible inside ct.
    However, note that "/proc/sys/net/netfilter/nf_conntrack_max" and
    friends (although they are containerized) are behind init_user_ns.
    
    Signed-off-by: Stanislav Kinsburskiy <[email protected]>
    Reviewed-by: Kirill Tkhai <[email protected]>
    
    (cherry picked from vz7 commit 9d3a8c692557 ("ve/nf_conntrack: expose
    "nf_conntrack_max" in containers"))
    
    VZ 8 rebase part https://jira.sw.ru/browse/PSBM-127783
    
    Signed-off-by: Alexander Mikhalitsyn <[email protected]>
---
 include/net/netns/conntrack.h           |  2 +
 net/netfilter/nf_conntrack_core.c       | 19 ++++----
 net/netfilter/nf_conntrack_netlink.c    |  2 +-
 net/netfilter/nf_conntrack_standalone.c | 82 +++++++++++++++++++++++++--------
 4 files changed, 75 insertions(+), 30 deletions(-)

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index fe2331b75cc7..19bcf4173ccb 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -106,6 +106,7 @@ struct ct_pcpu {
 
 struct netns_ct {
        atomic_t                count;
+       unsigned int            max;
        unsigned int            expect_count;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
        struct delayed_work ecache_dwork;
@@ -113,6 +114,7 @@ struct netns_ct {
 #endif
        unsigned int            expect_max;
 #ifdef CONFIG_SYSCTL
+       struct ctl_table_header *netfilter_header;
        struct ctl_table_header *sysctl_header;
        struct ctl_table_header *acct_sysctl_header;
        struct ctl_table_header *tstamp_sysctl_header;
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 23cbe8ed81e2..7deb88926a8c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -182,8 +182,6 @@ static void nf_conntrack_all_unlock(void)
 unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
-unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
 seqcount_t nf_conntrack_generation __read_mostly;
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
@@ -1345,7 +1343,6 @@ static void gc_worker(struct work_struct *work)
 {
        unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
        unsigned int i, goal, buckets = 0, expired_count = 0;
-       unsigned int nf_conntrack_max95 = 0;
        struct conntrack_gc_work *gc_work;
        unsigned int ratio, scanned = 0;
        unsigned long next_run;
@@ -1354,8 +1351,6 @@ static void gc_worker(struct work_struct *work)
 
        goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
        i = gc_work->last_bucket;
-       if (gc_work->early_drop)
-               nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
 
        do {
                struct nf_conntrack_tuple_hash *h;
@@ -1373,8 +1368,13 @@ static void gc_worker(struct work_struct *work)
 
                hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
                        struct net *net;
+                       unsigned int nf_conntrack_max95 = 0;
 
                        tmp = nf_ct_tuplehash_to_ctrack(h);
+                       net = nf_ct_net(tmp);
+
+                       if (gc_work->early_drop)
+                               nf_conntrack_max95 = net->ct.max / 100u * 95u;
 
                        scanned++;
                        if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
@@ -1391,7 +1391,6 @@ static void gc_worker(struct work_struct *work)
                        if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
                                continue;
 
-                       net = nf_ct_net(tmp);
                        if (atomic_read(&net->ct.count) < nf_conntrack_max95)
                                continue;
 
@@ -1471,13 +1470,14 @@ __nf_conntrack_alloc(struct net *net,
                     const struct nf_conntrack_tuple *repl,
                     gfp_t gfp, u32 hash)
 {
+       unsigned int ct_max = net->ct.max ? net->ct.max : init_net.ct.max;
        struct nf_conn *ct;
 
        /* We don't want any race condition at early drop stage */
        atomic_inc(&net->ct.count);
 
-       if (nf_conntrack_max &&
-           unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
+       if (ct_max &&
+           unlikely(atomic_read(&net->ct.count) > ct_max)) {
                if (!early_drop(net, hash)) {
                        if (!conntrack_gc_work.early_drop)
                                conntrack_gc_work.early_drop = true;
@@ -2635,7 +2635,7 @@ int nf_conntrack_init_start(void)
        if (!nf_conntrack_hash)
                return -ENOMEM;
 
-       nf_conntrack_max = max_factor * nf_conntrack_htable_size;
+       init_net.ct.max = max_factor * nf_conntrack_htable_size;
 
        nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
                                                sizeof(struct nf_conn),
@@ -2735,6 +2735,7 @@ int nf_conntrack_init_net(struct net *net)
 
        BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
        atomic_set(&net->ct.count, 0);
+       net->ct.max = init_net.ct.max;
 
        net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
        if (!net->ct.pcpu_lists)
diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index 0fb92033409a..9de8059325da 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2275,7 +2275,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 
portid, u32 seq, u32 type,
        if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
                goto nla_put_failure;
 
-       if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, 
htonl(nf_conntrack_max)))
+       if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(net->ct.max)))
                goto nla_put_failure;
 
        nlmsg_end(skb, nlh);
diff --git a/net/netfilter/nf_conntrack_standalone.c 
b/net/netfilter/nf_conntrack_standalone.c
index e4fcb939e19a..61aa2a7a8182 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -515,12 +515,10 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int 
write,
        return ret;
 }
 
-static struct ctl_table_header *nf_ct_netfilter_header;
-
 static struct ctl_table nf_ct_sysctl_table[] = {
        {
                .procname       = "nf_conntrack_max",
-               .data           = &nf_conntrack_max,
+               .data           = &init_net.ct.max,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
@@ -568,7 +566,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 static struct ctl_table nf_ct_netfilter_table[] = {
        {
                .procname       = "nf_conntrack_max",
-               .data           = &nf_conntrack_max,
+               .data           = &init_net.ct.max,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
@@ -576,6 +574,44 @@ static struct ctl_table nf_ct_netfilter_table[] = {
        { }
 };
 
+static int zero;
+
+static int nf_conntrack_netfilter_init_sysctl(struct net *net)
+{
+       struct ctl_table *table;
+
+       table = kmemdup(nf_ct_netfilter_table, sizeof(nf_ct_netfilter_table),
+                       GFP_KERNEL);
+       if (!table)
+               goto out_kmemdup;
+
+       table[0].data = &net->ct.max;
+
+       /* Don't export sysctls to unprivileged users */
+       if (ve_net_hide_sysctl(net))
+               table[0].procname = NULL;
+
+       net->ct.netfilter_header = register_net_sysctl(net, "net", table);
+       if (!net->ct.netfilter_header)
+               goto out_unregister_netfilter;
+
+       return 0;
+
+out_unregister_netfilter:
+       kfree(table);
+out_kmemdup:
+       return -ENOMEM;
+}
+
+static void nf_conntrack_netfilter_fini_sysctl(struct net *net)
+{
+       struct ctl_table *table;
+
+       table = net->ct.netfilter_header->ctl_table_arg;
+       unregister_net_sysctl_table(net->ct.netfilter_header);
+       kfree(table);
+}
+
 static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
        struct ctl_table *table;
@@ -585,6 +621,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net 
*net)
        if (!table)
                goto out_kmemdup;
 
+       table[0].data = &net->ct.max;
        table[1].data = &net->ct.count;
        table[3].data = &net->ct.sysctl_checksum;
        table[4].data = &net->ct.sysctl_log_invalid;
@@ -594,8 +631,12 @@ static int nf_conntrack_standalone_init_sysctl(struct net 
*net)
        if (ve_net_hide_sysctl(net))
                table[0].procname = NULL;
 
-       if (!net_eq(&init_net, net))
+       if (!net_eq(&init_net, net)) {
+               table[0].proc_handler = proc_dointvec_minmax;
+               table[0].extra1 = &zero;
+               table[0].extra2 = &init_net.ct.max;
                table[2].mode = 0444;
+       }
 
        net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", 
table);
        if (!net->ct.sysctl_header)
@@ -618,6 +659,15 @@ static void nf_conntrack_standalone_fini_sysctl(struct net 
*net)
        kfree(table);
 }
 #else
+static int nf_conntrack_netfilter_init_sysctl(struct net *net)
+{
+       return 0;
+}
+
+static void nf_conntrack_netfilter_fini_sysctl(struct net *net)
+{
+}
+
 static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
        return 0;
@@ -646,8 +696,14 @@ static int nf_conntrack_pernet_init(struct net *net)
        if (ret < 0)
                goto out_sysctl;
 
+       ret = nf_conntrack_netfilter_init_sysctl(net);
+       if (ret < 0)
+               goto out_netfilter_sysctl;
+
        return 0;
 
+out_netfilter_sysctl:
+       nf_conntrack_standalone_fini_sysctl(net);
 out_sysctl:
        nf_conntrack_standalone_fini_proc(net);
 out_proc:
@@ -661,6 +717,7 @@ static void nf_conntrack_pernet_exit(struct list_head 
*net_exit_list)
        struct net *net;
 
        list_for_each_entry(net, net_exit_list, exit_list) {
+               nf_conntrack_netfilter_fini_sysctl(net);
                nf_conntrack_standalone_fini_sysctl(net);
                nf_conntrack_standalone_fini_proc(net);
        }
@@ -684,14 +741,6 @@ static int __init nf_conntrack_standalone_init(void)
        BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER);
 
 #ifdef CONFIG_SYSCTL
-       nf_ct_netfilter_header =
-               register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
-       if (!nf_ct_netfilter_header) {
-               pr_err("nf_conntrack: can't register to sysctl.\n");
-               ret = -ENOMEM;
-               goto out_sysctl;
-       }
-
        nf_conntrack_htable_size_user = nf_conntrack_htable_size;
 #endif
 
@@ -703,10 +752,6 @@ static int __init nf_conntrack_standalone_init(void)
        return 0;
 
 out_pernet:
-#ifdef CONFIG_SYSCTL
-       unregister_net_sysctl_table(nf_ct_netfilter_header);
-out_sysctl:
-#endif
        nf_conntrack_cleanup_end();
 out_start:
        return ret;
@@ -716,9 +761,6 @@ static void __exit nf_conntrack_standalone_fini(void)
 {
        nf_conntrack_cleanup_start();
        unregister_pernet_subsys(&nf_conntrack_net_ops);
-#ifdef CONFIG_SYSCTL
-       unregister_net_sysctl_table(nf_ct_netfilter_header);
-#endif
        nf_conntrack_cleanup_end();
 }
 
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to