We currently have the /proc/<pid>/net/rpc/kill-tasks interface for aborting pending RPC tasks. To stop a container quickly, we need to abort all of its RPC tasks, so we must iterate over all VE PIDs.
There are several problems with this approach: 1. In real life with cgroups-v2 we only process the container's init PID; all other network namespaces are ignored. 2. There is a risk of breaking unrelated NFS connections if a PID that belonged to a container process is reused while we are stopping the container. 3. This may be slow: a container might have a lot of processes and only a few network namespaces. To fix all of this, create a per-VE interface for aborting RPC requests in all VE network namespaces. https://virtuozzo.atlassian.net/browse/VSTOR-126316 Feature: improve kill-tasks Signed-off-by: Vladimir Riabchun <[email protected]> --- v1 -> v2: - Add __rcu annotation for rpc_kill_net_fn in ve.c - Use rcu_assign_pointer/rcu_dereference instead of {READ, WRITE}_ONCE include/linux/sunrpc/clnt.h | 2 ++ include/linux/ve.h | 4 ++++ kernel/ve/ve.c | 44 +++++++++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 3 ++- net/sunrpc/sunrpc_syms.c | 3 +++ 5 files changed, 55 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0133aeba248a..0e7c7c9107a2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -253,6 +253,8 @@ void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *, const char *rpc_proc_name(const struct rpc_task *task); + +void rpc_kill_tasks(struct net *net); int rpc_task_kill_proc_init(struct net *net); void rpc_task_kill_proc_fini(struct net *net); diff --git a/include/linux/ve.h b/include/linux/ve.h index 224acf012821..95a83c7bc7de 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -279,6 +279,8 @@ extern bool is_ve_init_net(const struct net *net); void ve_setup_task(struct task_struct *p, struct ve_struct *ve); +void ve_set_rpc_kill_fn(void (*fn)(struct net *)); + #else /* CONFIG_VE */ #include <linux/init_task.h> #define get_ve(ve) ((void)(ve), NULL) @@ -336,6 +338,8 @@ static inline int vz_security_protocol_check(struct net *net, int protocol) { re static inline void ve_setup_task(struct 
task_struct *p, struct ve_struct *ve) { } +static inline void ve_set_rpc_kill_fn(void (*fn)(struct net *)) { } + #endif /* CONFIG_VE */ struct seq_file; diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index 198c82f010cc..c05108f0ecd3 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -113,6 +113,8 @@ EXPORT_SYMBOL(nr_ve); static DEFINE_IDR(ve_idr); +static void (__rcu *rpc_kill_net_fn)(struct net *); + struct ve_struct *get_ve(struct ve_struct *ve) { if (ve) @@ -1713,6 +1715,43 @@ static ssize_t ve_write_ctty(struct kernfs_open_file *of, char *buf, return ret; } +void ve_set_rpc_kill_fn(void (*fn)(struct net *)) +{ + rcu_assign_pointer(rpc_kill_net_fn, fn); + /* ve_rpc_kill_write might be using old function. + * rpc_kill_tasks -> NULL is dangerous, block sunrpc exit while we use + * functions from this module + */ + synchronize_rcu(); +} +EXPORT_SYMBOL(ve_set_rpc_kill_fn); + +static int ve_rpc_kill_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + struct net *net; + struct ve_struct *ve = css_to_ve(css); + void (*fn)(struct net *net); + + guard(rwsem_read)(&net_rwsem); /* for_each_net protection */ + guard(rcu)(); /* Begin rpc_kill_net_fn usage section */ + + fn = rcu_dereference(rpc_kill_net_fn); + if (!fn) { + pr_info_ratelimited("SUNRPC module is not loaded.\n"); + return 0; + } + + for_each_net(net) { + if (net->owner_ve != ve) + continue; + /* rpc_kill_tasks is atomic. 
*/ + fn(net); + } + + return 0; +} + static struct cftype ve_cftypes[] = { { @@ -1808,6 +1847,11 @@ static struct cftype ve_cftypes[] = { .flags = CFTYPE_ONLY_ON_ROOT, .write = ve_write_ctty, }, + { + .name = "rpc_kill", + .flags = CFTYPE_NOT_ON_ROOT, + .write_u64 = ve_rpc_kill_write, + }, { } }; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cf77eec661c1..2cda358894be 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -3406,8 +3406,9 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); #endif /* CONFIG_SUNRPC_SWAP */ -static void rpc_kill_tasks(struct net *net) +void rpc_kill_tasks(struct net *net) { + /* Note: function must be atomic, used under RCU read-lock. */ struct rpc_clnt *clnt; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index bab6cab29405..0cb280569135 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -13,6 +13,7 @@ #include <linux/uio.h> #include <linux/unistd.h> #include <linux/init.h> +#include <linux/ve.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/clnt.h> @@ -114,6 +115,7 @@ init_sunrpc(void) #endif svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ + ve_set_rpc_kill_fn(rpc_kill_tasks); return 0; out5: @@ -131,6 +133,7 @@ init_sunrpc(void) static void __exit cleanup_sunrpc(void) { + ve_set_rpc_kill_fn(NULL); rpc_sysfs_exit(); rpc_cleanup_clids(); xprt_cleanup_ids(); -- 2.47.1 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
