Committed to rh10-6.12.0-55.13.1.2.12.vz10

On 10/21/25 17:18, Aleksei Oladko wrote:
This patch reintroduces the feature from commit 6d43ed1, which was reverted
due to a scenario where a hanging user-mode-helper in one container could
block the startup of other containers.

Race can be reproduced with steps below:

1) Add this test patch to increase the race probability:

kernel/umh.c
@@ -455,6 +456,8 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
                 sub_info->queue = call_usermodehelper_queue_ve;
                 kthread_init_work(&sub_info->ve_work,
                                   call_usermodehelper_exec_work_ve);
+             while (VE_IS_RUNNING(ve))
+                       cond_resched();
         } else {
                 sub_info->queue = call_usermodehelper_queue;
                 INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);

2) Set corepattern to type pipe in CT:

echo "|/bin/dd of=/vz/coredumps/core-%e-%t.%p" > /proc/sys/kernel/core_pattern

3) Generate high CPU load on all cores using tools like stress-ng

stress-ng --futex $(nproc) --timeout 60s

4) Produce a segfault inside a container and next try to stop the
container killing init.

The coredump creates a "dd" work item and adds it to ve_umh_worker, which is
already stopped and will never handle it, so the work hangs forever and the
container never stops:

[<0>] call_usermodehelper_exec+0x168/0x1a0
[<0>] call_usermodehelper_exec_ve+0x96/0xe0
[<0>] do_coredump+0x60f/0xf40
[<0>] get_signal+0x834/0x960
[<0>] arch_do_signal_or_restart+0x29/0xf0
[<0>] irqentry_exit_to_user_mode+0x12e/0x1a0
[<0>] asm_exc_page_fault+0x26/0x30

Fix is:

1) Before calling call_usermodehelper_exec for a ve, check that
the ve is running before adding work.

2) Add separate helper counters for each ve.

3) In ve_stop_ns, after setting the VE_STATE_STOPPING state,
wait for the running helpers count to reach 0 before stopping umh.

There are 2 cases:

If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
after ve_stop_ns has started wait_khelpers, it will already see that
the ve is not in the running state and will not queue the work.

If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
before the VE_STATE_STOPPING state is set for the ve, then ve_stop_ns will
wait in wait_khelpers until all works have been queued. After that all
queued works will be processed in kthread_flush_worker.

https://virtuozzo.atlassian.net/browse/VSTOR-106887

Signed-off-by: Aleksei Oladko <[email protected]>
---
  include/linux/umh.h |  2 ++
  include/linux/ve.h  |  2 ++
  kernel/umh.c        | 27 ++++++++++++++++++++++++++-
  kernel/ve/ve.c      |  5 +++++
  4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/include/linux/umh.h b/include/linux/umh.h
index 5647f1e64e39..35fc4023df74 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -56,6 +56,8 @@ extern int
  call_usermodehelper_exec_ve(struct ve_struct *ve,
                            struct subprocess_info *info, int wait);
+extern void wait_khelpers(struct ve_struct *ve);
+
  #else /* CONFIG_VE */
#define call_usermodehelper_ve(ve, ...) \
diff --git a/include/linux/ve.h b/include/linux/ve.h
index e944132f972f..37562dff25aa 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -93,6 +93,8 @@ struct ve_struct {
        struct kthread_worker   *kthreadd_worker;
        struct task_struct      *kthreadd_task;
+ atomic_t umh_running_helpers;
+       struct wait_queue_head  umh_helpers_waitq;
        struct kthread_worker   umh_worker;
        struct task_struct      *umh_task;
diff --git a/kernel/umh.c b/kernel/umh.c
index bd49c108eb90..699403efe382 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -447,9 +447,30 @@ static void call_usermodehelper_exec_work_ve(struct 
kthread_work *work)
        }
  }
+static void umh_running_helpers_inc(struct ve_struct *ve)
+{
+       atomic_inc(&ve->umh_running_helpers);
+       smp_mb__after_atomic();
+}
+
+static void umh_running_helpers_dec(struct ve_struct *ve)
+{
+       if (atomic_dec_and_test(&ve->umh_running_helpers))
+               wake_up(&ve->umh_helpers_waitq);
+}
+
+void wait_khelpers(struct ve_struct *ve)
+{
+       wait_event(ve->umh_helpers_waitq,
+                  atomic_read(&ve->umh_running_helpers) == 0);
+}
+EXPORT_SYMBOL(wait_khelpers);
+
  int call_usermodehelper_exec_ve(struct ve_struct *ve,
                                struct subprocess_info *sub_info, int wait)
  {
+       int ret = -EBUSY;
+
        if (!ve_is_super(ve)) {
                sub_info->ve = ve;
                sub_info->queue = call_usermodehelper_queue_ve;
@@ -460,7 +481,11 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
                INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
        }
- return call_usermodehelper_exec(sub_info, wait);
+       umh_running_helpers_inc(ve);
+       if (VE_IS_RUNNING(ve))
+               ret = call_usermodehelper_exec(sub_info, wait);
+       umh_running_helpers_dec(ve);
+       return ret;
  }
  EXPORT_SYMBOL(call_usermodehelper_exec_ve);
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index cb77f7b7e4cd..d5dc15942ab5 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -88,6 +88,8 @@ struct ve_struct ve0 = {
        .nd_neigh_nr            = ATOMIC_INIT(0),
        .mnt_nr                 = ATOMIC_INIT(0),
        .meminfo_val            = VE_MEMINFO_SYSTEM,
+       .umh_running_helpers    = ATOMIC_INIT(0),
+       .umh_helpers_waitq      = 
__WAIT_QUEUE_HEAD_INITIALIZER(ve0.umh_helpers_waitq),
        .vdso_64                = (struct vdso_image*)&vdso_image_64,
        .vdso_32                = (struct vdso_image*)&vdso_image_32,
        .release_list_lock      = __SPIN_LOCK_UNLOCKED(
@@ -480,6 +482,8 @@ static int ve_start_umh(struct ve_struct *ve)
  {
        struct task_struct *task;
+ atomic_set(&ve->umh_running_helpers, 0);
+       init_waitqueue_head(&ve->umh_helpers_waitq);
        kthread_init_worker(&ve->umh_worker);
task = kthread_create_on_node_ve_flags(ve, 0, kthread_worker_fn,
@@ -814,6 +818,7 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
         */
        up_write(&ve->op_sem);
+ wait_khelpers(ve);
        /*
         * release_agent works on top of umh_worker, so we must make sure, that
         * ve workqueue is stopped first.

--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to