From: "Palik, Imre" <im...@amazon.de>

When file auditing is enabled, a memory allocation with __GFP_FS can,
during a low memory situation, lead to pruning the inode cache.  This
can, in turn, lead to audit_tree_freeing_mark() being called, which can
call audit_schedule_prune().  That function tries to fork a pruning
thread and waits until the thread is created.  But forking needs
memory, and the memory allocations there are done with __GFP_FS.

So we are waiting merrily for some __GFP_FS memory allocations to
complete, while holding filesystem locks.  This can take a while ...
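
For illustration, the problematic pre-patch path is roughly the
following (a simplified sketch of the code this patch removes, not a
verbatim quote):

	/* runs from reclaim when evict_chunk() kills an audit tree */
	static void audit_schedule_prune(void)
	{
		/*
		 * kthread_run() waits for kthreadd to fork the new
		 * thread, and that fork allocates with __GFP_FS.  If we
		 * got here from reclaim that was itself triggered by a
		 * __GFP_FS allocation, we can wait indefinitely while
		 * holding filesystem locks.
		 */
		kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
	}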

This patch instead creates a single, long-lived pruning thread from
audit_add_tree_rule(), and thus avoids the deadlock that on-demand
thread creation can cause.
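
The key observation is that waking an existing thread, unlike forking
a new one, does not allocate memory.  In outline (a sketch; the real
code is in the diff below):

	/* eviction side, possibly in reclaim: no allocation, just a wake */
	wake_up_process(prune_thread);

	/* pruning thread: sleep until there is work on prune_list */
	set_current_state(TASK_INTERRUPTIBLE);
	if (list_empty(&prune_list))
		schedule();
	__set_current_state(TASK_RUNNING);

Setting the task state before checking prune_list guarantees that a
wakeup arriving between the check and schedule() is not lost.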

Reported-by: Matt Wilson <m...@amazon.com>
Cc: Matt Wilson <m...@amazon.com>
Signed-off-by: Imre Palik <im...@amazon.de>
---
 kernel/audit_tree.c |   99 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 29 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0caf1f8..0ada577 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -37,6 +37,7 @@ struct audit_chunk {
 
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
 
 /*
  * One struct chunk is attached to each inode of interest.
@@ -641,6 +642,59 @@ static int tag_mount(struct vfsmount *mnt, void *arg)
        return tag_chunk(mnt->mnt_root->d_inode, arg);
 }
 
+/*
+ * That gets run when evict_chunk() ends up needing to kill audit_tree.
+ * Runs from a separate thread.
+ */
+static int prune_tree_thread(void *unused)
+{
+       for (;;) {
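+               /*
+                * Set the task state before checking prune_list so
+                * that a wake_up_process() arriving between the check
+                * and schedule() cannot be lost.
+                */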
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (list_empty(&prune_list))
+                       schedule();
+               __set_current_state(TASK_RUNNING);
+
+               mutex_lock(&audit_cmd_mutex);
+               mutex_lock(&audit_filter_mutex);
+
+               while (!list_empty(&prune_list)) {
+                       struct audit_tree *victim;
+
+                       victim = list_entry(prune_list.next,
+                                       struct audit_tree, list);
+                       list_del_init(&victim->list);
+
+                       mutex_unlock(&audit_filter_mutex);
+
+                       prune_one(victim);
+
+                       mutex_lock(&audit_filter_mutex);
+               }
+
+               mutex_unlock(&audit_filter_mutex);
+               mutex_unlock(&audit_cmd_mutex);
+       }
+       return 0;
+}
+
+static int launch_prune_thread(void)
+{
+       prune_thread = kthread_create(prune_tree_thread, NULL,
+                               "audit_prune_tree");
+       if (IS_ERR(prune_thread)) {
+               audit_panic("cannot start thread audit_prune_tree");
+               prune_thread = NULL;
+               return -ENOMEM;
+       }
+       wake_up_process(prune_thread);
+       return 0;
+}
+
 /* called with audit_filter_mutex */
 int audit_add_tree_rule(struct audit_krule *rule)
 {
@@ -663,6 +717,12 @@ int audit_add_tree_rule(struct audit_krule *rule)
        /* do not set rule->tree yet */
        mutex_unlock(&audit_filter_mutex);
 
+       if (unlikely(!prune_thread)) {
+               err = launch_prune_thread();
+               if (err)
+                       goto Err;
+       }
+
        err = kern_path(tree->pathname, 0, &path);
        if (err)
                goto Err;
@@ -713,6 +773,9 @@ int audit_tag_tree(char *old, char *new)
        struct vfsmount *tagged;
        int err;
 
+       if (!prune_thread)
+               return -ENOSYS;
+
        err = kern_path(new, 0, &path2);
        if (err)
                return err;
@@ -800,36 +863,14 @@ int audit_tag_tree(char *old, char *new)
        return failed;
 }
 
-/*
- * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread.
- */
-static int prune_tree_thread(void *unused)
-{
-       mutex_lock(&audit_cmd_mutex);
-       mutex_lock(&audit_filter_mutex);
-
-       while (!list_empty(&prune_list)) {
-               struct audit_tree *victim;
-
-               victim = list_entry(prune_list.next, struct audit_tree, list);
-               list_del_init(&victim->list);
-
-               mutex_unlock(&audit_filter_mutex);
-
-               prune_one(victim);
-
-               mutex_lock(&audit_filter_mutex);
-       }
-
-       mutex_unlock(&audit_filter_mutex);
-       mutex_unlock(&audit_cmd_mutex);
-       return 0;
-}
-
 static void audit_schedule_prune(void)
 {
-       kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
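+       /*
+        * We may run in reclaim context, so we must not allocate here;
+        * just wake the thread set up by audit_add_tree_rule().
+        */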
+       BUG_ON(!prune_thread);
+       wake_up_process(prune_thread);
 }
 
 /*
@@ -896,9 +937,9 @@ static void evict_chunk(struct audit_chunk *chunk)
        for (n = 0; n < chunk->count; n++)
                list_del_init(&chunk->owners[n].list);
        spin_unlock(&hash_lock);
+       mutex_unlock(&audit_filter_mutex);
        if (need_prune)
                audit_schedule_prune();
-       mutex_unlock(&audit_filter_mutex);
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
-- 
1.7.9.5
