Plug in the actual worker core-dumper routine.

This is possibly a naive implementation, and looks very similar to
do_coredump(), with certain subtle differences:

- We currently don't honour rlimits (should we?). The coredump_filter
  though, is honoured.
- We build the core_state and core_thread chains on the fly.
- No pipe support. The core is saved in the format core.<pid>.<timestamp>
  where the timestamp is the ktime_get() value when the dump occurs.

Certainly, there are assumptions I've made that may be incorrect. The
prototype works for the most part. Only occasionally does a subtle race,
which I have yet to track down, cause a failure. As with fatal dumps, gdb
is able to decode the core.


Signed-off-by: Ananth N Mavinakayanahalli <ana...@in.ibm.com>
---
 fs/proc/proc_gencore.c |  144 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

Index: utrace-13jul/fs/proc/proc_gencore.c
===================================================================
--- utrace-13jul.orig/fs/proc/proc_gencore.c
+++ utrace-13jul/fs/proc/proc_gencore.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/seq_file.h>
+#include <linux/binfmts.h>
 #include <linux/utrace.h>
 #include "internal.h"
 
@@ -47,14 +48,153 @@ struct core_task {
        bool quiesced;
 };
 
+/* Free a core_state and every core_thread chained off its dumper entry. */
+static void cleanup_core_state(struct core_state *core_state)
+{
+       struct core_thread *cur, *next;
+
+       if (core_state == NULL)
+               return;
+
+       /* dumper is embedded in core_state; only the chained nodes are freed */
+       for (cur = core_state->dumper.next; cur; cur = next) {
+               next = cur->next;
+               kfree(cur);
+       }
+       kfree(core_state);
+}
+
+/* Build and publish (in current->mm) the core_state for a dump by current.
+ * On failure returns ERR_PTR() without leaving the state published. */
+static struct core_state *build_core_state(struct core_proc *core_proc)
+{
+       struct task_struct *t = current;
+       struct core_task *core_task, *temp;
+       struct core_state *core_state;
+
+       if (!core_proc)
+               return ERR_PTR(-EINVAL);
+
+       /*
+        * All threads are already quiesced or blocked in the kernel, and a
+        * thread blocked in the kernel will not run userspace before
+        * quiescing. If this is a single-threaded process that is already
+        * exiting, we still hold the mmap_sem and will see it in core_exit.
+        */
+       core_state = kzalloc(sizeof(*core_state), GFP_KERNEL);
+       if (!core_state)
+               return ERR_PTR(-ENOMEM);
+
+       core_state->dumper.task = t;    /* dumper.next is NULL from kzalloc */
+       t->mm->core_state = core_state;
+       wmb();          /* Make sure this is visible to do_exit */
+
+       list_for_each_entry_safe(core_task, temp, &core_proc->list, list) {
+               struct core_thread *core_thread;
+
+               if (core_task->task == t)
+                       continue;
+               core_thread = kzalloc(sizeof(*core_thread), GFP_KERNEL);
+               if (!core_thread) {
+                       /* Unpublish first: mm must not keep a freed pointer */
+                       t->mm->core_state = NULL;
+                       wmb();
+                       cleanup_core_state(core_state);
+                       return ERR_PTR(-ENOMEM);
+               }
+               core_thread->task = core_task->task;
+               core_thread->next = xchg(&core_state->dumper.next, core_thread);
+       }
+       return core_state;
+}
+
+/*
+ * Write a core dump of the current process to core.<pid>.<timestamp>.
+ *
+ * A 0 or negative return => failure (elf_core_dump returns 1 on SUCCESS).
+ * -EALREADY => a dump is already in progress or the mm is not dumpable;
+ * -EINVAL => no core_proc or no binfmt core_dump handler. On every path
+ * past build_core_state() the core_state is unpublished and freed again.
+ */
+static int write_core(struct core_proc *core_proc)
+{
+       struct core_state *core_state;
+       char corename[CORENAME_MAX_SIZE + 1];
+       struct mm_struct *mm = current->mm;
+       struct linux_binfmt *binfmt;
+       struct inode *inode;
+       struct file *file;
+       int retval = -1;
+
+       binfmt = current->binfmt;
+       if (!core_proc || !binfmt || !binfmt->core_dump)
+               return -EINVAL;
+
+       /* Check and publish mm->core_state under mmap_sem */
+       down_write(&mm->mmap_sem);
+       if (mm->core_state || !get_dumpable(mm)) {
+               up_write(&mm->mmap_sem);
+               return -EALREADY;
+       }
+
+       core_state = build_core_state(core_proc);
+       up_write(&mm->mmap_sem);
+       if (IS_ERR(core_state))
+               return PTR_ERR(core_state);
+
+       /* For now, core files will have the core.<pid>.timestamp format */
+       snprintf(corename, sizeof(corename), "core.%d.%lld",
+                       task_tgid_vnr(current),
+                       (long long)ktime_to_us(ktime_get()));
+
+       file = filp_open(corename,
+                       O_CREAT | O_RDWR | O_NOFOLLOW | O_LARGEFILE, 0600);
+       if (IS_ERR(file)) {
+               /* core_state is published by now; it must be torn down */
+               retval = PTR_ERR(file);
+               goto free_state;
+       }
+
+       /* Sanity-check the target before truncating and writing to it */
+       inode = file->f_path.dentry->d_inode;
+       if (inode->i_nlink > 1)
+               goto close_fail;
+       if (d_unhashed(file->f_path.dentry))
+               goto close_fail;
+       if (!S_ISREG(inode->i_mode))
+               goto close_fail;
+       if (inode->i_uid != current_fsuid())
+               goto close_fail;
+       if (!file->f_op || !file->f_op->write)
+               goto close_fail;
+       if (do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+               goto close_fail;
+
+       /* XXX coresize is currently hardcoded. need to fix that */
+       retval = binfmt->core_dump(0, NULL, file, 0xFFFFFFFF);
+
+close_fail:
+       filp_close(file, NULL);
+free_state:
+       /* Unpublish before freeing so mm never points at freed memory */
+       mm->core_state = NULL;
+       wmb();
+       cleanup_core_state(core_state);
+       return retval;
+}
+
 static void cleanup_core_proc(struct core_proc *core_proc)
 {
        struct core_task *core_task, *temp;
+       struct core_state *core_state = core_proc->tgid_task->mm->core_state;
        int ret;
 
        if (core_proc == NULL)
                return;
 
+       if (core_state)
+               cleanup_core_state(core_state);
+
        spin_lock(&core_proc->lock);
        if (list_empty(&core_proc->list))
                goto out;
@@ -215,6 +355,10 @@ static u32 core_quiesce(enum utrace_resu
                        atomic_read(&core_proc->num_threads)) {
 
                /* All threads quiesced, do your thing :-) */
+               int ret = write_core(core_proc);
+               if (ret == -EALREADY)
+                       /* Dump already in progress, hold thread */
+                       return UTRACE_STOP;
 
                /* Let everyone run, indicate we are done! */
                quiesce_all_threads(core_proc, false);

Reply via email to