Plug in the actual worker core-dumper routine. This is possibly a naive implementation, and it looks very similar to do_coredump(), with certain subtle differences:
- We currently don't honour rlimits (should we?). The coredump_filter, though, is honoured. - We build the core_state and core_thread chains on the fly. - No pipe support. The core is saved in the format core.<pid>.<timestamp>, where the timestamp is the ktime_get() value, in microseconds, when the dump occurs. Certainly, there are assumptions I've made that may be incorrect. The prototype works for the most part. Only occasionally, a subtle race, which I have yet to track down, causes a failure. As with fatal dumps, gdb is able to decode the core. Signed-off-by: Ananth N Mavinakayanahalli <ana...@in.ibm.com> --- fs/proc/proc_gencore.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) Index: utrace-13jul/fs/proc/proc_gencore.c =================================================================== --- utrace-13jul.orig/fs/proc/proc_gencore.c +++ utrace-13jul/fs/proc/proc_gencore.c @@ -19,6 +19,7 @@ */ #include <linux/seq_file.h> +#include <linux/binfmts.h> #include <linux/utrace.h> #include "internal.h" @@ -47,14 +48,153 @@ struct core_task { bool quiesced; }; +static void cleanup_core_state(struct core_state *core_state) +{ + struct core_thread *core_thread, *temp; + + if (!core_state) + return; + + core_thread = core_state->dumper.next; + while (core_thread != NULL) { + temp = core_thread->next; + kfree(core_thread); + core_thread = temp; + } + kfree(core_state); +} + +static struct core_state *build_core_state(struct core_proc *core_proc) +{ + struct task_struct *t = current; + struct core_task *core_task, *temp; + struct core_state *core_state; + + if (!core_proc) + return ERR_PTR(-EINVAL); + + /* + * We already know all threads have either been quiesced or blocked + * in the kernel. Even for threads bloced in the kernel, we know for + * sure that they'll not run userspace before quiescing. + * + * If this is a single-threaded process and its already exiting, + * we still hold the mmap_sem and we'll also see it in core_exit. 
+ */ + core_state = kzalloc(sizeof(*core_state), GFP_KERNEL); + if (!core_state) + return ERR_PTR(-ENOMEM); + + core_state->dumper.task = t; + core_state->dumper.next = NULL; + t->mm->core_state = core_state; + /* Make sure this is visible to do_exit */ + wmb(); + + list_for_each_entry_safe(core_task, temp, &core_proc->list, list) { + struct core_thread *core_thread; + + if (core_task->task == t) + continue; + core_thread = kzalloc(sizeof(*core_thread), GFP_KERNEL); + if (!core_thread) { + cleanup_core_state(core_state); + return ERR_PTR(-ENOMEM); + } + core_thread->task = core_task->task; + core_thread->next = xchg(&core_state->dumper.next, + core_thread); + } + return core_state; +} + +/* + * A 0 or negative return => failure. + * elf_core_dump returns 1 on SUCCESS + */ +static int write_core(struct core_proc *core_proc) +{ + struct core_state *core_state; + char corename[CORENAME_MAX_SIZE + 1]; + struct mm_struct *mm = current->mm; + struct linux_binfmt *binfmt; + struct inode *inode; + struct file *file; + int retval = -1; + int flag = 0; + + binfmt = current->binfmt; + if (!core_proc || !binfmt || !binfmt->core_dump) + return -EINVAL; + + down_write(&mm->mmap_sem); + if (mm->core_state || !get_dumpable(mm)) { + up_write(&mm->mmap_sem); + return -EALREADY; + } + + core_state = build_core_state(core_proc); + if (IS_ERR(core_state)) { + up_write(&mm->mmap_sem); + return PTR_ERR(core_state); + } + + up_write(&mm->mmap_sem); + + /* For now, core files will have the core.<pid>.timestamp format */ + memset(corename, 0x00, (CORENAME_MAX_SIZE + 1)); + strncpy(corename, "core.", strlen("core.")); + snprintf((corename + strlen(corename)), + (CORENAME_MAX_SIZE - strlen(corename)), + "%d.", task_tgid_vnr(current)); + snprintf((corename + strlen(corename)), + (CORENAME_MAX_SIZE - strlen(corename)), + "%lld", (ktime_to_us(ktime_get()))); + + file = filp_open(corename, + O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, + 0600); + if (IS_ERR(file)) + return PTR_ERR(file); + 
inode = file->f_path.dentry->d_inode; + if (inode->i_nlink > 1) + goto close_fail; + if (d_unhashed(file->f_path.dentry)) + goto close_fail; + if (!S_ISREG(inode->i_mode)) + goto close_fail; + if (inode->i_uid != current_fsuid()) + goto close_fail; + if (!file->f_op) + goto close_fail; + if (!file->f_op->write) + goto close_fail; + if (do_truncate(file->f_path.dentry, 0, 0, file) != 0) + goto close_fail; + + /* XXX coresize is currently hardcoded. need to fix that */ + retval = binfmt->core_dump(0, NULL, file, 0xFFFFFFFF); + +close_fail: + filp_close(file, NULL); + cleanup_core_state(core_state); + current->mm->core_state = NULL; + wmb(); + return retval; +} + static void cleanup_core_proc(struct core_proc *core_proc) { struct core_task *core_task, *temp; + struct core_state *core_state = core_proc->tgid_task->mm->core_state; int ret; if (core_proc == NULL) return; + if (core_state) + cleanup_core_state(core_state); + spin_lock(&core_proc->lock); if (list_empty(&core_proc->list)) goto out; @@ -215,6 +355,10 @@ static u32 core_quiesce(enum utrace_resu atomic_read(&core_proc->num_threads)) { /* All threads quiesced, do your thing :-) */ + int ret = write_core(core_proc); + if (ret == -EALREADY) + /* Dump already in progress, hold thread */ + return UTRACE_STOP; /* Let everyone run, indicate we are done! */ quiesce_all_threads(core_proc, false);