Module Name: src Committed By: kre Date: Wed Nov 9 00:30:17 UTC 2016
Modified Files: src/sys/kern: kern_exit.c Log Message: PR kern/51600 ; PR standards/51606 Revert 1.264 - that was intended to fix 51600, but didn't; it just hid the problem, and caused 51606. This fixes 51606. Handle waiting on a process that has been detached from its parent because of being ptrace'd by some other process. This fixes 51600. ("handle" here means that the wait() hangs, or with WNOHANG, returns 0; we cannot actually wait on a process that is not currently an attached child.) Note: the detached process waiting is not yet perfect (it fails to take account of options like WALLSIG and WALTSIG) - support for those (that is, ignoring a detached child that one of those options will later cause to be ignored when the process is re-attached) is still to be added. For now, other than when waiting for a specific process ID, when a process does a wait() system call (any of them), has no applicable children attached that can be returned, and has at least one detached child, then we do a linear search of all processes to look for a suitable detached child. This is likely to be slow - but very rare. Eventually it might be better to keep a list of detached children per process. To generate a diff of this commit: cvs rdiff -u -r1.264 -r1.265 src/sys/kern/kern_exit.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/kern_exit.c diff -u src/sys/kern/kern_exit.c:1.264 src/sys/kern/kern_exit.c:1.265 --- src/sys/kern/kern_exit.c:1.264 Sat Nov 5 02:59:22 2016 +++ src/sys/kern/kern_exit.c Wed Nov 9 00:30:17 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exit.c,v 1.264 2016/11/05 02:59:22 christos Exp $ */ +/* $NetBSD: kern_exit.c,v 1.265 2016/11/09 00:30:17 kre Exp $ */ /*- * Copyright (c) 1998, 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.264 2016/11/05 02:59:22 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.265 2016/11/09 00:30:17 kre Exp $"); #include "opt_ktrace.h" #include "opt_dtrace.h" @@ -826,7 +826,7 @@ sys_wait6(struct lwp *l, const struct sy * 2: This is the only match */ static int -match_process(struct proc *pp, struct proc **q, idtype_t idtype, id_t id, +match_process(const struct proc *pp, struct proc **q, idtype_t idtype, id_t id, int options, struct wrusage *wrusage, siginfo_t *siginfo) { struct rusage *rup; @@ -930,6 +930,66 @@ match_process(struct proc *pp, struct pr } /* + * Determine if there are existing processes being debugged + * that used to be (and sometime later will be again) children + * of a specific parent (while matching wait criteria) + */ +static bool +debugged_child_exists(idtype_t idtype, id_t id, int options, siginfo_t *si, + const struct proc *parent) +{ + struct proc *pp; + + /* + * If we are searching for a specific pid, we can optimise a little + */ + if (idtype == P_PID) { + /* + * Check the specific process to see if its real parent is us + */ + pp = proc_find_raw((pid_t)id); + if (pp != NULL && pp->p_stat != SIDL && pp->p_opptr == parent) { + /* + * using P_ALL here avoids match_process() doing the + * same work that we just did, but incorrectly for + * this scenario. 
+ */ + if (match_process(parent, &pp, P_ALL, id, options, + NULL, si)) + return true; + } + return false; + } + + /* + * For the hard cases, just look everywhere to see if some + * stolen (reparented) process is really our lost child. + * Then check if that process could satisfy the wait conditions. + */ + + /* + * XXX inefficient, but hopefully fairly rare. + * XXX should really use a list of reparented processes. + */ + PROCLIST_FOREACH(pp, &allproc) { + if (pp->p_stat == SIDL) /* XXX impossible ?? */ + continue; + if (pp->p_opptr == parent && + match_process(parent, &pp, idtype, id, options, NULL, si)) + return true; + } + PROCLIST_FOREACH(pp, &zombproc) { + if (pp->p_stat == SIDL) /* XXX impossible ?? */ + continue; + if (pp->p_opptr == parent && + match_process(parent, &pp, idtype, id, options, NULL, si)) + return true; + } + + return false; +} + +/* * Scan list of child processes for a child process that has stopped or * exited. Used by sys_wait4 and 'compat' equivalents. * @@ -940,7 +1000,7 @@ find_stopped_child(struct proc *parent, struct proc **child_p, struct wrusage *wru, siginfo_t *si) { struct proc *child, *dead; - int error, nohang; + int error; KASSERT(mutex_owned(proc_lock)); @@ -969,7 +1029,6 @@ find_stopped_child(struct proc *parent, idtype = P_PGID; } - nohang = (options & WNOHANG) != 0; for (;;) { error = ECHILD; dead = NULL; @@ -1053,9 +1112,22 @@ find_stopped_child(struct proc *parent, } } - if (child != NULL || error != 0 || (nohang && dead == NULL)) { + /* + * If we found nothing, but we are the bereaved parent + * of a stolen child, look and see if that child (or + * one of them) meets our search criteria. 
If so, then + * we cannot succeed, but we can hang (wait...), + * or if WNOHANG, return 0 instead of ECHILD + */ + if (child == NULL && error == ECHILD && + (parent->p_slflag & PSL_CHTRACED) && + debugged_child_exists(idtype, id, options, si, parent)) + error = 0; + + if (child != NULL || error != 0 || + ((options & WNOHANG) != 0 && dead == NULL)) { *child_p = child; - return (nohang && error == ECHILD) ? 0 : error; + return error; } /*