On Mon, Jul 29, 2013 at 3:34 PM, Roland Mainz <[email protected]> wrote:
> On Sat, Jul 27, 2013 at 6:17 PM, Glenn Fowler <[email protected]> wrote:
[snip]
> I hit (again) one of the nasty and hard to reproduce problems (which
> means: No... I don't have a testcase... ;-( ) related to job
> management:
> -- snip --
[snip]
> -- snip --
>
> ... this hang occured because ksh93 tries to do job management from
> within the signal handler... which is IMO _far_ from being safe to
> do...
>
> David: Is there *ANY* way the job management can be moved out of the
> signal handler and just rely on the siginfo data ?
Attached (as
"astksh20130727_prototype_process_sigchld_outside_signalhandler001.diff.txt")
is a prototype (hacked) patch for discussion.
* The good news are:
- SIGCHLD processing now seems to be reliable. I can't find a simple
testcase to break it (but see the item about the testsuite below)
- SIGCHLD processing now works without problems under "valgrind" control
- The patch seems to fix a lot of Cedric's issues with signals being
lost. Even the testcase where a $ ... while ! wait ; sleep 8 ; done #
was loosing signals now works... if I replace the "sleep 8" with
"compound -a dummy ; poll -t 8 dummy" (so there seems to be a seperate
problem with "sleep" ... somehow)
* The bad news are:
- The patch doesn't work very well for interactive shells
- The "sigchld.sh" testsuite module fails in several places... AFAIK
the reason is that we don't check the list of queued CHLD signals in
all places where they should be checked (e.g. each time before we
execute a shell statement and before and after running external
processes and each time wait(1) internally wakes up from a signal)
- $ jobs -l # reports already purged processes... erm... no clue why... ;-(
* Notes about interactive mode:
An interactive shell AFAIK requires to report terminating childs
"immediately" if some shell options are set (erm... David: Which
option is this again ?). AFAIK this can be archived by two ways:
1. wake up from time-to-time (e.g. polling) ... but this has a bad
effect in CPU usage (imagine 1000 shell sessions in a multi-user
system waking-up 10 times per second... or imagine the impact for
battery life in a laptop)
2. add some glue to the editor code to wake up for each signald and
process it when |sfpkrd()| returns...
-- snip --
(gdb) where
#0 0x00007fc2be588860 in __poll_nocancel () at
../sysdeps/unix/syscall-template.S:81
#1 0x000000000050280d in sfpkrd (fd=0, argbuf=0x7fff33545080, n=80,
rc=0, tm=-1, action=1)
at
/home/test001/work/ast_ksh_20130727/build_i386_64bit_debug_patched/src/lib/libast/sfio/sfpkrd.c:142
#2 0x0000000000410fb9 in ed_read (context=0x7fc2bf063960, fd=0,
buff=0x7fff33545080 "\240PT3\377\177", size=80, reedit=0)
at
/home/test001/work/ast_ksh_20130727/build_i386_64bit_debug_patched/src/cmd/ksh93/edit/edit.c:884
-- snip --
... that's the strategy my patch attempts...
David: What do you think ?
----
Bye,
Roland
--
__ . . __
(o.\ \/ /.o) [email protected]
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
diff -r -u original/src/cmd/ksh93/edit/edit.c
build_i386_64bit_debug/src/cmd/ksh93/edit/edit.c
--- src/cmd/ksh93/edit/edit.c 2013-03-29 00:02:06.000000000 +0100
+++ src/cmd/ksh93/edit/edit.c 2013-07-30 15:28:25.920003108 +0200
@@ -882,6 +882,8 @@
errno = 0;
if(!waitevent || (rv=(*waitevent)(fd,-1L,0))>=0)
rv = sfpkrd(fd,buff,size,delim,-1L,mode);
+
+ job_waitsafe(SIGCHLD, NULL, NULL);
}
if(rv < 0)
{
diff -r -u original/src/cmd/ksh93/sh/fault.c
build_i386_64bit_debug/src/cmd/ksh93/sh/fault.c
--- src/cmd/ksh93/sh/fault.c 2013-07-23 19:24:50.000000000 +0200
+++ src/cmd/ksh93/sh/fault.c 2013-07-29 20:57:34.568716091 +0200
@@ -117,8 +117,13 @@
register char *trap;
register struct checkpt *pp = (struct checkpt*)shp->jmplist;
int action=0;
- if(sig==SIGCHLD)
- sfprintf(sfstdout,"childsig\n");
+
+ if (sig==SIGCHLD)
+ {
+ set_trapinfo(shp,sig,info);
+ return;
+ }
+
#ifdef SIGWINCH
if(sig==SIGWINCH)
{
@@ -139,13 +144,7 @@
sigrelease(sig);
kill(getpid(),sig);
}
- if(shp->savesig)
- {
- /* critical region, save and process later */
- if(!(shp->sigflag[sig]&SH_SIGIGNORE))
- shp->savesig = sig;
- return;
- }
+
/* handle ignored signals */
if(trap && *trap==0)
return;
@@ -292,6 +291,8 @@
shp->st.trapcom = (char**)calloc(n,sizeof(char*));
shp->sigflag = (unsigned char*)calloc(n,sizeof(char));
shp->gd->sigmsg = (char**)calloc(n,sizeof(char*));
+ shp->siginfo = (void**)calloc(sizeof(void*),shp->gd->sigmax);
+
for(tp=shtab_signals; sig=tp->sh_number; tp++)
{
n = (sig>>SH_SIGBITS);
@@ -329,8 +330,6 @@
}
else
{
- if(!shp->siginfo)
- shp->siginfo =
(void**)calloc(sizeof(void*),shp->gd->sigmax);
flag |= SH_SIGFAULT;
if(sig==SIGALRM && fun!=SIG_DFL &&
fun!=(sh_sigfun_t)sh_fault)
signal(sig,fun);
@@ -472,6 +471,7 @@
shp->sigflag[sig] &= ~SH_SIGTRAP;
if(sig==SIGCHLD)
{
+ job_waitsafe(sig, NULL, NULL);
job_chldtrap(shp,shp->st.trapcom[SIGCHLD],1);
continue;
}
@@ -686,7 +686,14 @@
signal(sig,SIG_DFL);
sigrelease(sig);
kill(getpid(),sig);
+#if 1
+ while(shp->siginfo[SIGCHLD])
+ {
+ job_waitsafe(SIGCHLD, NULL, NULL);
+ }
+#else
pause();
+#endif
}
#if SHOPT_KIA
if(sh_isoption(shp,SH_NOEXEC))
diff -r -u original/src/cmd/ksh93/sh/jobs.c
build_i386_64bit_debug/src/cmd/ksh93/sh/jobs.c
--- src/cmd/ksh93/sh/jobs.c 2013-07-17 18:22:06.000000000 +0200
+++ src/cmd/ksh93/sh/jobs.c 2013-07-29 15:52:05.790485028 +0200
@@ -362,9 +362,9 @@
* This is the SIGCLD interrupt routine
*/
#ifdef _lib_sigaction
-static void job_waitsafe(int sig, siginfo_t *info, void *context)
+void job_waitsafe(int sig, siginfo_t *info, void *context)
#else
-static void job_waitsafe(int sig)
+void job_waitsafe(int sig)
#endif
{
if(job.in_critical || vmbusy())
@@ -648,9 +648,9 @@
{
register int ntry=0;
job.fd = JOBTTY;
- signal(SIGCHLD,job_waitsafe);
+ signal(SIGCHLD,sh_fault);
# if defined(SIGCLD) && (SIGCLD!=SIGCHLD)
- signal(SIGCLD,job_waitsafe);
+ signal(SIGCLD,sh_fault);
# endif
if(njob_savelist < NJOB_SAVELIST)
init_savelist();
diff -r -u original/src/cmd/ksh93/sh/xec.c
build_i386_64bit_debug/src/cmd/ksh93/sh/xec.c
--- src/cmd/ksh93/sh/xec.c 2013-07-25 02:37:26.000000000 +0200
+++ src/cmd/ksh93/sh/xec.c 2013-07-28 05:36:31.827214685 +0200
@@ -2710,8 +2710,10 @@
Namval_t *oldnspace = shp->namespace;
int offset = stktell(stkp);
int flag=NV_NOASSIGN|NV_NOARRAY|NV_VARNAME;
+#if 0
if(cp)
errormsg(SH_DICT,ERROR_exit(1),e_ident,fname);
+#endif
sfputc(stkp,'.');
sfputr(stkp,fname,0);
np =
nv_open(stkptr(stkp,offset),shp->var_tree,flag);
_______________________________________________
ast-developers mailing list
[email protected]
http://lists.research.att.com/mailman/listinfo/ast-developers