Hi,
we found a bug caused by compiler optimizations.
The top of stack looks like this:
#0 job_chksave (pid=5066) at jobs.c:1949
#1 job_reap (sig=17) at obs.c:428
#2 <signal handler called>
#3 job_subsave () at sh/jobs.c:1990
#4 sh_subshell (shp=0x76cba0, t=0x7fd6050c9fe0, flags=4, comsub=3) at
subshell.c:520
It's based on patched 2012-08-01 so line numbers won't match precisely.
The interesting part is that job_reap was executed during job_subsave
while job_lock should prevent this:
void *job_subsave(void)
{
struct back_save *bp = new_of(struct back_save,0);
job_lock();
*bp = bck;
bp->prev = bck.prev;
bck.count = 0;
bck.list = 0; <<---- HERE when signal came
bck.prev = bp;
job_unlock();
return((void*)bp);
}
when checking the job.in_critical, gdb showed that it is zero
(gdb) p job.in_critical
$1 = 0
So the locking was not effective. I've checked the disassembled
job_subsave and to no surprise:
│0x429801 <job_subsave+1> mov $0x18,%edi
│0x429806 <job_subsave+6> callq 0x405de0 <malloc@plt>
│0x42980b <job_subsave+11> mov 0x343e9e(%rip),%rdx #
0x76d6b0 <bck>
│0x429812 <job_subsave+18> mov %rax,%rbx
│0x429815 <job_subsave+21> mov 0x343bed(%rip),%eax #
0x76d408 <job+40>
│0x42981b <job_subsave+27> movl $0x0,0x343e8b(%rip) #
0x76d6b0 <bck>
│0x429825 <job_subsave+37> mov %rdx,(%rbx)
│0x429828 <job_subsave+40> mov 0x343e89(%rip),%rdx #
0x76d6b8 <bck+8>
│0x42982f <job_subsave+47> test %eax,%eax
│0x429831 <job_subsave+49> movq $0x0,0x343e7c(%rip) #
0x76d6b8 <bck+8>
│0x42983c <job_subsave+60> mov %rdx,0x8(%rbx)
│0x429840 <job_subsave+64> mov 0x343e79(%rip),%rdx #
0x76d6c0 <bck+16>
│0x429847 <job_subsave+71> mov %rbx,0x343e72(%rip) #
0x76d6c0 <bck+16>
│0x42984e <job_subsave+78> mov %rdx,0x10(%rbx)
│0x429852 <job_subsave+82> jne 0x429887 <job_subsave+135>
│0x429854 <job_subsave+84> mov 0x343bb2(%rip),%edi #
0x76d40c <job+44>
│0x42985a <job_subsave+90> test %edi,%edi
│0x42985c <job_subsave+92> je 0x429887 <job_subsave+135>
│0x42985e <job_subsave+94> mov 0x341ca3(%rip),%rax #
0x76b508 <Vmregion>
│0x429865 <job_subsave+101> movl $0x1,0x343b99(%rip) #
0x76d408 <job+40>
│0x42986f <job_subsave+111> mov 0x60(%rax),%rdx
│0x429873 <job_subsave+115> mov $0x1,%eax
│0x429878 <job_subsave+120> mov (%rdx),%esi
│0x42987a <job_subsave+122> test %esi,%esi
│0x42987c <job_subsave+124> je 0x429890 <job_subsave+144>
│0x42987e <job_subsave+126> sub $0x1,%eax
│0x429881 <job_subsave+129> mov %eax,0x343b81(%rip) #
0x76d408 <job+40>
│0x429887 <job_subsave+135> mov %rbx,%rax
│0x42988a <job_subsave+138> pop %rbx
│0x42988b <job_subsave+139> retq
│0x42988c <job_subsave+140> nopl 0x0(%rax)
│0x429890 <job_subsave+144> callq 0x428d60 <job_reap>
│0x429895 <job_subsave+149> mov 0x343b6d(%rip),%eax #
0x76d408 <job+40>
│0x42989b <job_subsave+155> jmp 0x42987e <job_subsave+126>
there is no job.in_critical AKA <job+40> ++ nor --
Even with volatile attribute, gcc reorders the code so it locks,
immediately decrements in_critical again as part of unlocking and then
do the code that's supposed to be lock protected. Adding memory barriers
was necessary to prevent compiler from reorganizing the code.
See the attached patch.
Michal
diff -up ksh-20140606/src/cmd/ksh93/include/jobs.h.locking ksh-20140606/src/cmd/ksh93/include/jobs.h
--- ksh-20140606/src/cmd/ksh93/include/jobs.h.locking 2014-06-25 12:07:13.967670951 +0200
+++ ksh-20140606/src/cmd/ksh93/include/jobs.h 2014-06-25 12:09:54.780820267 +0200
@@ -143,7 +143,7 @@ struct jobs
#define JOB_QFLAG 0x100
#define JOB_QQFLAG 0x200
-extern struct jobs job;
+extern volatile struct jobs job;
#ifdef JOBS
@@ -165,10 +165,11 @@ extern struct jobs job;
# define vmbusy() 0
#endif
-#define job_lock() (job.in_critical++)
+#define job_lock() do { ++job.in_critical; __asm__ __volatile__("":::"memory"); } while(0)
#define job_unlock() \
do { \
int _sig; \
+ __asm__ __volatile__("":::"memory"); \
if (!--job.in_critical && (_sig = job.savesig)) \
{ \
if (!job.in_critical++ && !vmbusy()) \
diff -up ksh-20140606/src/cmd/ksh93/sh/defs.c.locking ksh-20140606/src/cmd/ksh93/sh/defs.c
--- ksh-20140606/src/cmd/ksh93/sh/defs.c.locking 2010-07-21 18:13:03.000000000 +0200
+++ ksh-20140606/src/cmd/ksh93/sh/defs.c 2014-06-25 12:07:13.967670951 +0200
@@ -43,6 +43,6 @@ Dtdisc_t _Nvdisc =
/* reserve room for writable state table */
char *sh_lexstates[ST_NONE] = {0};
-struct jobs job = {0};
+volatile struct jobs job = {0};
int32_t sh_mailchk = 600;
_______________________________________________
ast-developers mailing list
ast-developers@lists.research.att.com
http://lists.research.att.com/mailman/listinfo/ast-developers