Hi,

We found a bug caused by compiler optimizations.

The top of the stack looks like this:

#0  job_chksave (pid=5066) at jobs.c:1949
#1  job_reap (sig=17) at jobs.c:428
#2  <signal handler called>
#3  job_subsave () at sh/jobs.c:1990
#4  sh_subshell (shp=0x76cba0, t=0x7fd6050c9fe0, flags=4, comsub=3) at subshell.c:520

It's based on a patched 2012-08-01 release, so the line numbers won't match precisely.

The interesting part is that job_reap() was executed during job_subsave(), although job_lock() should prevent this:

void *job_subsave(void)
{
   struct back_save *bp = new_of(struct back_save,0);
   job_lock();
   *bp = bck;
   bp->prev = bck.prev;
   bck.count = 0;
   bck.list = 0;   /* <<---- HERE when the signal came */
   bck.prev = bp;
   job_unlock();
   return((void*)bp);
}
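
For context, job_lock() and job_unlock() are plain macros around a counter that the SIGCHLD handler checks; in the unpatched jobs.h they look roughly like this (job_lock() quoted verbatim, job_unlock() abbreviated -- see the patch below):

/* Unpatched locking from include/jobs.h; job_unlock() body abbreviated. */
#define job_lock()	(job.in_critical++)
#define job_unlock()	\
	do { \
		int	_sig; \
		if (!--job.in_critical && (_sig = job.savesig)) \
		{ \
			/* ... deliver the deferred signal via job_reap() ... */ \
		} \
	} while(0)

Nothing here tells the compiler that the increment has to stay ordered before the stores to bck.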

When checking job.in_critical, gdb showed that it was zero:

(gdb) p job.in_critical
$1 = 0

So the locking was not effective. I checked the disassembly of job_subsave(), and to no surprise:
   0x429801 <job_subsave+1>      mov    $0x18,%edi
   0x429806 <job_subsave+6>      callq  0x405de0 <malloc@plt>
   0x42980b <job_subsave+11>     mov    0x343e9e(%rip),%rdx        # 0x76d6b0 <bck>
   0x429812 <job_subsave+18>     mov    %rax,%rbx
   0x429815 <job_subsave+21>     mov    0x343bed(%rip),%eax        # 0x76d408 <job+40>
   0x42981b <job_subsave+27>     movl   $0x0,0x343e8b(%rip)        # 0x76d6b0 <bck>
   0x429825 <job_subsave+37>     mov    %rdx,(%rbx)
   0x429828 <job_subsave+40>     mov    0x343e89(%rip),%rdx        # 0x76d6b8 <bck+8>
   0x42982f <job_subsave+47>     test   %eax,%eax
   0x429831 <job_subsave+49>     movq   $0x0,0x343e7c(%rip)        # 0x76d6b8 <bck+8>
   0x42983c <job_subsave+60>     mov    %rdx,0x8(%rbx)
   0x429840 <job_subsave+64>     mov    0x343e79(%rip),%rdx        # 0x76d6c0 <bck+16>
   0x429847 <job_subsave+71>     mov    %rbx,0x343e72(%rip)        # 0x76d6c0 <bck+16>
   0x42984e <job_subsave+78>     mov    %rdx,0x10(%rbx)
   0x429852 <job_subsave+82>     jne    0x429887 <job_subsave+135>
   0x429854 <job_subsave+84>     mov    0x343bb2(%rip),%edi        # 0x76d40c <job+44>
   0x42985a <job_subsave+90>     test   %edi,%edi
   0x42985c <job_subsave+92>     je     0x429887 <job_subsave+135>
   0x42985e <job_subsave+94>     mov    0x341ca3(%rip),%rax        # 0x76b508 <Vmregion>
   0x429865 <job_subsave+101>    movl   $0x1,0x343b99(%rip)        # 0x76d408 <job+40>
   0x42986f <job_subsave+111>    mov    0x60(%rax),%rdx
   0x429873 <job_subsave+115>    mov    $0x1,%eax
   0x429878 <job_subsave+120>    mov    (%rdx),%esi
   0x42987a <job_subsave+122>    test   %esi,%esi
   0x42987c <job_subsave+124>    je     0x429890 <job_subsave+144>
   0x42987e <job_subsave+126>    sub    $0x1,%eax
   0x429881 <job_subsave+129>    mov    %eax,0x343b81(%rip)        # 0x76d408 <job+40>
   0x429887 <job_subsave+135>    mov    %rbx,%rax
   0x42988a <job_subsave+138>    pop    %rbx
   0x42988b <job_subsave+139>    retq
   0x42988c <job_subsave+140>    nopl   0x0(%rax)
   0x429890 <job_subsave+144>    callq  0x428d60 <job_reap>
   0x429895 <job_subsave+149>    mov    0x343b6d(%rip),%eax        # 0x76d408 <job+40>
   0x42989b <job_subsave+155>    jmp    0x42987e <job_subsave+126>

There is no increment or decrement of job.in_critical (AKA <job+40>) anywhere near the stores to bck: the ++/-- pair from job_lock()/job_unlock() has been optimized away entirely, and in_critical is only written on the slow path that delivers a saved signal.
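
Read back into C, gcc effectively compiled the function like this (my hand-decompilation of the listing above, not actual source; the vmbusy() test and the job_reap() call are simplified):

/* Hand-decompiled sketch of the generated code above -- my reading,
 * not real ksh source. */
void *job_subsave(void)
{
	struct back_save *bp = malloc(sizeof(struct back_save));
	int old = job.in_critical;	/* read once into %eax */
	*bp = bck;			/* the "protected" stores run ... */
	bck.count = 0;
	bck.list = 0;
	bck.prev = bp;			/* ... with in_critical untouched */
	if (old == 0 && job.savesig)	/* the ++/-- pair cancelled out */
	{
		job.in_critical = 1;	/* written only on this slow path */
		if (!vmbusy())
			job_reap(job.savesig);
		job.in_critical--;
	}
	return((void*)bp);
}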

Even with the volatile qualifier, gcc reorders the code so that it locks, immediately decrements in_critical again as part of unlocking, and only then executes the code that is supposed to be lock-protected. Adding compiler memory barriers was necessary to prevent gcc from reorganizing the code.
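
The barrier is an empty asm statement with a "memory" clobber: it tells gcc that arbitrary memory may be read or written at that point, so no load or store can be moved across it. In isolation the idiom looks like this (a minimal sketch, not ksh code):

/* Minimal illustration of the gcc compiler-barrier idiom (not ksh code). */
#define barrier()	__asm__ __volatile__("" ::: "memory")

volatile int in_critical;	/* flag checked by a signal handler */
int shared;			/* data the flag is supposed to protect */

void update(int v)
{
	++in_critical;
	barrier();		/* gcc may not sink the ++ below this point */
	shared = v;		/* so this store stays inside the "lock" */
	barrier();		/* ...nor hoist the -- above it */
	--in_critical;
}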

See the attached patch.

Michal


diff -up ksh-20140606/src/cmd/ksh93/include/jobs.h.locking ksh-20140606/src/cmd/ksh93/include/jobs.h
--- ksh-20140606/src/cmd/ksh93/include/jobs.h.locking	2014-06-25 12:07:13.967670951 +0200
+++ ksh-20140606/src/cmd/ksh93/include/jobs.h	2014-06-25 12:09:54.780820267 +0200
@@ -143,7 +143,7 @@ struct jobs
 #define JOB_QFLAG	0x100
 #define JOB_QQFLAG	0x200
 
-extern struct jobs job;
+extern volatile struct jobs job;
 
 #ifdef JOBS
 
@@ -165,10 +165,11 @@ extern struct jobs job;
 #   define vmbusy()	0
 #endif
 
-#define job_lock()	(job.in_critical++)
+#define job_lock()	do { ++job.in_critical; __asm__ __volatile__("":::"memory"); } while(0)
 #define job_unlock()	\
 	do { \
 		int	_sig; \
+		__asm__ __volatile__("":::"memory"); \
 		if (!--job.in_critical && (_sig = job.savesig)) \
 		{ \
 			if (!job.in_critical++ && !vmbusy()) \
diff -up ksh-20140606/src/cmd/ksh93/sh/defs.c.locking ksh-20140606/src/cmd/ksh93/sh/defs.c
--- ksh-20140606/src/cmd/ksh93/sh/defs.c.locking	2010-07-21 18:13:03.000000000 +0200
+++ ksh-20140606/src/cmd/ksh93/sh/defs.c	2014-06-25 12:07:13.967670951 +0200
@@ -43,6 +43,6 @@ Dtdisc_t	_Nvdisc =
 /* reserve room for writable state table */
 char *sh_lexstates[ST_NONE] = {0};
 
-struct jobs	job = {0};
+volatile struct jobs	job = {0};
 int32_t		sh_mailchk = 600;
 