All,

We have seen the following ksh hang with the 93t version of ksh running on
Red Hat Enterprise Linux AS release 4 (Nahant Update 5).

As per the analysis below, the while loop at line 1657 never exits because it
is examining a linked list that has become circular (function job_chksave was
called from function job_post with parameter pid = 27676, which is a child
process that has been spawned by our hung ksh process).

This looks similar to

- Red Hat bug https://bugzilla.redhat.com/show_bug.cgi?id=435159 in this case 
pid is not 0 and
  the list has become circular, although in that bug job_chksave was called from 
job_unpost rather
  than job_post.

- redhat bug https://bugzilla.redhat.com/show_bug.cgi?id=518942 that contains a 
fix dated
  20090630 on top of a version of ksh that contains a check for a circular 
linked list but
  this is when job_chksave is called from job_subrestore not job_post.

As per http://www2.research.att.com/~gsf/download/gen/ast-ksh.html#ksh93 there 
is a 93t+
version of ksh that contains a fix for

"09-01-13 A bug in which background jobs and pipelines that were not waited for 
could, in rare
instances, cause the shell to go into an infinite loop or fail has been fixed."

Please confirm whether the 93t+ version of ksh contains the fix for either of
the above Red Hat bugs and for the issue we are seeing.

Analysis
======

(gdb) where
#0  0x0000000000417634 in job_chksave ()
#1  0x00000000004169e3 in job_post ()
#2  0x000000000043e109 in _sh_fork ()
#3  0x000000000043fb4f in sh_ntfork ()
#4  0x000000000043b2c2 in sh_exec ()
#5  0x0000000000407de1 in exfile ()
#6  0x00000000004073a9 in sh_main ()
#7  0x00000000004067b6 in main ()


display/i $pc
1: x/i $pc  0x417634 <job_chksave+25>:  cmp    %eax,0x8(%rdi)
(gdb) stepi
0x0000000000417637 in job_chksave ()
1: x/i $pc  0x417637 <job_chksave+28>:  je     0x417652 <job_chksave+55>
(gdb) stepi
0x0000000000417639 in job_chksave ()
1: x/i $pc  0x417639 <job_chksave+30>:  test   %eax,%eax
(gdb) stepi
0x000000000041763b in job_chksave ()
1: x/i $pc  0x41763b <job_chksave+32>:  jne    0x417643 <job_chksave+40>
(gdb) stepi
0x0000000000417643 in job_chksave ()
1: x/i $pc  0x417643 <job_chksave+40>:  mov    %rdi,%rdx
(gdb) stepi
0x0000000000417646 in job_chksave ()
1: x/i $pc  0x417646 <job_chksave+43>:  mov    (%rdi),%rdi
(gdb) stepi
0x0000000000417649 in job_chksave ()
1: x/i $pc  0x417649 <job_chksave+46>:  test   %rdi,%rdi
(gdb) stepi
0x000000000041764c in job_chksave ()
1: x/i $pc  0x41764c <job_chksave+49>:  jne    0x417634 <job_chksave+25>
(gdb) stepi
0x0000000000417634 in job_chksave ()
1: x/i $pc  0x417634 <job_chksave+25>:  cmp    %eax,0x8(%rdi)


Dump of assembler code for function job_chksave:
0x000000000041761b <job_chksave+0>:     push   %rbx
0x000000000041761c <job_chksave+1>:     mov    %edi,%eax
0x000000000041761e <job_chksave+3>:     mov    2180275(%rip),%rdi        # 
0x62bad8 <bck+8>
0x0000000000417625 <job_chksave+10>:    mov    $0x0,%edx
0x000000000041762a <job_chksave+15>:    mov    $0xffffffff,%ebx                 
       r = -1
0x000000000041762f <job_chksave+20>:    test   %rdi,%rdi                        
       %rdi is variable jp - while (jp)
0x0000000000417632 <job_chksave+23>:    je     0x4176aa <job_chksave+143>
0x0000000000417634 <job_chksave+25>:    cmp    %eax,0x8(%rdi)                   
  <--- %eax is variable pid; check if (jp->pid == pid)
0x0000000000417637 <job_chksave+28>:    je     0x417652 <job_chksave+55>        
  <---
0x0000000000417639 <job_chksave+30>:    test   %eax,%eax                        
  <--- check if pid == 0
0x000000000041763b <job_chksave+32>:    jne    0x417643 <job_chksave+40>        
  <---
0x000000000041763d <job_chksave+34>:    cmpq   $0x0,(%rdi)                      
       check !jp->next
0x0000000000417641 <job_chksave+38>:    je     0x417652 <job_chksave+55>
0x0000000000417643 <job_chksave+40>:    mov    %rdi,%rdx                        
  <--- jpold=jp
0x0000000000417646 <job_chksave+43>:    mov    (%rdi),%rdi                      
  <--- jp = jp -> next
0x0000000000417649 <job_chksave+46>:    test   %rdi,%rdi                        
  <--- while (jp)
0x000000000041764c <job_chksave+49>:    jne    0x417634 <job_chksave+25>        
  <---
0x000000000041764e <job_chksave+51>:    data16
0x000000000041764f <job_chksave+52>:    nop
0x0000000000417650 <job_chksave+53>:    jmp    0x4176aa <job_chksave+143>
0x0000000000417652 <job_chksave+55>:    test   %rdi,%rdi
0x0000000000417655 <job_chksave+58>:    je     0x4176aa <job_chksave+143>
0x0000000000417657 <job_chksave+60>:    mov    $0x0,%ebx                        
***  r= 0;
0x000000000041765c <job_chksave+65>:    test   %eax,%eax
0x000000000041765e <job_chksave+67>:    data16
0x000000000041765f <job_chksave+68>:    nop
0x0000000000417660 <job_chksave+69>:    je     0x417666 <job_chksave+75>
0x0000000000417662 <job_chksave+71>:    movzwl 0xc(%rdi),%ebx
0x0000000000417666 <job_chksave+75>:    test   %rdx,%rdx
0x0000000000417669 <job_chksave+78>:    je     0x417673 <job_chksave+88>
0x000000000041766b <job_chksave+80>:    mov    (%rdi),%rax
0x000000000041766e <job_chksave+83>:    mov    %rax,(%rdx)
0x0000000000417671 <job_chksave+86>:    jmp    0x41767d <job_chksave+98>
0x0000000000417673 <job_chksave+88>:    mov    (%rdi),%rax
0x0000000000417676 <job_chksave+91>:    mov    %rax,2180187(%rip)        # 
0x62bad8 <bck+8>
0x000000000041767d <job_chksave+98>:    decl   2180173(%rip)        # 0x62bad0 
<bck>
0x0000000000417683 <job_chksave+104>:   cmpl   $0x3,2179974(%rip)        # 
0x62ba10 <njob_savelist>
0x000000000041768a <job_chksave+111>:   jg     0x4176a5 <job_chksave+138>
0x000000000041768c <job_chksave+113>:   incl   2179966(%rip)        # 0x62ba10 
<njob_savelist>
0x0000000000417692 <job_chksave+119>:   mov    2179951(%rip),%rax        # 
0x62ba08 <job_savelist>
0x0000000000417699 <job_chksave+126>:   mov    %rax,(%rdi)
0x000000000041769c <job_chksave+129>:   mov    %rdi,2179941(%rip)        # 
0x62ba08 <job_savelist>
0x00000000004176a3 <job_chksave+136>:   jmp    0x4176aa <job_chksave+143>
0x00000000004176a5 <job_chksave+138>:   callq  0x4b2100 <free>
0x00000000004176aa <job_chksave+143>:   mov    %ebx,%eax
0x00000000004176ac <job_chksave+145>:   pop    %rbx
0x00000000004176ad <job_chksave+146>:   retq


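Stepping through the code, variable jp (%rdi) takes the following values. The
first address, 0x683570, recurs after six nodes, which shows the list is circular: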

rdi            0x683570 6829424
rdi            0x6836f0 6829808
rdi            0x683270 6828656
rdi            0x6832a0 6828704
rdi            0x683540 6829376
rdi            0x683680 6829696
rdi            0x683570 6829424

At job_chksave+30, %eax (variable pid) is not 0 - it is 27676, which is the
child pid of the ksh process.

1: x/i $pc  0x417639 <job_chksave+30>:  test   %eax,%eax
(gdb) info registers
rax            0x6c1c   27676
rbx            0xffffffff       4294967295
rcx            0x64d6b0 6608560
rdx            0x683680 6829696
rsi            0x0      0
rdi            0x683570 6829424
rbp            0x0      0x0
rsp            0x7fbfffd5e0     0x7fbfffd5e0
r8             0x0      0
r9             0x678b30 6785840
r10            0x7fbfffd4c0     548682060992
r11            0x246    582
r12            0x6c1c   27676
r13            0x0      0
r14            0x62afa0 6467488
r15            0x0      0
rip            0x417639 0x417639 <job_chksave+30>
eflags         0x397    919
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x0      0

Regards,
David Williams
Morgan Stanley | Technology & Data
25 Cabot Square | Canary Wharf | Floor 02
London, E14 4QA
Phone: +44 20 7677-1968
[email protected]<mailto:[email protected]>


--------------------------------------------------------------------------
NOTICE: If received in error, please destroy, and notify sender. Sender does 
not intend to waive confidentiality or privilege. Use of this email is 
prohibited when received in error. We may monitor and store emails to the 
extent permitted by applicable law.
_______________________________________________
ast-developers mailing list
[email protected]
https://mailman.research.att.com/mailman/listinfo/ast-developers

Reply via email to