All, We have seen the following ksh hang with the 93t version of ksh running on Red Hat Enterprise Linux AS release 4 (Nahant Update 5).
As per the analysis below, the while loop at line 1657 never exits because it is examining a linked list that has become circular (function job_chksave was called from function job_post with parameter pid = 27676, which is a child process that has been spawned by our hung ksh process). This looks similar to: - Red Hat bug https://bugzilla.redhat.com/show_bug.cgi?id=435159 - in this case pid is not 0 and the list has become circular, although in that bug job_chksave was called from job_unpost rather than job_post. - Red Hat bug https://bugzilla.redhat.com/show_bug.cgi?id=518942, which contains a fix dated 20090630 on top of a version of ksh that contains a check for a circular linked list, but that is when job_chksave is called from job_subrestore, not job_post. As per http://www2.research.att.com/~gsf/download/gen/ast-ksh.html#ksh93 there is a 93t+ version of ksh that contains a fix for "09-01-13 A bug in which background jobs and pipelines that were not waited for could, in rare instances, cause the shell to go into an infinite loop or fail has been fixed." Please confirm whether the 93t+ version of ksh contains the fix for either of the above Red Hat bugs and for the issue we are seeing. 
Analysis ====== (gdb) where #0 0x0000000000417634 in job_chksave () #1 0x00000000004169e3 in job_post () #2 0x000000000043e109 in _sh_fork () #3 0x000000000043fb4f in sh_ntfork () #4 0x000000000043b2c2 in sh_exec () #5 0x0000000000407de1 in exfile () #6 0x00000000004073a9 in sh_main () #7 0x00000000004067b6 in main () display/i $pc 1: x/i $pc 0x417634 <job_chksave+25>: cmp %eax,0x8(%rdi) (gdb) stepi 0x0000000000417637 in job_chksave () 1: x/i $pc 0x417637 <job_chksave+28>: je 0x417652 <job_chksave+55> (gdb) stepi 0x0000000000417639 in job_chksave () 1: x/i $pc 0x417639 <job_chksave+30>: test %eax,%eax (gdb) stepi 0x000000000041763b in job_chksave () 1: x/i $pc 0x41763b <job_chksave+32>: jne 0x417643 <job_chksave+40> (gdb) stepi 0x0000000000417643 in job_chksave () 1: x/i $pc 0x417643 <job_chksave+40>: mov %rdi,%rdx (gdb) stepi 0x0000000000417646 in job_chksave () 1: x/i $pc 0x417646 <job_chksave+43>: mov (%rdi),%rdi (gdb) stepi 0x0000000000417649 in job_chksave () 1: x/i $pc 0x417649 <job_chksave+46>: test %rdi,%rdi (gdb) stepi 0x000000000041764c in job_chksave () 1: x/i $pc 0x41764c <job_chksave+49>: jne 0x417634 <job_chksave+25> (gdb) stepi 0x0000000000417634 in job_chksave () 1: x/i $pc 0x417634 <job_chksave+25>: cmp %eax,0x8(%rdi) Dump of assembler code for function job_chksave: 0x000000000041761b <job_chksave+0>: push %rbx 0x000000000041761c <job_chksave+1>: mov %edi,%eax 0x000000000041761e <job_chksave+3>: mov 2180275(%rip),%rdi # 0x62bad8 <bck+8> 0x0000000000417625 <job_chksave+10>: mov $0x0,%edx 0x000000000041762a <job_chksave+15>: mov $0xffffffff,%ebx r = -1 0x000000000041762f <job_chksave+20>: test %rdi,%rdi %rdi is variable jp - while (jp) 0x0000000000417632 <job_chksave+23>: je 0x4176aa <job_chksave+143> 0x0000000000417634 <job_chksave+25>: cmp %eax,0x8(%rdi) <--- %eax is variable pid; check if (jp->pid == pid) 0x0000000000417637 <job_chksave+28>: je 0x417652 <job_chksave+55> <--- 0x0000000000417639 <job_chksave+30>: test %eax,%eax <--- check if pid == 0 
0x000000000041763b <job_chksave+32>: jne 0x417643 <job_chksave+40> <--- 0x000000000041763d <job_chksave+34>: cmpq $0x0,(%rdi) check !jp->next 0x0000000000417641 <job_chksave+38>: je 0x417652 <job_chksave+55> 0x0000000000417643 <job_chksave+40>: mov %rdi,%rdx <--- jpold=jp 0x0000000000417646 <job_chksave+43>: mov (%rdi),%rdi <--- jp = jp -> next 0x0000000000417649 <job_chksave+46>: test %rdi,%rdi <--- while (jp) 0x000000000041764c <job_chksave+49>: jne 0x417634 <job_chksave+25> <--- 0x000000000041764e <job_chksave+51>: data16 0x000000000041764f <job_chksave+52>: nop 0x0000000000417650 <job_chksave+53>: jmp 0x4176aa <job_chksave+143> 0x0000000000417652 <job_chksave+55>: test %rdi,%rdi 0x0000000000417655 <job_chksave+58>: je 0x4176aa <job_chksave+143> 0x0000000000417657 <job_chksave+60>: mov $0x0,%ebx *** r= 0; 0x000000000041765c <job_chksave+65>: test %eax,%eax 0x000000000041765e <job_chksave+67>: data16 0x000000000041765f <job_chksave+68>: nop 0x0000000000417660 <job_chksave+69>: je 0x417666 <job_chksave+75> 0x0000000000417662 <job_chksave+71>: movzwl 0xc(%rdi),%ebx 0x0000000000417666 <job_chksave+75>: test %rdx,%rdx 0x0000000000417669 <job_chksave+78>: je 0x417673 <job_chksave+88> 0x000000000041766b <job_chksave+80>: mov (%rdi),%rax 0x000000000041766e <job_chksave+83>: mov %rax,(%rdx) 0x0000000000417671 <job_chksave+86>: jmp 0x41767d <job_chksave+98> 0x0000000000417673 <job_chksave+88>: mov (%rdi),%rax 0x0000000000417676 <job_chksave+91>: mov %rax,2180187(%rip) # 0x62bad8 <bck+8> 0x000000000041767d <job_chksave+98>: decl 2180173(%rip) # 0x62bad0 <bck> 0x0000000000417683 <job_chksave+104>: cmpl $0x3,2179974(%rip) # 0x62ba10 <njob_savelist> 0x000000000041768a <job_chksave+111>: jg 0x4176a5 <job_chksave+138> 0x000000000041768c <job_chksave+113>: incl 2179966(%rip) # 0x62ba10 <njob_savelist> 0x0000000000417692 <job_chksave+119>: mov 2179951(%rip),%rax # 0x62ba08 <job_savelist> 0x0000000000417699 <job_chksave+126>: mov %rax,(%rdi) 0x000000000041769c <job_chksave+129>: 
mov %rdi,2179941(%rip) # 0x62ba08 <job_savelist> 0x00000000004176a3 <job_chksave+136>: jmp 0x4176aa <job_chksave+143> 0x00000000004176a5 <job_chksave+138>: callq 0x4b2100 <free> 0x00000000004176aa <job_chksave+143>: mov %ebx,%eax 0x00000000004176ac <job_chksave+145>: pop %rbx 0x00000000004176ad <job_chksave+146>: retq Stepping through the code, variable jp (%rdi) becomes rdi 0x683570 6829424 rdi 0x6836f0 6829808 rdi 0x683270 6828656 rdi 0x6832a0 6828704 rdi 0x683540 6829376 rdi 0x683680 6829696 rdi 0x683570 6829424 At job_chksave+30, eax (variable pid) is not 0 - it is 27676, which is the child pid of the ksh process. 1: x/i $pc 0x417639 <job_chksave+30>: test %eax,%eax (gdb) info registers rax 0x6c1c 27676 rbx 0xffffffff 4294967295 rcx 0x64d6b0 6608560 rdx 0x683680 6829696 rsi 0x0 0 rdi 0x683570 6829424 rbp 0x0 0x0 rsp 0x7fbfffd5e0 0x7fbfffd5e0 r8 0x0 0 r9 0x678b30 6785840 r10 0x7fbfffd4c0 548682060992 r11 0x246 582 r12 0x6c1c 27676 r13 0x0 0 r14 0x62afa0 6467488 r15 0x0 0 rip 0x417639 0x417639 <job_chksave+30> eflags 0x397 919 cs 0x33 51 ss 0x2b 43 ds 0x0 0 es 0x0 0 fs 0x0 0 gs 0x0 0 Regards, David Williams Morgan Stanley | Technology & Data 25 Cabot Square | Canary Wharf | Floor 02 London, E14 4QA Phone: +44 20 7677-1968 [email protected]<mailto:[email protected]> -------------------------------------------------------------------------- NOTICE: If received in error, please destroy, and notify sender. Sender does not intend to waive confidentiality or privilege. Use of this email is prohibited when received in error. We may monitor and store emails to the extent permitted by applicable law.
_______________________________________________ ast-developers mailing list [email protected] https://mailman.research.att.com/mailman/listinfo/ast-developers
