Hi

Attached patch fixes a race condition which causes Vim to hang
when resuming (with fg) after having suspended Vim with CTRL-Z.
Bug does not happen 100% of the time, but once in a while.  I
can reproduce the bug with Vim-7.2.121 in a xterm on Linux x86
after doing

CTRL-Z
fg
CTRL-Z
fg
, etc, ... several times until Vim hangs.

Pressing CTRL-C is a workaround when it hangs.

See also Ubuntu bug https://bugs.launchpad.net/bugs/291373

Bug only happens when configure script defined -D_REENTRANT
which happens at least when I configure Vim as follows:

 ./configure --enable-tclinterp --enable-rubyinterp \
   --enable-perlinterp  --enable-pythoninterp \
  --with-features=huge --enable-gui=gnome2

The problem happens I think because the following 2 lines
in os_unix.c in mch_suspect() have a race condition:

    if (!sigcont_received)
        pause();

If the signal is handled _after_ the test of sigcont_received
variable but _before_ pause() is executed, then pause() will
wait for ever since signal already happened.

Also I think that the variable sigcont_received should also be
declared volatile since it's modified by a signal handler.

Attached patch fixes the race condition by waiting until
sigcont_received is set without calling pause():

        long wait;
        for (wait = 0; !sigcont_received && wait <= 3L; wait++)
            /* Loop is not entered most of the time */
            mch_delay(wait, FALSE);

Note that most of the time the loop does not even need
to sleep() at all (sigcont_received is already set most of
the time before entering the loop).  And when it it sleeps,
a sleep(0) seems always enough in practice.  I've put
debug printf() after the loop that waits for sigcont_received
and I see this when doing several suspend (CTRL-Z, fg):

**** wait=0
**** wait=0
**** wait=0
**** wait=0
**** wait=0
**** wait=0
**** wait=0
**** wait=1
**** wait=0
**** wait=0
**** wait=0
**** wait=0
**** wait=0
**** wait=1
**** wait=0
**** wait=0
**** wait=0
**** wait=0

Notice that in 2 occasions, wait was 1 (meaning that the loop
iterated once with a sleep of 0 (yielding the CPU which is
enough to get the signal handled).

Regards
-- Dominique

--~--~---------~--~----~------------~-------~--~----~
You received this message from the "vim_dev" maillist.
For more information, visit http://www.vim.org/maillist.php
-~----------~----~----~----~------~----~------~--~---

Index: os_unix.c
===================================================================
RCS file: /cvsroot/vim/vim7/src/os_unix.c,v
retrieving revision 1.90
diff -c -r1.90 os_unix.c
*** os_unix.c	22 Feb 2009 01:52:46 -0000	1.90
--- os_unix.c	22 Feb 2009 19:05:29 -0000
***************
*** 1068,1080 ****
      SIGRETURN;
  }
  
! #ifdef _REENTRANT
  /*
   * On Solaris with multi-threading, suspending might not work immediately.
   * Catch the SIGCONT signal, which will be used as an indication whether the
   * suspending has been done or not.
   */
! static int sigcont_received;
  static RETSIGTYPE sigcont_handler __ARGS(SIGPROTOARG);
  
  /*
--- 1068,1086 ----
      SIGRETURN;
  }
  
! #if defined(_REENTRANT) && defined(SIGCONT)
  /*
   * On Solaris with multi-threading, suspending might not work immediately.
   * Catch the SIGCONT signal, which will be used as an indication whether the
   * suspending has been done or not.
+  *
+  * On Linux, signal is not always handled immediately either.
+  * See https://bugs.launchpad.net/bugs/291373
+  *
+  * sigcont_received is declared volatile to prevent compiler optimizations
+  * since this variable is changed in a signal handler.
   */
! static volatile int sigcont_received;
  static RETSIGTYPE sigcont_handler __ARGS(SIGPROTOARG);
  
  /*
***************
*** 1118,1132 ****
      }
  # endif
  
! # ifdef _REENTRANT
      sigcont_received = FALSE;
  # endif
      kill(0, SIGTSTP);	    /* send ourselves a STOP signal */
! # ifdef _REENTRANT
!     /* When we didn't suspend immediately in the kill(), do it now.  Happens
!      * on multi-threaded Solaris. */
!     if (!sigcont_received)
! 	pause();
  # endif
  
  # ifdef FEAT_TITLE
--- 1124,1151 ----
      }
  # endif
  
! # if defined(_REENTRANT) && defined(SIGCONT)
      sigcont_received = FALSE;
  # endif
      kill(0, SIGTSTP);	    /* send ourselves a STOP signal */
! # if defined(_REENTRANT) && defined(SIGCONT)
!     /*
!      * Wait for the SIGCONT signal to be handled. It generally happens
!      * immediately, but somehow not all the time. Do not call pause()
!      * because there would be race condition which would hang Vim if
!      * signal happened in between the test of sigcont_received and the
!      * call to pause(). If signal is not yet received, call sleep(0)
!      * to just yield CPU. Signal should then be received. If somehow
!      * it's still not received, sleep 1, 2, 3 ms. Don't bother waiting
!      * further if signal is not received after 1+2+3+4 ms (not expected
!      * to happen).
!      */
!     {
! 	long wait;
! 	for (wait = 0; !sigcont_received && wait <= 3L; wait++)
! 	    /* Loop is not entered most of the time */
! 	    mch_delay(wait, FALSE); 
!     }
  # endif
  
  # ifdef FEAT_TITLE
***************
*** 1175,1181 ****
  #ifdef SIGTSTP
      signal(SIGTSTP, restricted ? SIG_IGN : SIG_DFL);
  #endif
! #ifdef _REENTRANT
      signal(SIGCONT, sigcont_handler);
  #endif
  
--- 1194,1200 ----
  #ifdef SIGTSTP
      signal(SIGTSTP, restricted ? SIG_IGN : SIG_DFL);
  #endif
! #if defined(_REENTRANT) && defined(SIGCONT)
      signal(SIGCONT, sigcont_handler);
  #endif
  
***************
*** 1234,1240 ****
  reset_signals()
  {
      catch_signals(SIG_DFL, SIG_DFL);
! #ifdef _REENTRANT
      /* SIGCONT isn't in the list, because its default action is ignore */
      signal(SIGCONT, SIG_DFL);
  #endif
--- 1253,1259 ----
  reset_signals()
  {
      catch_signals(SIG_DFL, SIG_DFL);
! #if defined(_REENTRANT) && defined(SIGCONT)
      /* SIGCONT isn't in the list, because its default action is ignore */
      signal(SIGCONT, SIG_DFL);
  #endif
***************
*** 5899,5905 ****
--- 5918,5926 ----
  	     * we are going to suspend or starting an external process
  	     * so we shouldn't  have problem with this
  	     */
+ # ifdef SIGTSTP
  	    signal(SIGTSTP, restricted ? SIG_IGN : SIG_DFL);
+ # endif
  	    return 1; /* succeed */
  	}
  	if (gpm_fd == -2)

Raspunde prin e-mail lui