I put together this patch, which switches Python 2.5.4 over to the new GIL (newgil).
This was generated by diffing change 76193 (last in the newgil branch)
against change 76189 and applying that on top of the changes listed in
issue 4293 (http://bugs.python.org/issue4293), specifically 68460, 68461
and 68722. There were only a couple of rejects, mostly in docs and tests
plus some irrelevant bits. I had to fix up one or two places by hand, which
was pretty straightforward.

Only two tests are failing. The test_capi failure looks like a problem with
the test itself, since it was taken from the py3k branch; test_command is
also failing for me, which I still need to look into.

Some performance tests (taken from
http://www.mail-archive.com/python-dev@python.org/msg43407.html):
Processor: Intel(R) Core(TM)2 Quad  CPU   Q9300  @ 2.50GHz
-j0
2.5.4 : 20.380s
newgil: 16.590s

-j4
2.5.4 : 27.440s
newgil: 20.120s

Comments? Suggestions? I'm going to continue fixing this up, but was
wondering if this could possibly make it into Python 2.7.

Ross
diff --git a/Include/ceval.h b/Include/ceval.h
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -69,10 +69,6 @@
 PyAPI_FUNC(PyObject *) PyEval_EvalFrame(struct _frame *);
 PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(struct _frame *f, int exc);
 
-/* this used to be handled on a per-thread basis - now just two globals */
-PyAPI_DATA(volatile int) _Py_Ticker;
-PyAPI_DATA(int) _Py_CheckInterval;
-
 /* Interface for threads.
 
    A module that plans to do a blocking system call (or something else
@@ -131,6 +127,9 @@
 PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
 PyAPI_FUNC(void) PyEval_ReInitThreads(void);
 
+PyAPI_FUNC(void) _PyEval_SetSwitchInterval(unsigned long microseconds);
+PyAPI_FUNC(unsigned long) _PyEval_GetSwitchInterval(void);
+
 #define Py_BEGIN_ALLOW_THREADS { \
 			PyThreadState *_save; \
 			_save = PyEval_SaveThread();
@@ -149,6 +148,7 @@
 #endif /* !WITH_THREAD */
 
 PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
+PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
 
 
 #ifdef __cplusplus
diff --git a/Include/pystate.h b/Include/pystate.h
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -82,6 +82,8 @@
 
     PyObject *dict;  /* Stores per-thread state */
 
+    /* XXX doesn't mean anything anymore (the comment below is obsolete)
+       => deprecate or remove? */
     /* tick_counter is incremented whenever the check_interval ticker
      * reaches zero. The purpose is to give a useful measure of the number
      * of interpreted bytecode instructions in a given thread.  This
diff --git a/Include/sysmodule.h b/Include/sysmodule.h
--- a/Include/sysmodule.h
+++ b/Include/sysmodule.h
@@ -19,7 +19,6 @@
 			Py_GCC_ATTRIBUTE((format(printf, 1, 2)));
 
 PyAPI_DATA(PyObject *) _PySys_TraceFunc, *_PySys_ProfileFunc;
-PyAPI_DATA(int) _PySys_CheckInterval;
 
 PyAPI_FUNC(void) PySys_ResetWarnOptions(void);
 PyAPI_FUNC(void) PySys_AddWarnOption(char *);
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -1,10 +1,98 @@
 # Run the _testcapi module tests (tests for the Python/C API):  by defn,
 # these are all functions _testcapi exports whose name begins with 'test_'.
 
+from __future__ import with_statement
 import sys
+import time
+import random
+import unittest
+import threading
 from test import test_support
 import _testcapi
 
+class TestPendingCalls(unittest.TestCase):
+
+    def pendingcalls_submit(self, l, n):
+        def callback():
+            #this function can be interrupted by thread switching so let's
+            #use an atomic operation
+            l.append(None)
+
+        for i in range(n):
+            time.sleep(random.random()*0.02) #0.01 secs on average
+            #try submitting callback until successful.
+            #rely on regular interrupt to flush queue if we are
+            #unsuccessful.
+            while True:
+                if _testcapi._pending_threadfunc(callback):
+                    break;
+
+    def pendingcalls_wait(self, l, n, context = None):
+        #now, stick around until all n callbacks have run (len(l) == n)
+        count = 0
+        while len(l) != n:
+            #this busy loop is where we expect to be interrupted to
+            #run our callbacks.  Note that callbacks are only run on the
+            #main thread
+            if False and test_support.verbose:
+                print "(%i)"%(len(l),),
+            for i in xrange(1000):
+                a = i*i
+            if context and not context.event.isSet(): #2.5 has no is_set()
+                continue
+            count += 1
+            self.failUnless(count < 10000,
+                "timeout waiting for %i callbacks, got %i"%(n, len(l)))
+        if False and test_support.verbose:
+            print "(%i)"%(len(l),)
+
+    def test_pendingcalls_threaded(self):
+
+        #do every callback on a separate thread
+        n = 32 #total callbacks
+        threads = []
+        class foo(object):pass
+        context = foo()
+        context.l = []
+        context.n = 2 #submits per thread
+        context.nThreads = n / context.n
+        context.nFinished = 0
+        context.lock = threading.Lock()
+        context.event = threading.Event()
+
+        for i in range(context.nThreads):
+            t = threading.Thread(target=self.pendingcalls_thread, args = (context,))
+            t.start()
+            threads.append(t)
+
+        self.pendingcalls_wait(context.l, n, context)
+
+        for t in threads:
+            t.join()
+
+    def pendingcalls_thread(self, context):
+        try:
+            self.pendingcalls_submit(context.l, context.n)
+        finally:
+            with context.lock:
+                context.nFinished += 1
+                nFinished = context.nFinished
+                if False and test_support.verbose:
+                    print "finished threads: ", nFinished
+            if nFinished == context.nThreads:
+                context.event.set()
+
+    def test_pendingcalls_non_threaded(self):
+        #again, just using the main thread, likely they will all be dispatched at
+        #once.  It is ok to ask for too many, because we loop until we find a slot.
+        #the loop can be interrupted to dispatch.
+        #there are only 32 dispatch slots, so we go for twice that!
+        l = []
+        n = 64
+        self.pendingcalls_submit(l, n)
+        self.pendingcalls_wait(l, n)
+
+
 def test_main():
 
     for name in dir(_testcapi):
@@ -51,5 +139,7 @@
         t.start()
         t.join()
 
+    test_support.run_unittest(TestPendingCalls)
+
 if __name__ == "__main__":
     test_main()
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -189,6 +189,21 @@
             sys.setcheckinterval(n)
             self.assertEquals(sys.getcheckinterval(), n)
 
+    def test_switchinterval(self):
+        self.assertRaises(TypeError, sys.setswitchinterval)
+        self.assertRaises(TypeError, sys.setswitchinterval, "a")
+        self.assertRaises(ValueError, sys.setswitchinterval, -1.0)
+        self.assertRaises(ValueError, sys.setswitchinterval, 0.0)
+        orig = sys.getswitchinterval()
+        # sanity check
+        self.assertTrue(orig < 0.5, orig)
+        try:
+            for n in 0.00001, 0.05, 3.0, orig:
+                sys.setswitchinterval(n)
+                self.assertAlmostEquals(sys.getswitchinterval(), n)
+        finally:
+            sys.setswitchinterval(orig)
+
     def test_recursionlimit(self):
         self.assertRaises(TypeError, sys.getrecursionlimit, 42)
         oldlimit = sys.getrecursionlimit()
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -673,6 +673,43 @@
 	PyThread_free_lock(thread_done);
 	Py_RETURN_NONE;
 }
+
+/* test Py_AddPendingCall using threads */
+static int _pending_callback(void *arg)
+{
+	/* we assume the argument is a callable object to which we own a reference */
+	PyObject *callable = (PyObject *)arg;
+	PyObject *r = PyObject_CallObject(callable, NULL);
+	Py_DECREF(callable);
+	Py_XDECREF(r);
+	return r != NULL ? 0 : -1;
+}
+
+/* The following requests a single call to _pending_callback with the
+ * given callable.  It can be run from any python thread.
+ */
+PyObject *pending_threadfunc(PyObject *self, PyObject *arg)
+{
+	PyObject *callable;
+	int r;
+	if (PyArg_ParseTuple(arg, "O", &callable) == 0)
+		return NULL;
+
+	/* create the reference for the callback while we hold the lock */
+	Py_INCREF(callable);
+
+	Py_BEGIN_ALLOW_THREADS
+	r = Py_AddPendingCall(&_pending_callback, callable);
+	Py_END_ALLOW_THREADS
+
+	if (r<0) {
+		Py_DECREF(callable); /* unsuccessful add, destroy the extra reference */
+		Py_INCREF(Py_False);
+		return Py_False;
+	}
+	Py_INCREF(Py_True);
+	return Py_True;
+}
 #endif
 
 /* Some tests of PyString_FromFormat().  This needs more tests. */
@@ -755,6 +792,7 @@
 #endif
 #ifdef WITH_THREAD
 	{"_test_thread_state",  test_thread_state, 		 METH_VARARGS},
+	{"_pending_threadfunc",	pending_threadfunc,		 METH_VARARGS},
 #endif
 	{NULL, NULL} /* sentinel */
 };
diff --git a/Objects/longobject.c b/Objects/longobject.c
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -38,10 +38,7 @@
 static PyObject *long_format(PyObject *aa, int base, int addL);
 
 #define SIGCHECK(PyTryBlock) \
-	if (--_Py_Ticker < 0) { \
-		_Py_Ticker = _Py_CheckInterval; \
-		if (PyErr_CheckSignals()) PyTryBlock \
-	}
+	if (PyErr_CheckSignals()) PyTryBlock \
 
 /* Normalize (remove leading zeros from) a long int object.
    Doesn't attempt to free the storage--in most cases, due to the nature
diff --git a/Python/ceval.c b/Python/ceval.c
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -203,6 +203,28 @@
 #endif
 
 
+#define COMPUTE_EVAL_BREAKER() \
+	(eval_breaker = gil_drop_request | pendingcalls_to_do | pending_async_exc)
+
+#define SET_GIL_DROP_REQUEST() \
+	do { gil_drop_request = 1; eval_breaker = 1; } while (0)
+
+#define RESET_GIL_DROP_REQUEST() \
+	do { gil_drop_request = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+
+#define SIGNAL_PENDING_CALLS() \
+	do { pendingcalls_to_do = 1; eval_breaker = 1; } while (0)
+
+#define UNSIGNAL_PENDING_CALLS() \
+	do { pendingcalls_to_do = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+
+#define SIGNAL_ASYNC_EXC() \
+	do { pending_async_exc = 1; eval_breaker = 1; } while (0)
+
+#define UNSIGNAL_ASYNC_EXC() \
+	do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+
+
 #ifdef WITH_THREAD
 
 #ifdef HAVE_ERRNO_H
@@ -210,35 +232,55 @@
 #endif
 #include "pythread.h"
 
-static PyThread_type_lock interpreter_lock = 0; /* This is the GIL */
+static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
+/* This single variable consolidates all requests to break out of the fast path
+   in the eval loop. */
+static volatile int eval_breaker = 0;
+/* Request for dropping the GIL */
+static volatile int gil_drop_request = 0;
+/* Request for running pending calls */
+static volatile int pendingcalls_to_do = 0; 
+/* Request for looking at the `async_exc` field of the current thread state */
+static volatile int pending_async_exc = 0;
+
+#include "ceval_gil.h"
 
 int
 PyEval_ThreadsInitialized(void)
 {
-	return interpreter_lock != 0;
+	return gil_created();
 }
 
 void
 PyEval_InitThreads(void)
 {
-	if (interpreter_lock)
+	if (gil_created())
 		return;
-	interpreter_lock = PyThread_allocate_lock();
-	PyThread_acquire_lock(interpreter_lock, 1);
+	create_gil();
+	take_gil(PyThreadState_GET());
 	main_thread = PyThread_get_thread_ident();
+	if (!pending_lock)
+		pending_lock = PyThread_allocate_lock();
 }
 
 void
 PyEval_AcquireLock(void)
 {
-	PyThread_acquire_lock(interpreter_lock, 1);
+	PyThreadState *tstate = PyThreadState_GET();
+	if (tstate == NULL)
+		Py_FatalError("PyEval_AcquireLock: current thread state is NULL");
+	take_gil(tstate);
 }
 
 void
 PyEval_ReleaseLock(void)
 {
-	PyThread_release_lock(interpreter_lock);
+	/* This function must succeed when the current thread state is NULL.
+	   We therefore avoid PyThreadState_GET() which dumps a fatal error
+	   in debug mode.
+	*/
+	drop_gil(_PyThreadState_Current);
 }
 
 void
@@ -247,8 +289,8 @@
 	if (tstate == NULL)
 		Py_FatalError("PyEval_AcquireThread: NULL new thread state");
 	/* Check someone has called PyEval_InitThreads() to create the lock */
-	assert(interpreter_lock);
-	PyThread_acquire_lock(interpreter_lock, 1);
+	assert(gil_created());
+	take_gil(tstate);
 	if (PyThreadState_Swap(tstate) != NULL)
 		Py_FatalError(
 			"PyEval_AcquireThread: non-NULL old thread state");
@@ -261,7 +303,7 @@
 		Py_FatalError("PyEval_ReleaseThread: NULL thread state");
 	if (PyThreadState_Swap(NULL) != tstate)
 		Py_FatalError("PyEval_ReleaseThread: wrong thread state");
-	PyThread_release_lock(interpreter_lock);
+	drop_gil(tstate);
 }
 
 /* This function is called from PyOS_AfterFork to ensure that newly
@@ -273,16 +315,17 @@
 PyEval_ReInitThreads(void)
 {
 	PyObject *threading, *result;
-	PyThreadState *tstate;
-
-	if (!interpreter_lock)
+	PyThreadState *tstate = PyThreadState_GET();
+
+	if (!gil_created())
 		return;
 	/*XXX Can't use PyThread_free_lock here because it does too
 	  much error-checking.  Doing this cleanly would require
 	  adding a new function to each thread_*.h.  Instead, just
 	  create a new lock and waste a little bit of memory */
-	interpreter_lock = PyThread_allocate_lock();
-	PyThread_acquire_lock(interpreter_lock, 1);
+	recreate_gil();
+	pending_lock = PyThread_allocate_lock();
+	take_gil(tstate);
 	main_thread = PyThread_get_thread_ident();
 
 	/* Update the threading module with the new state.
@@ -302,7 +345,21 @@
 		Py_DECREF(result);
 	Py_DECREF(threading);
 }
-#endif
+
+#else
+static int eval_breaker = 0;
+static int gil_drop_request = 0;
+static int pending_async_exc = 0;
+#endif /* WITH_THREAD */
+
+/* This function is used to signal that async exceptions are waiting to be
+   raised, therefore it is also useful in non-threaded builds. */
+
+void
+_PyEval_SignalAsyncExc(void)
+{
+	SIGNAL_ASYNC_EXC();
+}
 
 /* Functions save_thread and restore_thread are always defined so
    dynamically loaded modules needn't be compiled separately for use
@@ -315,8 +372,8 @@
 	if (tstate == NULL)
 		Py_FatalError("PyEval_SaveThread: NULL tstate");
 #ifdef WITH_THREAD
-	if (interpreter_lock)
-		PyThread_release_lock(interpreter_lock);
+	if (gil_created())
+		drop_gil(tstate);
 #endif
 	return tstate;
 }
@@ -327,9 +384,9 @@
 	if (tstate == NULL)
 		Py_FatalError("PyEval_RestoreThread: NULL tstate");
 #ifdef WITH_THREAD
-	if (interpreter_lock) {
+	if (gil_created()) {
 		int err = errno;
-		PyThread_acquire_lock(interpreter_lock, 1);
+		take_gil(tstate);
 		errno = err;
 	}
 #endif
@@ -354,19 +411,146 @@
 #ifdef WITH_THREAD
    Any thread can schedule pending calls, but only the main thread
    will execute them.
+   There is no facility to schedule calls to a particular thread, but
+   that should be easy to change, should that ever be required.  In
+   that case, the static variables here should go into the python
+   threadstate.
 #endif
-
-   XXX WARNING!  ASYNCHRONOUSLY EXECUTING CODE!
+*/
+
+#ifdef WITH_THREAD
+
+/* The WITH_THREAD implementation is thread-safe.  It allows
+   scheduling to be made from any thread, and even from an executing
+   callback.
+ */
+
+#define NPENDINGCALLS 32
+static struct {
+	int (*func)(void *);
+	void *arg;
+} pendingcalls[NPENDINGCALLS];
+static int pendingfirst = 0;
+static int pendinglast = 0;
+static char pendingbusy = 0;
+
+int
+Py_AddPendingCall(int (*func)(void *), void *arg)
+{
+	int i, j, result=0;
+	PyThread_type_lock lock = pending_lock;
+	
+	/* try a few times for the lock.  Since this mechanism is used
+	 * for signal handling (on the main thread), there is a (slim)
+	 * chance that a signal is delivered on the same thread while we
+	 * hold the lock during the Py_MakePendingCalls() function.
+	 * This avoids a deadlock in that case.
+	 * Note that signals can be delivered on any thread.  In particular,
+	 * on Windows, a SIGINT is delivered on a system-created worker
+	 * thread.
+	 * We also check for lock being NULL, in the unlikely case that
+	 * this function is called before any bytecode evaluation takes place.
+	 */
+	if (lock != NULL) {
+		for (i = 0; i<100; i++) {
+			if (PyThread_acquire_lock(lock, NOWAIT_LOCK))
+				break;
+		}
+		if (i == 100)
+			return -1;
+	}
+
+	i = pendinglast;
+	j = (i + 1) % NPENDINGCALLS;
+	if (j == pendingfirst) {
+		result = -1; /* Queue full */
+	} else {
+		pendingcalls[i].func = func;
+		pendingcalls[i].arg = arg;
+		pendinglast = j;
+	}
+	/* signal main loop */
+	SIGNAL_PENDING_CALLS();
+	if (lock != NULL)
+		PyThread_release_lock(lock);
+	return result;
+}
+
+int
+Py_MakePendingCalls(void)
+{
+	int i;
+	int r = 0;
+
+	if (!pending_lock) {
+		/* initial allocation of the lock */
+		pending_lock = PyThread_allocate_lock();
+		if (pending_lock == NULL)
+			return -1;
+	}
+
+	/* only service pending calls on main thread */
+	if (main_thread && PyThread_get_thread_ident() != main_thread)
+		return 0; 
+	/* don't perform recursive pending calls */
+	if (pendingbusy)
+		return 0;
+	pendingbusy = 1;
+	/* perform a bounded number of calls, in case of recursion */
+	for (i=0; i<NPENDINGCALLS; i++) {
+		int j;  
+		int (*func)(void *);
+		void *arg;
+		
+		/* pop one item off the queue while holding the lock */
+		PyThread_acquire_lock(pending_lock, WAIT_LOCK);
+		j = pendingfirst;
+		if (j == pendinglast) {
+			func = NULL; /* Queue empty */
+		} else {
+			func = pendingcalls[j].func;
+			arg = pendingcalls[j].arg;
+			pendingfirst = (j + 1) % NPENDINGCALLS;
+		}
+		if (pendingfirst != pendinglast)
+			SIGNAL_PENDING_CALLS();
+		else
+			UNSIGNAL_PENDING_CALLS();
+		PyThread_release_lock(pending_lock);
+		/* having released the lock, perform the callback */
+		if (func == NULL)
+			break;
+		r = func(arg);
+		if (r)
+			break;
+	}
+	pendingbusy = 0;
+	return r;
+}
+
+#else /* if ! defined WITH_THREAD */
+
+/*
+   WARNING!  ASYNCHRONOUSLY EXECUTING CODE!
+   This code is used for signal handling in python that isn't built
+   with WITH_THREAD.
+   Don't use this implementation when Py_AddPendingCall() can happen
+   on a different thread!
+ 
    There are two possible race conditions:
-   (1) nested asynchronous registry calls;
-   (2) registry calls made while pending calls are being processed.
-   While (1) is very unlikely, (2) is a real possibility.
+   (1) nested asynchronous calls to Py_AddPendingCall()
+   (2) AddPendingCall() calls made while pending calls are being processed.
+   
+   (1) is very unlikely because typically signal delivery
+   is blocked during signal handling.  So it should be impossible.
+   (2) is a real possibility.
    The current code is safe against (2), but not against (1).
    The safety against (2) is derived from the fact that only one
-   thread (the main thread) ever takes things out of the queue.
-
-   XXX Darn!  With the advent of thread state, we should have an array
-   of pending calls per thread in the thread state!  Later...
+   thread is present, interrupted by signals, and that the critical
+   section is protected with the "busy" variable.  On Windows, which
+   delivers SIGINT on a system thread, this does not hold and therefore
+   Windows really shouldn't use this version.
+   The two threads could theoretically wiggle around the "busy" variable.
 */
 
 #define NPENDINGCALLS 32
@@ -376,7 +560,7 @@
 } pendingcalls[NPENDINGCALLS];
 static volatile int pendingfirst = 0;
 static volatile int pendinglast = 0;
-static volatile int things_to_do = 0;
+static volatile int pendingcalls_to_do = 0;
 
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
@@ -384,8 +568,6 @@
 	static volatile int busy = 0;
 	int i, j;
 	/* XXX Begin critical section */
-	/* XXX If you want this to be safe against nested
-	   XXX asynchronous calls, you'll have to work harder! */
 	if (busy)
 		return -1;
 	busy = 1;
@@ -399,8 +581,7 @@
 	pendingcalls[i].arg = arg;
 	pendinglast = j;
 
-	_Py_Ticker = 0;
-	things_to_do = 1; /* Signal main loop */
+	SIGNAL_PENDING_CALLS();
 	busy = 0;
 	/* XXX End critical section */
 	return 0;
@@ -410,14 +591,10 @@
 Py_MakePendingCalls(void)
 {
 	static int busy = 0;
-#ifdef WITH_THREAD
-	if (main_thread && PyThread_get_thread_ident() != main_thread)
-		return 0;
-#endif
 	if (busy)
 		return 0;
 	busy = 1;
-	things_to_do = 0;
+	UNSIGNAL_PENDING_CALLS();
 	for (;;) {
 		int i;
 		int (*func)(void *);
@@ -430,7 +607,7 @@
 		pendingfirst = (i + 1) % NPENDINGCALLS;
 		if (func(arg) < 0) {
 			busy = 0;
-			things_to_do = 1; /* We're not done yet */
+			SIGNAL_PENDING_CALLS(); /* We're not done yet */
 			return -1;
 		}
 	}
@@ -438,6 +615,8 @@
 	return 0;
 }
 
+#endif /* WITH_THREAD */
+
 
 /* The interpreter's recursion limit */
 
@@ -502,10 +681,7 @@
 static enum why_code do_raise(PyObject *, PyObject *, PyObject *);
 static int unpack_iterable(PyObject *, int, PyObject **);
 
-/* for manipulating the thread switch and periodic "stuff" - used to be
-   per thread, now just a pair o' globals */
-int _Py_CheckInterval = 100;
-volatile int _Py_Ticker = 100;
+
 
 PyObject *
 PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals)
@@ -809,60 +985,51 @@
 		/* Do periodic things.  Doing this every time through
 		   the loop would add too much overhead, so we do it
 		   only every Nth instruction.  We also do it if
-		   ``things_to_do'' is set, i.e. when an asynchronous
+		   ``pendingcalls_to_do'' is set, i.e. when an asynchronous
 		   event needs attention (e.g. a signal handler or
 		   async I/O handler); see Py_AddPendingCall() and
 		   Py_MakePendingCalls() above. */
 
-		if (--_Py_Ticker < 0) {
+                   if (eval_breaker) {
                         if (*next_instr == SETUP_FINALLY) {
                                 /* Make the last opcode before
                                    a try: finally: block uninterruptable. */
                                 goto fast_next_opcode;
                         }
-			_Py_Ticker = _Py_CheckInterval;
 			tstate->tick_counter++;
 #ifdef WITH_TSC
 			ticked = 1;
 #endif
-			if (things_to_do) {
+			if (pendingcalls_to_do) {
 				if (Py_MakePendingCalls() < 0) {
 					why = WHY_EXCEPTION;
 					goto on_error;
 				}
-				if (things_to_do)
-					/* MakePendingCalls() didn't succeed.
-					   Force early re-execution of this
-					   "periodic" code, possibly after
-					   a thread switch */
-					_Py_Ticker = 0;
 			}
+			if (gil_drop_request) {
 #ifdef WITH_THREAD
-			if (interpreter_lock) {
 				/* Give another thread a chance */
-
 				if (PyThreadState_Swap(NULL) != tstate)
 					Py_FatalError("ceval: tstate mix-up");
-				PyThread_release_lock(interpreter_lock);
-
+				drop_gil(tstate);
+	
 				/* Other threads may run now */
-
-				PyThread_acquire_lock(interpreter_lock, 1);
+	
+				take_gil(tstate);
 				if (PyThreadState_Swap(tstate) != NULL)
 					Py_FatalError("ceval: orphan tstate");
-
-				/* Check for thread interrupts */
-
-				if (tstate->async_exc != NULL) {
-					x = tstate->async_exc;
-					tstate->async_exc = NULL;
-					PyErr_SetNone(x);
-					Py_DECREF(x);
-					why = WHY_EXCEPTION;
-					goto on_error;
-				}
+#endif
 			}
-#endif
+			/* Check for asynchronous exceptions. */
+			if (tstate->async_exc != NULL) {
+				x = tstate->async_exc;
+				tstate->async_exc = NULL;
+				UNSIGNAL_ASYNC_EXC();
+				PyErr_SetNone(x);
+				Py_DECREF(x);
+				why = WHY_EXCEPTION;
+				goto on_error;
+			}
 		}
 
 	fast_next_opcode:
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
new file mode 100644
--- /dev/null
+++ b/Python/ceval_gil.h
@@ -0,0 +1,335 @@
+/*
+ * Implementation of the Global Interpreter Lock (GIL).
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+
+
+/* First some general settings */
+
+/* microseconds (the Python API uses seconds, though) */
+#define DEFAULT_INTERVAL 5000
+static unsigned long gil_interval = DEFAULT_INTERVAL;
+#define INTERVAL (gil_interval >= 1 ? gil_interval : 1)
+
+/* Enable if you want to force the switching of threads at least every `gil_interval` */
+#undef FORCE_SWITCHING
+#define FORCE_SWITCHING
+
+
+/*
+   Notes about the implementation:
+
+   - The GIL is just a boolean variable (gil_locked) whose access is protected
+     by a mutex (gil_mutex), and whose changes are signalled by a condition
+     variable (gil_cond). gil_mutex is taken for short periods of time,
+     and therefore mostly uncontended.
+
+   - In the GIL-holding thread, the main loop (PyEval_EvalFrameEx) must be
+     able to release the GIL on demand by another thread. A volatile boolean
+     variable (gil_drop_request) is used for that purpose, which is checked
+     at every turn of the eval loop. That variable is set after a wait of
+     `interval` microseconds on `gil_cond` has timed out.
+      
+      [Actually, another volatile boolean variable (eval_breaker) is used
+       which ORs several conditions into one. Volatile booleans are
+       sufficient as inter-thread signalling means since Python is run
+       on cache-coherent architectures only.]
+
+   - A thread wanting to take the GIL will first let pass a given amount of
+     time (`interval` microseconds) before setting gil_drop_request. This
+     encourages a defined switching period, but doesn't enforce it since
+     opcodes can take an arbitrary time to execute.
+ 
+     The `interval` value is available for the user to read and modify
+     using the Python API `sys.{get,set}switchinterval()`.
+
+   - When a thread releases the GIL and gil_drop_request is set, that thread
+     ensures that another GIL-awaiting thread gets scheduled.
+     It does so by waiting on a condition variable (switch_cond) until
+     the value of gil_last_holder is changed to something else than its
+     own thread state pointer, indicating that another thread was able to
+     take the GIL.
+ 
+     This is meant to prohibit the latency-adverse behaviour on multi-core
+     machines where one thread would speculatively release the GIL, but still
+     run and end up being the first to re-acquire it, making the "timeslices"
+     much longer than expected.
+     (Note: this mechanism is enabled with FORCE_SWITCHING above)
+*/
+
+#ifndef _POSIX_THREADS
+/* This means pthreads are not implemented in libc headers, hence the macro
+   not present in unistd.h. But they still can be implemented as an external
+   library (e.g. gnu pth in pthread emulation) */
+# ifdef HAVE_PTHREAD_H
+#  include <pthread.h> /* _POSIX_THREADS */
+# endif
+#endif
+
+
+#ifdef _POSIX_THREADS
+
+/*
+ * POSIX support
+ */
+
+#include <pthread.h>
+
+#define ADD_MICROSECONDS(tv, interval) \
+do { \
+    tv.tv_usec += (long) interval; \
+    tv.tv_sec += tv.tv_usec / 1000000; \
+    tv.tv_usec %= 1000000; \
+} while (0)
+
+/* We assume all modern POSIX systems have gettimeofday() */
+#ifdef GETTIMEOFDAY_NO_TZ
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv)
+#else
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
+#endif
+
+#define MUTEX_T pthread_mutex_t
+#define MUTEX_INIT(mut) \
+    if (pthread_mutex_init(&mut, NULL)) { \
+        Py_FatalError("pthread_mutex_init(" #mut ") failed"); };
+#define MUTEX_LOCK(mut) \
+    if (pthread_mutex_lock(&mut)) { \
+        Py_FatalError("pthread_mutex_lock(" #mut ") failed"); };
+#define MUTEX_UNLOCK(mut) \
+    if (pthread_mutex_unlock(&mut)) { \
+        Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); };
+
+#define COND_T pthread_cond_t
+#define COND_INIT(cond) \
+    if (pthread_cond_init(&cond, NULL)) { \
+        Py_FatalError("pthread_cond_init(" #cond ") failed"); };
+#define COND_PREPARE(cond)
+#define COND_SIGNAL(cond) \
+    if (pthread_cond_signal(&cond)) { \
+        Py_FatalError("pthread_cond_signal(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    if (pthread_cond_wait(&cond, &mut)) { \
+        Py_FatalError("pthread_cond_wait(" #cond ") failed"); };
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        int r; \
+        struct timespec ts; \
+        struct timeval deadline; \
+        \
+        GETTIMEOFDAY(&deadline); \
+        ADD_MICROSECONDS(deadline, microseconds); \
+        ts.tv_sec = deadline.tv_sec; \
+        ts.tv_nsec = deadline.tv_usec * 1000; \
+        \
+        r = pthread_cond_timedwait(&cond, &mut, &ts); \
+        if (r == ETIMEDOUT) \
+            timeout_result = 1; \
+        else if (r) \
+            Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
+        else \
+            timeout_result = 0; \
+    } \
+
+#elif defined(NT_THREADS)
+
+/*
+ * Windows (2000 and later, as well as (hopefully) CE) support
+ */
+
+#include <windows.h>
+
+#define MUTEX_T HANDLE
+#define MUTEX_INIT(mut) \
+    if (!(mut = CreateMutex(NULL, FALSE, NULL))) { \
+        Py_FatalError("CreateMutex(" #mut ") failed"); };
+#define MUTEX_LOCK(mut) \
+    if (WaitForSingleObject(mut, INFINITE) != WAIT_OBJECT_0) { \
+        Py_FatalError("WaitForSingleObject(" #mut ") failed"); };
+#define MUTEX_UNLOCK(mut) \
+    if (!ReleaseMutex(mut)) { \
+        Py_FatalError("ReleaseMutex(" #mut ") failed"); };
+
+/* We emulate condition variables with events. It is sufficient here.
+   (WaitForMultipleObjects() allows the event to be caught and the mutex
+   to be taken atomically) */
+#define COND_T HANDLE
+#define COND_INIT(cond) \
+    /* auto-reset, non-signalled event standing in for a condition variable */ \
+    if (!(cond = CreateEvent(NULL, FALSE, FALSE, NULL))) { \
+        Py_FatalError("CreateEvent(" #cond ") failed"); };
+#define COND_PREPARE(cond) \
+    if (!ResetEvent(cond)) { \
+        Py_FatalError("ResetEvent(" #cond ") failed"); };
+#define COND_SIGNAL(cond) \
+    if (!SetEvent(cond)) { \
+        Py_FatalError("SetEvent(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    { \
+        DWORD r; \
+        HANDLE objects[2] = { cond, mut }; \
+        MUTEX_UNLOCK(mut); \
+        /* wait-all: returns once both the event and the mutex are ours */ \
+        r = WaitForMultipleObjects(2, objects, TRUE, INFINITE); \
+        if (r != WAIT_OBJECT_0) Py_FatalError("WaitForMultipleObjects(" #cond ") failed"); \
+    }
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        DWORD r; \
+        HANDLE objects[2] = { cond, mut }; \
+        MUTEX_UNLOCK(mut); \
+        r = WaitForMultipleObjects(2, objects, TRUE, microseconds / 1000); \
+        if (r == WAIT_TIMEOUT) { \
+            MUTEX_LOCK(mut); /* timed out without the mutex: re-take it */ \
+            timeout_result = 1; \
+        } \
+        else if (r != WAIT_OBJECT_0) \
+            Py_FatalError("WaitForMultipleObjects(" #cond ") failed"); \
+        else \
+            timeout_result = 0; \
+    }
+
+#else
+
+#error You need either a POSIX-compatible or a Windows system!
+
+#endif /* _POSIX_THREADS, NT_THREADS */
+
+
+/* Whether the GIL is already taken (-1 if uninitialized). This is volatile
+   because it can be read without any lock taken in ceval.c. */
+static volatile int gil_locked = -1;
+/* Number of GIL switches since the beginning. */
+static unsigned long gil_switch_number = 0;
+/* Last thread holding / having held the GIL. This helps us know whether
+   anyone else was scheduled after we dropped the GIL. */
+static PyThreadState *gil_last_holder = NULL;
+
+/* This condition variable allows one or several threads to wait until
+   the GIL is released. In addition, the mutex also protects the above
+   variables. */
+static COND_T gil_cond;
+static MUTEX_T gil_mutex;
+
+#ifdef FORCE_SWITCHING
+/* This condition variable helps the GIL-releasing thread wait for
+   a GIL-awaiting thread to be scheduled and take the GIL. */
+static COND_T switch_cond;
+static MUTEX_T switch_mutex;
+#endif
+
+
+static int gil_created(void)
+{
+    return gil_locked >= 0;
+}
+
+static void create_gil(void)
+{
+    MUTEX_INIT(gil_mutex);
+#ifdef FORCE_SWITCHING
+    MUTEX_INIT(switch_mutex);
+#endif
+    COND_INIT(gil_cond);
+#ifdef FORCE_SWITCHING
+    COND_INIT(switch_cond);
+#endif
+    gil_locked = 0;
+    gil_last_holder = NULL;
+}
+
+static void recreate_gil(void)
+{
+    create_gil();
+}
+
+static void drop_gil(PyThreadState *tstate)
+{
+    /* NOTE: tstate is allowed to be NULL. */
+    if (!gil_locked)
+        Py_FatalError("drop_gil: GIL is not locked");
+    if (tstate != NULL && tstate != gil_last_holder)
+        Py_FatalError("drop_gil: wrong thread state");
+
+    MUTEX_LOCK(gil_mutex);
+    gil_locked = 0;
+    COND_SIGNAL(gil_cond);
+#ifdef FORCE_SWITCHING
+    COND_PREPARE(switch_cond);
+#endif
+    MUTEX_UNLOCK(gil_mutex);
+    
+#ifdef FORCE_SWITCHING
+    if (gil_drop_request) {
+        MUTEX_LOCK(switch_mutex);
+        /* Not switched yet => wait */
+        if (gil_last_holder == tstate)
+            COND_WAIT(switch_cond, switch_mutex);
+        MUTEX_UNLOCK(switch_mutex);
+    }
+#endif
+}
+
+static void take_gil(PyThreadState *tstate)
+{
+    int err;
+    if (tstate == NULL)
+        Py_FatalError("take_gil: NULL tstate");
+
+    err = errno;
+    MUTEX_LOCK(gil_mutex);
+
+    if (!gil_locked)
+        goto _ready;
+    
+    COND_PREPARE(gil_cond);
+    while (gil_locked) {
+        int timed_out = 0;
+        unsigned long saved_switchnum;
+
+        saved_switchnum = gil_switch_number;
+        COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
+        /* If we timed out and no switch occurred in the meantime, it is time
+           to ask the GIL-holding thread to drop it. */
+        if (timed_out && gil_locked && gil_switch_number == saved_switchnum) {
+            SET_GIL_DROP_REQUEST();
+        }
+    }
+_ready:
+#ifdef FORCE_SWITCHING
+    /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */
+    MUTEX_LOCK(switch_mutex);
+#endif
+    /* We now hold the GIL */
+    gil_locked = 1;
+
+    if (tstate != gil_last_holder) {
+        gil_last_holder = tstate;
+        ++gil_switch_number;
+    }
+#ifdef FORCE_SWITCHING
+    COND_SIGNAL(switch_cond);
+    MUTEX_UNLOCK(switch_mutex);
+#endif
+    if (gil_drop_request) {
+        RESET_GIL_DROP_REQUEST();
+    }
+    if (tstate->async_exc != NULL) {
+        _PyEval_SignalAsyncExc();
+    }
+    
+    MUTEX_UNLOCK(gil_mutex);
+    errno = err;
+}
+
+void _PyEval_SetSwitchInterval(unsigned long microseconds)
+{
+    gil_interval = microseconds;
+}
+
+unsigned long _PyEval_GetSwitchInterval(void)
+{
+    return gil_interval;  /* in microseconds, as stored by the setter */
+}
diff --git a/Python/pystate.c b/Python/pystate.c
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -395,6 +395,7 @@
 			p->async_exc = exc;
 			HEAD_UNLOCK();
 			Py_XDECREF(old_exc);
+			_PyEval_SignalAsyncExc();
 			return 1;
 		}
 	}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -416,10 +416,18 @@
 and return.  See the profiler chapter in the library manual."
 );
 
+/* TODO: deprecate */
+static int _check_interval = 100;
+
 static PyObject *
 sys_setcheckinterval(PyObject *self, PyObject *args)
 {
-	if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_Py_CheckInterval))
+	if (PyErr_WarnEx(PyExc_DeprecationWarning,
+			 "sys.getcheckinterval() and sys.setcheckinterval() "
+			 "are deprecated.  Use sys.setswitchinterval() "
+			 "instead.", 1) < 0)
+		return NULL;
+	if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_check_interval))
 		return NULL;
 	Py_INCREF(Py_None);
 	return Py_None;
@@ -435,13 +443,59 @@
 static PyObject *
 sys_getcheckinterval(PyObject *self, PyObject *args)
 {
-	return PyInt_FromLong(_Py_CheckInterval);
+	if (PyErr_WarnEx(PyExc_DeprecationWarning,
+			 "sys.getcheckinterval() and sys.setcheckinterval() "
+			 "are deprecated.  Use sys.getswitchinterval() "
+			 "instead.", 1) < 0)
+		return NULL;  /* the warning was raised as an exception */
+	return PyInt_FromLong(_check_interval);  /* 2.x: keep returning an int, not a long */
 }
 
 PyDoc_STRVAR(getcheckinterval_doc,
 "getcheckinterval() -> current check interval; see setcheckinterval()."
 );
 
+#ifdef WITH_THREAD
+static PyObject *
+sys_setswitchinterval(PyObject *self, PyObject *args)
+{
+	double d;  /* requested switch interval, in seconds */
+	if (!PyArg_ParseTuple(args, "d:setswitchinterval", &d))
+		return NULL;
+	if (d <= 0.0) {  /* NOTE(review): NaN and very large values pass this check */
+		PyErr_SetString(PyExc_ValueError,
+				"switch interval must be strictly positive");
+		return NULL;
+	}
+	_PyEval_SetSwitchInterval((unsigned long) (1e6 * d));  /* s -> us, truncating */
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+PyDoc_STRVAR(setswitchinterval_doc,
+"setswitchinterval(n)\n\
+\n\
+Set the ideal thread switching delay inside the Python interpreter.\n\
+The actual frequency of switching threads can be lower if the\n\
+interpreter executes long sequences of uninterruptible code\n\
+(this is implementation-specific and workload-dependent).\n\
+\n\
+The parameter must represent the desired switching delay in seconds.\n\
+A typical value is 0.005 (5 milliseconds)."
+);
+
+static PyObject *
+sys_getswitchinterval(PyObject *self, PyObject *args)
+{
+	return PyFloat_FromDouble(1e-6 * _PyEval_GetSwitchInterval());  /* us -> s */
+}
+
+PyDoc_STRVAR(getswitchinterval_doc,
+"getswitchinterval() -> current thread switch interval; see setswitchinterval()."
+);
+
+#endif /* WITH_THREAD */
+
 #ifdef WITH_TSC
 static PyObject *
 sys_settscdump(PyObject *self, PyObject *args)
@@ -787,6 +841,12 @@
 	 setcheckinterval_doc},
 	{"getcheckinterval",	sys_getcheckinterval, METH_NOARGS,
 	 getcheckinterval_doc},
+#ifdef WITH_THREAD
+	{"setswitchinterval",	sys_setswitchinterval, METH_VARARGS,
+	 setswitchinterval_doc},
+	{"getswitchinterval",	sys_getswitchinterval, METH_NOARGS,
+	 getswitchinterval_doc},
+#endif
 #ifdef HAVE_DLOPEN
 	{"setdlopenflags", sys_setdlopenflags, METH_VARARGS,
 	 setdlopenflags_doc},
_______________________________________________
Python-Dev mailing list
Python-Dev@python.org
http://mail.python.org/mailman/listinfo/python-dev
Unsubscribe: 
http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com

Reply via email to