Author: aconway Date: Thu Oct 23 14:00:06 2014 New Revision: 1633814 URL: http://svn.apache.org/r1633814 Log: DISPATCH-72: Sporadic core dumps in dispatch tests.
Added missing lock scopes around calls to python code, this resoloved the sporadic core dumps (over 1000 test runs overnight with no failures) Initially I speculated that the problem was that we don't lock the GIL as recommended by the python C API docs https://docs.python.org/2/c-api/init.html#non-python-created-threads. Instead we use a separate mutex in the C code. I attempted to use the GIL but that caused deadlocks. They appeared to be associated with locking a python mutex in the management agent - this causes python to unlock and relock the GIL. That allows other threads into the python code which somehow causes deadlock - I am not sure exactly why. I believe it is OK to use the separate C mutex *provided* we never create threads in python code. If we do that then I think we will need to re-visit the GIL and figure out how to lock it correctly. Modified: qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h qpid/dispatch/trunk/include/qpid/dispatch/threading.h qpid/dispatch/trunk/src/dispatch.c qpid/dispatch/trunk/src/error.c qpid/dispatch/trunk/src/posix/threading.c qpid/dispatch/trunk/src/python_embedded.c qpid/dispatch/trunk/src/router_agent.c qpid/dispatch/trunk/src/router_pynode.c Modified: qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h (original) +++ qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h Thu Oct 23 14:00:06 2014 @@ -76,7 +76,8 @@ PyObject *qd_field_to_py(qd_parsed_field * These are temporary and will eventually be replaced by having an internal python * work queue that feeds a dedicated embedded-python thread. */ -void qd_python_lock(void); -void qd_python_unlock(void); +typedef PyGILState_STATE qd_python_lock_state_t; +qd_python_lock_state_t qd_python_lock(void); +void qd_python_unlock(qd_python_lock_state_t state); #endif Modified: qpid/dispatch/trunk/include/qpid/dispatch/threading.h URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/include/qpid/dispatch/threading.h?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/include/qpid/dispatch/threading.h (original) +++ qpid/dispatch/trunk/include/qpid/dispatch/threading.h Thu Oct 23 14:00:06 2014 @@ -51,4 +51,10 @@ sys_thread_t *sys_thread(void *(*run_fun void sys_thread_free(sys_thread_t *thread); void sys_thread_join(sys_thread_t *thread); +/** Return the OS identifier for this thread */ +long sys_thread_id(sys_thread_t *thread); + +/** Return the OS identifier for the current thread */ +long sys_thread_self(); + #endif Modified: qpid/dispatch/trunk/src/dispatch.c URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/dispatch.c?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/src/dispatch.c (original) +++ qpid/dispatch/trunk/src/dispatch.c Thu Oct 23 14:00:06 2014 @@ -78,15 +78,16 @@ STATIC_ASSERT(sizeof(long) >= sizeof(voi qd_error_t qd_dispatch_load_config(qd_dispatch_t *qd, const char *config_path) { + qd_python_lock_state_t lock_state = qd_python_lock(); PyObject *module = PyImport_ImportModule("qpid_dispatch_internal.management.config"); - if (!module) return qd_error_py(); - PyObject *configure_dispatch = PyObject_GetAttrString(module, "configure_dispatch"); - Py_DECREF(module); - if (!configure_dispatch) return qd_error_py(); - PyObject *result = PyObject_CallFunction(configure_dispatch, "(ls)", (long)qd, config_path); - Py_DECREF(configure_dispatch); - if (!result) return qd_error_py(); - return QD_ERROR_NONE; + PyObject *configure_dispatch = module ? PyObject_GetAttrString(module, "configure_dispatch") : NULL; + Py_XDECREF(module); + PyObject *result = configure_dispatch ? PyObject_CallFunction(configure_dispatch, "(ls)", (long)qd, config_path) : NULL; + Py_XDECREF(configure_dispatch); + if (!result) qd_error_py(); + Py_XDECREF(result); + qd_python_unlock(lock_state); + return qd_error_code(); } Modified: qpid/dispatch/trunk/src/error.c URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/error.c?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/src/error.c (original) +++ qpid/dispatch/trunk/src/error.c Thu Oct 23 14:00:06 2014 @@ -144,16 +144,20 @@ qd_error_t qd_error_py_impl(const char * PyObject *type, *value, *trace; PyErr_Fetch(&type, &value, &trace); /* Note clears the python error indicator */ - PyObject *type_name = type ? PyObject_GetAttrString(type, "__name__") : NULL; - PyErr_Print(); - PyObject *value_str = value ? PyObject_Str(value) : NULL; - const char* str = value_str ? PyString_AsString(value_str) : "Unknown"; + PyObject *py_type_name = type ? PyObject_GetAttrString(type, "__name__") : NULL; + const char *type_name = py_type_name ? PyString_AsString(py_type_name) : NULL; + + PyObject *py_value_str = value ? PyObject_Str(value) : NULL; + const char *value_str = py_value_str ? PyString_AsString(py_value_str) : NULL; + if (!value_str) value_str = "Unknown"; + + PyErr_Clear(); /* Ignore errors while we're trying to build the values. */ if (type_name) - qd_error_impl(QD_ERROR_PYTHON, file, line, "%s: %s", PyString_AsString(type_name), str); + qd_error_impl(QD_ERROR_PYTHON, file, line, "%s: %s", type_name, value_str); else - qd_error_impl(QD_ERROR_PYTHON, file, line, "%s", str); - Py_XDECREF(value_str); - Py_XDECREF(type_name); + qd_error_impl(QD_ERROR_PYTHON, file, line, "%s", value_str); + Py_XDECREF(py_value_str); + Py_XDECREF(py_type_name); log_trace_py(type, value, trace, QD_LOG_ERROR, file, line); Modified: qpid/dispatch/trunk/src/posix/threading.c URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/posix/threading.c?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/src/posix/threading.c (original) +++ qpid/dispatch/trunk/src/posix/threading.c Thu Oct 23 14:00:06 2014 @@ -24,14 +24,27 @@ struct sys_mutex_t { pthread_mutex_t mutex; +#ifndef NDEBUG + // In a debug build, used to assert correct use of mutex. int acquired; +#endif }; +// NOTE: normally it is incorrect for an assert expression to have side effects, +// since it could change the behavior between a debug and a release build. In +// this case however the mutex->acquired field only exists in a debug build, so +// we want operations on mutex->acquired to be compiled out of a release build. +#define ACQUIRE(mutex) assert(!mutex->acquired++) +#define RELEASE(mutex) assert(!--mutex->acquired) + + sys_mutex_t *sys_mutex(void) { sys_mutex_t *mutex = NEW(sys_mutex_t); pthread_mutex_init(&(mutex->mutex), 0); +#ifndef NDEBUG mutex->acquired = 0; +#endif return mutex; } @@ -47,15 +60,13 @@ void sys_mutex_free(sys_mutex_t *mutex) void sys_mutex_lock(sys_mutex_t *mutex) { pthread_mutex_lock(&(mutex->mutex)); - assert(!mutex->acquired); - mutex->acquired++; + ACQUIRE(mutex); } void sys_mutex_unlock(sys_mutex_t *mutex) { - mutex->acquired--; - assert(!mutex->acquired); + RELEASE(mutex); pthread_mutex_unlock(&(mutex->mutex)); } @@ -82,10 +93,9 @@ void sys_cond_free(sys_cond_t *cond) void sys_cond_wait(sys_cond_t *cond, sys_mutex_t *held_mutex) { - assert(held_mutex->acquired); - held_mutex->acquired--; + RELEASE(held_mutex); pthread_cond_wait(&(cond->cond), &(held_mutex->mutex)); - held_mutex->acquired++; + ACQUIRE(held_mutex); } @@ -150,6 +160,13 @@ sys_thread_t *sys_thread(void *(*run_fun return thread; } +long sys_thread_id(sys_thread_t *thread) { + return (long) thread->thread; +} + +long sys_thread_self() { + return pthread_self(); +} void sys_thread_free(sys_thread_t *thread) { Modified: qpid/dispatch/trunk/src/python_embedded.c URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/python_embedded.c?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/src/python_embedded.c (original) +++ qpid/dispatch/trunk/src/python_embedded.c Thu Oct 23 14:00:06 2014 @@ -441,7 +441,7 @@ static PyObject *py_iter_parse(qd_field_ } PyObject *value = qd_field_to_py(parsed); qd_parse_free(parsed); - qd_error_py(); + if (!value) qd_error_py(); return value; } qd_error(QD_ERROR_MESSAGE, "Failed to parse message field"); @@ -489,12 +489,14 @@ static void qd_io_rx_handler(void *conte if (!qd_message_check(msg, QD_DEPTH_BODY)) return; + // This is called from non-python threads so we need to acquire the GIL to use python APIS. + qd_python_lock_state_t lock_state = qd_python_lock(); PyObject *py_msg = PyObject_CallFunction(message_type, NULL); if (!py_msg) { qd_error_py(); + qd_python_unlock(lock_state); return; } - iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_TO), py_iter_copy, py_msg, "address"); iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_REPLY_TO), py_iter_copy, py_msg, "reply_to"); // Note: correlation ID requires _typed() @@ -502,13 +504,12 @@ static void qd_io_rx_handler(void *conte iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_APPLICATION_PROPERTIES), py_iter_parse, py_msg, "properties"); iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_BODY), py_iter_parse, py_msg, "body"); - sys_mutex_lock(ilock); PyObject *value = PyObject_CallFunction(self->handler, "Ol", py_msg, link_id); - sys_mutex_unlock(ilock); Py_DECREF(py_msg); Py_XDECREF(value); qd_error_py(); + qd_python_unlock(lock_state); } @@ -730,12 +731,13 @@ static void qd_python_setup(void) } } -void qd_python_lock(void) +qd_python_lock_state_t qd_python_lock(void) { sys_mutex_lock(ilock); + return 0; } -void qd_python_unlock(void) +void qd_python_unlock(qd_python_lock_state_t lock_state) { sys_mutex_unlock(ilock); } Modified: qpid/dispatch/trunk/src/router_agent.c URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/router_agent.c?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/src/router_agent.c (original) +++ qpid/dispatch/trunk/src/router_agent.c Thu Oct 23 14:00:06 2014 @@ -256,7 +256,7 @@ qd_error_t qd_c_entity_update_router_lin qd_router_link_t *link = (qd_router_link_t*) impl; /* FIXME aconway 2014-10-17: old management used link->bit_mask as name/identity, * but even when prefixed with router.link this is not unique. Let python agent - * generate a name for now, revisit with a better name later. + * generate a name for now. A better ID would be router.link:<remote container>.<link name>. */ if (!qd_entity_set_string(entity, "linkType", qd_link_type_name(link->link_type)) && !qd_entity_set_string(entity, "linkDir", (link->link_direction == QD_INCOMING) ? "in": "out") && Modified: qpid/dispatch/trunk/src/router_pynode.c URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/router_pynode.c?rev=1633814&r1=1633813&r2=1633814&view=diff ============================================================================== --- qpid/dispatch/trunk/src/router_pynode.c (original) +++ qpid/dispatch/trunk/src/router_pynode.c Thu Oct 23 14:00:06 2014 @@ -632,12 +632,12 @@ qd_error_t qd_pyrouter_tick(qd_router_t PyObject *pValue; if (pyTick && router->router_mode == QD_ROUTER_MODE_INTERIOR) { - qd_python_lock(); + qd_python_lock_state_t lock_state = qd_python_lock(); pArgs = PyTuple_New(0); pValue = PyObject_CallObject(pyTick, pArgs); Py_DECREF(pArgs); Py_XDECREF(pValue); - qd_python_unlock(); + qd_python_unlock(lock_state); } return qd_error_py(); } @@ -652,14 +652,14 @@ void qd_router_mobile_added(qd_router_t qd_field_iterator_reset_view(iter, ITER_VIEW_ADDRESS_HASH); char *address = (char*) qd_field_iterator_copy(iter); - qd_python_lock(); + qd_python_lock_state_t lock_state = qd_python_lock(); pArgs = PyTuple_New(1); PyTuple_SetItem(pArgs, 0, PyString_FromString(address)); pValue = PyObject_CallObject(pyAdded, pArgs); qd_error_py(); Py_DECREF(pArgs); Py_XDECREF(pValue); - qd_python_unlock(); + qd_python_unlock(lock_state); free(address); } @@ -672,14 +672,14 @@ void qd_router_mobile_removed(qd_router_ PyObject *pValue; if (pyRemoved && router->router_mode == QD_ROUTER_MODE_INTERIOR) { - qd_python_lock(); + qd_python_lock_state_t lock_state = qd_python_lock(); pArgs = PyTuple_New(1); PyTuple_SetItem(pArgs, 0, PyString_FromString(address)); pValue = PyObject_CallObject(pyRemoved, pArgs); qd_error_py(); Py_DECREF(pArgs); Py_XDECREF(pValue); - qd_python_unlock(); + qd_python_unlock(lock_state); } } @@ -690,14 +690,14 @@ void qd_router_link_lost(qd_router_t *ro PyObject *pValue; if (pyRemoved && router->router_mode == QD_ROUTER_MODE_INTERIOR) { - qd_python_lock(); + qd_python_lock_state_t lock_state = qd_python_lock(); pArgs = PyTuple_New(1); PyTuple_SetItem(pArgs, 0, PyInt_FromLong((long) link_mask_bit)); pValue = PyObject_CallObject(pyLinkLost, pArgs); qd_error_py(); Py_DECREF(pArgs); Py_XDECREF(pValue); - qd_python_unlock(); + qd_python_unlock(lock_state); } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
