Steven Dake napsal(a):
Looks really good. one point below about error handling when writing to full pipe.Regards -steve On 06/16/2011 08:18 AM, Jan Friesse wrote:Following situation could happen: - one thread is waiting for finish write operation (line 853), objdb is locked - flush (done in objdb_notify_dispatch) is called in main thread, but this call will never appear because main thread is waiting for objdb lock. In this situation deadlock appears. Commit solves this by: - setting pipe to non-blocking mode - pipe is used only as trigger for coropoll - dispatch messages are stored in list - main thread is processing messages from list Signed-off-by: Jan Friesse <[email protected]> --- services/confdb.c | 143 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 119 insertions(+), 24 deletions(-) diff --git a/services/confdb.c b/services/confdb.c index 64def8a..fce7b7d 100644 --- a/services/confdb.c +++ b/services/confdb.c @@ -71,14 +71,22 @@ static int notify_pipe[2];struct confdb_ipc_message_holder {void *conn; - void *msg; size_t mlen; + struct list_head list; + char msg[]; };+DECLARE_LIST_INIT(confdb_ipc_message_holder_list_head);+ +pthread_mutex_t confdb_ipc_message_holder_list_mutex = + PTHREAD_MUTEX_INITIALIZER; + static int confdb_exec_init_fn ( struct corosync_api_v1 *corosync_api); static int confdb_exec_exit_fn(void);+static int fd_set_nonblocking(int fd);+ static int objdb_notify_dispatch(hdb_handle_t handle, int fd, int revents, void *data);@@ -322,6 +330,8 @@ static int confdb_exec_exit_fn(void)static int confdb_exec_init_fn ( struct corosync_api_v1 *corosync_api) { + int i; + #ifdef COROSYNC_SOLARIS logsys_subsys_init(); #endif @@ -331,6 +341,12 @@ static int confdb_exec_init_fn ( return -1; }+ for (i = 0; i < 2; i++) {+ if (fd_set_nonblocking (notify_pipe[i]) == -1) { + return -1; + } + } + return poll_dispatch_add(api->poll_handle_get(), notify_pipe[0], POLLIN, NULL, objdb_notify_dispatch); } @@ -353,6 +369,23 @@ static int confdb_lib_exit_fn (void *conn) return (0); }+static int fd_set_nonblocking(int fd)+{ + int flags; + int res; + + flags = fcntl (fd, F_GETFL); + if (flags == -1) { + return -1; + } + + flags |= O_NONBLOCK; + + res = fcntl (fd, F_SETFL, flags); + + return res; +} + static void message_handler_req_lib_confdb_object_create (void *conn, const void *message) { @@ -827,52 +860,114 @@ static void message_handler_req_lib_confdb_reload (void *conn, static int objdb_notify_dispatch(hdb_handle_t handle, int fd, int revents, void *data) { - struct confdb_ipc_message_holder holder; + struct confdb_ipc_message_holder *holder; ssize_t rc; + char pipe_cmd;if (revents & POLLHUP) {return -1; } + + pthread_mutex_lock (&confdb_ipc_message_holder_list_mutex); + retry_read: - rc = read(fd, &holder, sizeof(struct confdb_ipc_message_holder)); - if (rc == -1 && errno == EINTR) { - goto retry_read; + rc = read(fd, &pipe_cmd, sizeof(pipe_cmd)); + if (rc == sizeof(pipe_cmd)) { + goto retry_read; /* Flush whole buffer */ } - if (rc != sizeof(struct confdb_ipc_message_holder)) { - return 0; + + if (rc == -1) { + if (errno == EINTR) { + goto retry_read; + } + + if (errno != EAGAIN && errno != EWOULDBLOCK) { + goto unlock_exit; + } + } else { + goto unlock_exit; /* rc != -1 && rc != 1 -> end of file */ }- api->ipc_dispatch_send(holder.conn, holder.msg, holder.mlen);+ while (!list_empty (&confdb_ipc_message_holder_list_head)) { + holder = list_entry (confdb_ipc_message_holder_list_head.next, + struct confdb_ipc_message_holder, list);- api->ipc_refcnt_dec(holder.conn);+ list_del (&holder->list); + + /* + * All list operations are done now, so unlock list mutex to + * prevent deadlock in IPC. + */ + pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex); + + api->ipc_dispatch_send(holder->conn, holder->msg, holder->mlen); + + api->ipc_refcnt_dec(holder->conn); + + free(holder); + + /* + * Next operation is again list one, so lock list again. + */ + pthread_mutex_lock (&confdb_ipc_message_holder_list_mutex); + } + +unlock_exit: + pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex);- free(holder.msg);return 0; }static int32_t ipc_dispatch_send_from_poll_thread(void *conn, const void *msg, size_t mlen){ - struct confdb_ipc_message_holder holder; + struct confdb_ipc_message_holder *holder; ssize_t written; + size_t holder_size; + char pipe_cmd;api->ipc_refcnt_inc(conn); - holder.conn = conn;- holder.msg = malloc(mlen); - memcpy(holder.msg, msg, mlen); - holder.mlen = mlen; + holder_size = sizeof (*holder) + mlen; + holder = malloc (holder_size); + if (holder == NULL) { + return -1; + } + + memset(holder, 0, holder_size); + holder->conn = conn; + holder->mlen = mlen; + memcpy(holder->msg, msg, mlen); + list_init(&holder->list); + + pthread_mutex_lock (&confdb_ipc_message_holder_list_mutex); + + list_add_tail (&holder->list, &confdb_ipc_message_holder_list_head);+ pipe_cmd = 'M'; /* Message */retry_write: - written = write(notify_pipe[1], &holder, sizeof(struct confdb_ipc_message_holder)); - if (written == -1 && errno == EINTR) { - goto retry_write; - } - if (written == sizeof(struct confdb_ipc_message_holder)) { - return 0; - } else { - api->ipc_refcnt_dec(conn); - return -1; + written = write(notify_pipe[1], &pipe_cmd, sizeof(pipe_cmd)); + + if (written == -1) { + if (errno == EINTR) { + goto retry_write; + } + + if (errno != EAGAIN && errno != EWOULDBLOCK) { + goto refcnt_unlock_exit; /* different error then + block */ + } + } else if (written != sizeof (pipe_cmd)) { + goto refcnt_unlock_exit; }I'm pretty sure this isn't correct - what happens when written == -1 because of EAGAIN? The event is entirely lost. I could see where ths could cause failures in some bizarre reproduction scenarios. This condition needs to be handled.
Steve,
please take a look to that code again.
+ if (errno != EAGAIN && errno != EWOULDBLOCK) {
-> if (written == -1 && !(errno == EAGAIN || errno == EWOULDBLOCK)) {
goto error
so if errno == EAGAIN code continues on pthread_mutex_unlock.but truth is, that I found little different error with ipc_refcnt, so attached is better patch.
+ pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex); + + return 0; + +refcnt_unlock_exit: + api->ipc_refcnt_dec(conn); + pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex); + + return -1; }static void confdb_notify_lib_of_key_change(object_change_type_t change_type,
>From 036d0e66491b69352bb86694d80ef31a9558e05b Mon Sep 17 00:00:00 2001 From: Jan Friesse <[email protected]> Date: Wed, 15 Jun 2011 15:54:23 +0200 Subject: [PATCH] confdb: Resolve dispatch deadlock Following situation could happen: - one thread is waiting for finish write operation (line 853), objdb is locked - flush (done in objdb_notify_dispatch) is called in main thread, but this call will never appear because main thread is waiting for objdb lock. In this situation deadlock appears. Commit solves this by: - setting pipe to non-blocking mode - pipe is used only as trigger for coropoll - dispatch messages are stored in list - main thread is processing messages from list Signed-off-by: Jan Friesse <[email protected]> --- services/confdb.c | 148 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 124 insertions(+), 24 deletions(-) diff --git a/services/confdb.c b/services/confdb.c index 64def8a..0083ecc 100644 --- a/services/confdb.c +++ b/services/confdb.c @@ -71,14 +71,22 @@ static int notify_pipe[2]; struct confdb_ipc_message_holder { void *conn; - void *msg; size_t mlen; + struct list_head list; + char msg[]; }; +DECLARE_LIST_INIT(confdb_ipc_message_holder_list_head); + +pthread_mutex_t confdb_ipc_message_holder_list_mutex = + PTHREAD_MUTEX_INITIALIZER; + static int confdb_exec_init_fn ( struct corosync_api_v1 *corosync_api); static int confdb_exec_exit_fn(void); +static int fd_set_nonblocking(int fd); + static int objdb_notify_dispatch(hdb_handle_t handle, int fd, int revents, void *data); @@ -322,6 +330,8 @@ static int confdb_exec_exit_fn(void) static int confdb_exec_init_fn ( struct corosync_api_v1 *corosync_api) { + int i; + #ifdef COROSYNC_SOLARIS logsys_subsys_init(); #endif @@ -331,6 +341,12 @@ static int confdb_exec_init_fn ( return -1; } + for (i = 0; i < 2; i++) { + if (fd_set_nonblocking (notify_pipe[i]) == -1) { + return -1; + } + } + return poll_dispatch_add(api->poll_handle_get(), notify_pipe[0], POLLIN, NULL, objdb_notify_dispatch); } @@ -353,6 +369,23 @@ static int confdb_lib_exit_fn (void *conn) return (0); } +static int fd_set_nonblocking(int fd) +{ + int flags; + int res; + + flags = fcntl (fd, F_GETFL); + if (flags == -1) { + return -1; + } + + flags |= O_NONBLOCK; + + res = fcntl (fd, F_SETFL, flags); + + return res; +} + static void message_handler_req_lib_confdb_object_create (void *conn, const void *message) { @@ -827,52 +860,119 @@ static void message_handler_req_lib_confdb_reload (void *conn, static int objdb_notify_dispatch(hdb_handle_t handle, int fd, int revents, void *data) { - struct confdb_ipc_message_holder holder; + struct confdb_ipc_message_holder *holder; ssize_t rc; + char pipe_cmd; if (revents & POLLHUP) { return -1; } + + pthread_mutex_lock (&confdb_ipc_message_holder_list_mutex); + retry_read: - rc = read(fd, &holder, sizeof(struct confdb_ipc_message_holder)); - if (rc == -1 && errno == EINTR) { - goto retry_read; + rc = read(fd, &pipe_cmd, sizeof(pipe_cmd)); + if (rc == sizeof(pipe_cmd)) { + goto retry_read; /* Flush whole buffer */ } - if (rc != sizeof(struct confdb_ipc_message_holder)) { - return 0; + + if (rc == -1) { + if (errno == EINTR) { + goto retry_read; + } + + if (errno != EAGAIN && errno != EWOULDBLOCK) { + goto unlock_exit; + } + } else { + goto unlock_exit; /* rc != -1 && rc != 1 -> end of file */ } - api->ipc_dispatch_send(holder.conn, holder.msg, holder.mlen); + while (!list_empty (&confdb_ipc_message_holder_list_head)) { + holder = list_entry (confdb_ipc_message_holder_list_head.next, + struct confdb_ipc_message_holder, list); + + list_del (&holder->list); - api->ipc_refcnt_dec(holder.conn); + /* + * All list operations are done now, so unlock list mutex to + * prevent deadlock in IPC. + */ + pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex); + + api->ipc_dispatch_send(holder->conn, holder->msg, holder->mlen); + + api->ipc_refcnt_dec(holder->conn); + + free(holder); + + /* + * Next operation is again list one, so lock list again. + */ + pthread_mutex_lock (&confdb_ipc_message_holder_list_mutex); + } + +unlock_exit: + pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex); - free(holder.msg); return 0; } static int32_t ipc_dispatch_send_from_poll_thread(void *conn, const void *msg, size_t mlen) { - struct confdb_ipc_message_holder holder; + struct confdb_ipc_message_holder *holder; ssize_t written; + size_t holder_size; + char pipe_cmd; api->ipc_refcnt_inc(conn); - holder.conn = conn; - holder.msg = malloc(mlen); - memcpy(holder.msg, msg, mlen); - holder.mlen = mlen; - -retry_write: - written = write(notify_pipe[1], &holder, sizeof(struct confdb_ipc_message_holder)); - if (written == -1 && errno == EINTR) { - goto retry_write; - } - if (written == sizeof(struct confdb_ipc_message_holder)) { - return 0; - } else { + holder_size = sizeof (*holder) + mlen; + holder = malloc (holder_size); + if (holder == NULL) { api->ipc_refcnt_dec(conn); return -1; } + + memset(holder, 0, holder_size); + holder->conn = conn; + holder->mlen = mlen; + memcpy(holder->msg, msg, mlen); + list_init(&holder->list); + + pthread_mutex_lock (&confdb_ipc_message_holder_list_mutex); + + list_add_tail (&holder->list, &confdb_ipc_message_holder_list_head); + + pipe_cmd = 'M'; /* Message */ +retry_write: + written = write(notify_pipe[1], &pipe_cmd, sizeof(pipe_cmd)); + + if (written == -1) { + if (errno == EINTR) { + goto retry_write; + } + + if (errno != EAGAIN && errno != EWOULDBLOCK) { + /* + * Different error then EINTR or BLOCK -> exit with error + */ + goto refcnt_del_unlock_exit; + } + } else if (written != sizeof (pipe_cmd)) { + goto refcnt_del_unlock_exit; + } + pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex); + + return 0; + +refcnt_del_unlock_exit: + list_del (&holder->list); + free(holder); + api->ipc_refcnt_dec(conn); + pthread_mutex_unlock (&confdb_ipc_message_holder_list_mutex); + + return -1; } static void confdb_notify_lib_of_key_change(object_change_type_t change_type, -- 1.7.1
_______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
