On 08/08/2010 10:40 PM, Angus Salkeld wrote: > On Sun, Aug 08, 2010 at 10:17:43PM -0700, Steven Dake wrote: >> The model is when server has too many sockets in use, library >> returns TRY_AGAIN? > Hi Steve > > No, if the server runs out of fds then we shut down the listening > socket. The library returns LIB error, I believe. > > Then when we have more fds we set up the listening socket again. > > Is there any point in trying again in this situation? I would say this > is not a "normal" error and probably shows a machine set up incorrectly. > If processes start and quietly keep trying again it might not help anyone. > > -Angus >
The check is done at ipc connection right? In that case, my guess is it would appear the server is not operating and return "TRY_AGAIN". In this case, the library should return CS_ERR_NO_RESOURCES, but I don't think that is possible. I suppose for future versions of corosync we need to think a little more clearly on the possible error conditions that every api can return: 1. server out of resources 2. client out of resources 3. server too busy 4. server rejected security 5. server not operational >> >> Regards >> -steve >> >> On 08/08/2010 08:01 PM, Angus Salkeld wrote: >>> Whenever we accept a new connection or close an >>> existing one, check the number of available file >>> descriptors and either publish or withdraw the >>> IPC listening socket. >>> >>> Signed-off-by: Angus Salkeld<[email protected]> >>> --- >>> exec/ipc.c | 92 >>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- >>> 1 files changed, 86 insertions(+), 6 deletions(-) >>> >>> diff --git a/exec/ipc.c b/exec/ipc.c >>> index 5337d25..bf3102c 100644 >>> --- a/exec/ipc.c >>> +++ b/exec/ipc.c >>> @@ -99,6 +99,7 @@ >>> #define MSG_SEND_UNLOCKED 1 >>> >>> static unsigned int g_gid_valid = 0; >>> +static int32_t libais_server_fd = -1; >>> >>> static void (*ipc_serialize_lock_fn) (void); >>> >>> @@ -161,6 +162,15 @@ static int priv_change (struct conn_info *conn_info); >>> >>> static void ipc_disconnect (struct conn_info *conn_info); >>> >>> +static void server_socket_publish(void); >>> + >>> +static void server_socket_withdraw(void); >>> + >>> +static void server_socket_check(void); >>> + >>> +static int poll_handler_accept (poll_handle handle, int fd, >>> + int revent, void *data); >>> + >>> static int ipc_thread_active (void *conn) >>> { >>> struct conn_info *conn_info = (struct conn_info *)conn; >>> @@ -211,6 +221,7 @@ static inline int conn_info_destroy (struct conn_info >>> *conn_info) >>> conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) { >>> list_del (&conn_info->list); >>> close 
(conn_info->fd); >>> + server_socket_check(); >>> free (conn_info); >>> return (-1); >>> } >>> @@ -257,6 +268,7 @@ static inline int conn_info_destroy (struct conn_info >>> *conn_info) >>> free (conn_info->private_data); >>> } >>> close (conn_info->fd); >>> + server_socket_check(); >>> free (conn_info); >>> ipc_serialize_unlock_fn(); >>> return (-1); >>> @@ -773,7 +785,12 @@ retry_accept: >>> } >>> >>> if (new_fd == -1) { >>> - log_printf (LOG_LEVEL_ERROR, "ERROR: Could not accept Library >>> connection: %s\n", strerror (errno)); >>> + log_printf (LOG_LEVEL_ERROR, >>> + "ERROR: Could not accept Library connection: %s\n", >>> + strerror (errno)); >>> + if (errno == EMFILE || errno == ENFILE) { >>> + server_socket_withdraw(); >>> + } >>> return (0); /* This is an error, but -1 would indicate >>> disconnect from poll loop */ >>> } >>> >>> @@ -802,6 +819,7 @@ retry_accept: >>> if (res != 0) { >>> close (new_fd); >>> } >>> + server_socket_check(); >>> >>> return (0); >>> } >>> @@ -835,14 +853,23 @@ void openais_ipc_init ( >>> void (*serialize_lock_fn) (void), >>> void (*serialize_unlock_fn) (void)) >>> { >>> - int libais_server_fd; >>> - struct sockaddr_un un_addr; >>> - int res; >>> - >>> ipc_serialize_lock_fn = serialize_lock_fn; >>> >>> ipc_serialize_unlock_fn = serialize_unlock_fn; >>> >>> + server_socket_publish(); >>> + >>> + g_gid_valid = gid_valid; >>> +} >>> + >>> +static void server_socket_publish(void) >>> +{ >>> + int32_t res = 0; >>> + struct sockaddr_un un_addr; >>> + >>> + log_printf(LOG_LEVEL_WARNING, >>> + "Publishing socket for client connections.\n"); >>> + >>> /* >>> * Create socket for libais clients, name socket, listen for connections >>> */ >>> @@ -885,8 +912,61 @@ void openais_ipc_init ( >>> */ >>> poll_dispatch_add (aisexec_poll_handle, libais_server_fd, >>> POLLIN|POLLNVAL, 0, poll_handler_accept); >>> +} >>> >>> - g_gid_valid = gid_valid; >>> +static void server_socket_withdraw(void) >>> +{ >>> + log_printf(LOG_LEVEL_WARNING, >>> + 
"Withdrawing socket for client connections.\n"); >>> + >>> + poll_dispatch_delete(aisexec_poll_handle, libais_server_fd); >>> + shutdown(libais_server_fd, SHUT_RDWR); >>> + close(libais_server_fd); >>> + libais_server_fd = -1; >>> +} >>> + >>> +/* >>> + * The actual used sockets is 12 but allowing a larger number >>> + * for safety. >>> + */ >>> +#define COROIPC_NUM_RESERVED_SOCKETS 25 >>> + >>> +static int32_t num_avail_sockets(void) >>> +{ >>> + struct rlimit lim; >>> + int32_t open_socks = 0; >>> + int32_t res; >>> + struct list_head *list; >>> + >>> + if (getrlimit(RLIMIT_NOFILE,&lim) == -1) { >>> + char error_str[100]; >>> + strerror_r(errno, error_str, 100); >>> + log_printf(LOG_LEVEL_ERROR, >>> + "getrlimit: %s\n", error_str); >>> + return -1; >>> + } >>> + >>> + for (list = conn_info_list_head.next; list !=&conn_info_list_head; >>> + list = list->next) { >>> + open_socks++; >>> + } >>> + res = lim.rlim_cur - (open_socks + COROIPC_NUM_RESERVED_SOCKETS); >>> + log_printf(LOG_LEVEL_DEBUG, >>> + "(lim.rlim_cur:%lu - (open_socks:%d + reserved:%d) == %d\n", >>> + lim.rlim_cur, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res); >>> + return res; >>> +} >>> + >>> +static void server_socket_check(void) >>> +{ >>> + int32_t num = num_avail_sockets(); >>> + >>> + if (libais_server_fd == -1&& num> 0) { >>> + server_socket_publish(); >>> + } >>> + else if (libais_server_fd != -1&& num<= 0) { >>> + server_socket_withdraw(); >>> + } >>> } >>> >>> void openais_ipc_exit (void) > _______________________________________________ > Openais mailing list > [email protected] > https://lists.linux-foundation.org/mailman/listinfo/openais _______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
