On 08/08/2010 10:40 PM, Angus Salkeld wrote:
> On Sun, Aug 08, 2010 at 10:17:43PM -0700, Steven Dake wrote:
>> The model is when server has too many sockets in use, library
>> returns TRY_AGAIN?
> Hi Steve
>
> No, if the server runs out of fds then we shut down the listening
> socket. The library returns a LIB error, I believe.
>
> Then, when we have more fds, we set up the listening socket again.
>
> Is there any point in trying again in this situation? I would say this
> is not a "normal" error and probably shows a machine set up incorrectly.
> If processes start and quietly keep trying again, it might not help anyone.
>
> -Angus
>

The check is done at IPC connection time, right?  In that case, my guess is
that the server would appear not to be operating, and the library would
return "TRY_AGAIN".

In this case, the library should return CS_ERR_NO_RESOURCES, but I don't 
think that is possible.

I suppose for future versions of corosync we need to think a little more
clearly about the possible error conditions that every API can return:

1. server out of resources
2. client out of resources
3. server too busy
4. server rejected security
5. server not operational


>>
>> Regards
>> -steve
>>
>> On 08/08/2010 08:01 PM, Angus Salkeld wrote:
>>>   Whenever we accept a new connection or close an
>>>   existing one, check the number of available file
>>>   descriptors and either publish or withdraw the
>>>   IPC listening socket.
>>>
>>> Signed-off-by: Angus Salkeld<[email protected]>
>>> ---
>>>   exec/ipc.c |   92 
>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>>>   1 files changed, 86 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/exec/ipc.c b/exec/ipc.c
>>> index 5337d25..bf3102c 100644
>>> --- a/exec/ipc.c
>>> +++ b/exec/ipc.c
>>> @@ -99,6 +99,7 @@
>>>   #define MSG_SEND_UNLOCKED 1
>>>
>>>   static unsigned int g_gid_valid = 0;
>>> +static int32_t libais_server_fd = -1;
>>>
>>>   static void (*ipc_serialize_lock_fn) (void);
>>>
>>> @@ -161,6 +162,15 @@ static int priv_change (struct conn_info *conn_info);
>>>
>>>   static void ipc_disconnect (struct conn_info *conn_info);
>>>
>>> +static void server_socket_publish(void);
>>> +
>>> +static void server_socket_withdraw(void);
>>> +
>>> +static void server_socket_check(void);
>>> +
>>> +static int poll_handler_accept (poll_handle handle, int fd,
>>> +   int revent, void *data);
>>> +
>>>   static int ipc_thread_active (void *conn)
>>>   {
>>>     struct conn_info *conn_info = (struct conn_info *)conn;
>>> @@ -211,6 +221,7 @@ static inline int conn_info_destroy (struct conn_info 
>>> *conn_info)
>>>             conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) {
>>>             list_del (&conn_info->list);
>>>             close (conn_info->fd);
>>> +           server_socket_check();
>>>             free (conn_info);
>>>             return (-1);
>>>     }
>>> @@ -257,6 +268,7 @@ static inline int conn_info_destroy (struct conn_info 
>>> *conn_info)
>>>             free (conn_info->private_data);
>>>     }
>>>     close (conn_info->fd);
>>> +   server_socket_check();
>>>     free (conn_info);
>>>     ipc_serialize_unlock_fn();
>>>     return (-1);
>>> @@ -773,7 +785,12 @@ retry_accept:
>>>     }
>>>
>>>     if (new_fd == -1) {
>>> -           log_printf (LOG_LEVEL_ERROR, "ERROR: Could not accept Library 
>>> connection: %s\n", strerror (errno));
>>> +           log_printf (LOG_LEVEL_ERROR,
>>> +                   "ERROR: Could not accept Library connection: %s\n",
>>> +                   strerror (errno));
>>> +           if (errno == EMFILE || errno == ENFILE) {
>>> +                   server_socket_withdraw();
>>> +           }
>>>             return (0); /* This is an error, but -1 would indicate 
>>> disconnect from poll loop */
>>>     }
>>>
>>> @@ -802,6 +819,7 @@ retry_accept:
>>>     if (res != 0) {
>>>             close (new_fd);
>>>     }
>>> +   server_socket_check();
>>>
>>>     return (0);
>>>   }
>>> @@ -835,14 +853,23 @@ void openais_ipc_init (
>>>     void (*serialize_lock_fn) (void),
>>>     void (*serialize_unlock_fn) (void))
>>>   {
>>> -   int libais_server_fd;
>>> -   struct sockaddr_un un_addr;
>>> -   int res;
>>> -
>>>     ipc_serialize_lock_fn = serialize_lock_fn;
>>>
>>>     ipc_serialize_unlock_fn = serialize_unlock_fn;
>>>
>>> +   server_socket_publish();
>>> +
>>> +   g_gid_valid = gid_valid;
>>> +}
>>> +
>>> +static void server_socket_publish(void)
>>> +{
>>> +   int32_t res = 0;
>>> +   struct sockaddr_un un_addr;
>>> +
>>> +   log_printf(LOG_LEVEL_WARNING,
>>> +           "Publishing socket for client connections.\n");
>>> +
>>>     /*
>>>      * Create socket for libais clients, name socket, listen for connections
>>>      */
>>> @@ -885,8 +912,61 @@ void openais_ipc_init (
>>>            */
>>>           poll_dispatch_add (aisexec_poll_handle, libais_server_fd,
>>>                   POLLIN|POLLNVAL, 0, poll_handler_accept);
>>> +}
>>>
>>> -   g_gid_valid = gid_valid;
>>> +static void server_socket_withdraw(void)
>>> +{
>>> +   log_printf(LOG_LEVEL_WARNING,
>>> +           "Withdrawing socket for client connections.\n");
>>> +
>>> +   poll_dispatch_delete(aisexec_poll_handle, libais_server_fd);
>>> +   shutdown(libais_server_fd, SHUT_RDWR);
>>> +   close(libais_server_fd);
>>> +   libais_server_fd = -1;
>>> +}
>>> +
>>> +/*
>>> + * The actual used sockets is 12 but allowing a larger number
>>> + * for safety.
>>> + */
>>> +#define COROIPC_NUM_RESERVED_SOCKETS 25
>>> +
>>> +static int32_t num_avail_sockets(void)
>>> +{
>>> +   struct rlimit lim;
>>> +   int32_t open_socks = 0;
>>> +   int32_t res;
>>> +   struct list_head *list;
>>> +
>>> +   if (getrlimit(RLIMIT_NOFILE,&lim) == -1) {
>>> +           char error_str[100];
>>> +           strerror_r(errno, error_str, 100);
>>> +           log_printf(LOG_LEVEL_ERROR,
>>> +                   "getrlimit: %s\n", error_str);
>>> +           return -1;
>>> +   }
>>> +
>>> +   for (list = conn_info_list_head.next; list !=&conn_info_list_head;
>>> +           list = list->next) {
>>> +           open_socks++;
>>> +   }
>>> +   res = lim.rlim_cur - (open_socks + COROIPC_NUM_RESERVED_SOCKETS);
>>> +   log_printf(LOG_LEVEL_DEBUG,
>>> +           "(lim.rlim_cur:%lu - (open_socks:%d + reserved:%d) == %d\n",
>>> +           lim.rlim_cur, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res);
>>> +   return res;
>>> +}
>>> +
>>> +static void server_socket_check(void)
>>> +{
>>> +   int32_t num = num_avail_sockets();
>>> +
>>> +   if (libais_server_fd == -1&&   num>   0) {
>>> +           server_socket_publish();
>>> +   }
>>> +   else if (libais_server_fd != -1&&   num<= 0) {
>>> +           server_socket_withdraw();
>>> +   }
>>>   }
>>>
>>>   void openais_ipc_exit (void)
> _______________________________________________
> Openais mailing list
> [email protected]
> https://lists.linux-foundation.org/mailman/listinfo/openais

_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to