On (23/02/16 14:28), Lukas Slebodnik wrote:
>On (23/02/16 13:37), Jakub Hrozek wrote:
>>On Thu, Feb 18, 2016 at 02:04:54PM +0100, Lukas Slebodnik wrote:
>>> ehlo,
>>> 
>>> It took me some time to reproduce issue with cron.
>>> It occured very rarely in my case (twice in a week).
>>> 
>>> Therefore I prepared different reproducer "mini_cron.c"
>>> attached in mail. It tries to query for data in the interval
>>> (9.990 sec .. 10.069 sec) when responder might destroy connection.
>>> mini_cron expect there is a user mof_user1 in LDAP and client_idle_timeout
>>> has minimal value 10 seconds. Default is 60 seconds. It's also good to
>>> decrease memory cache timeout to ensure connection to responder every time.
>>> 
>>> e.g.
>>> [sssd]
>>> config_file_version = 2
>>> services = nss, pam
>>> domains = LDAP
>>> client_idle_timeout = 10
>>> 
>>> [nss]
>>> filter_groups = root
>>> filter_users = root
>>> memcache_timeout = 0
>>> client_idle_timeout = 10
>>> debug_level =9
>>> debug_microseconds = true
>>> 
>>> Detailed explanation is in commit message.
>>> 
>>> Attached is also a debug patch which I used as part of
>>> analysis when it can fail.
>>
>>Thank you.
>>
>>> 
>>> BTW I ran mini_cron reprodurer for a week and it didn't fail.
>>> 
>>> LS
>>
>>The code looks good to me and sanity testing of the clients passed as
>>well -> ACK
>>
>>CI is still running.
>I realized that I didn't fix it in pam client.
>
Updated version is attached.

LS
>From e96b5435f8a74ab5287d970e3263b0238402ac96 Mon Sep 17 00:00:00 2001
From: Lukas Slebodnik <[email protected]>
Date: Fri, 26 Feb 2016 16:06:50 +0100
Subject: [PATCH 1/2] CLIENT: Reduce code duplication

Patch for #2626 will be simpler with this small refactoring
---
 src/sss_client/common.c | 56 ++++++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/src/sss_client/common.c b/src/sss_client/common.c
index 
0d9106a04c30054e6acc9c0c420ccdd43590709a..c1de25654df9f59462909c981741a2d02ade8383
 100644
--- a/src/sss_client/common.c
+++ b/src/sss_client/common.c
@@ -829,6 +829,7 @@ int sss_pam_make_request(enum sss_cli_command cmd,
     enum sss_status status;
     char *envval;
     struct stat stat_buf;
+    const char *socket_name;
 
     sss_pam_lock();
 
@@ -841,7 +842,8 @@ int sss_pam_make_request(enum sss_cli_command cmd,
 
     /* only root shall use the privileged pipe */
     if (getuid() == 0 && getgid() == 0) {
-        statret = stat(SSS_PAM_PRIV_SOCKET_NAME, &stat_buf);
+        socket_name = SSS_PAM_PRIV_SOCKET_NAME;
+        statret = stat(socket_name, &stat_buf);
         if (statret != 0) {
             ret = PAM_SERVICE_ERR;
             goto out;
@@ -854,10 +856,9 @@ int sss_pam_make_request(enum sss_cli_command cmd,
             ret = PAM_SERVICE_ERR;
             goto out;
         }
-
-        status = sss_cli_check_socket(errnop, SSS_PAM_PRIV_SOCKET_NAME);
     } else {
-        statret = stat(SSS_PAM_SOCKET_NAME, &stat_buf);
+        socket_name = SSS_PAM_SOCKET_NAME;
+        statret = stat(socket_name, &stat_buf);
         if (statret != 0) {
             ret = PAM_SERVICE_ERR;
             goto out;
@@ -870,9 +871,9 @@ int sss_pam_make_request(enum sss_cli_command cmd,
             ret = PAM_SERVICE_ERR;
             goto out;
         }
-
-        status = sss_cli_check_socket(errnop, SSS_PAM_SOCKET_NAME);
     }
+
+    status = sss_cli_check_socket(errnop, socket_name);
     if (status != SSS_STATUS_SUCCESS) {
         ret = PAM_SERVICE_ERR;
         goto out;
@@ -910,6 +911,25 @@ void sss_pam_close_fd(void)
     sss_pam_unlock();
 }
 
+static enum sss_status
+sss_cli_make_request_with_checks(enum sss_cli_command cmd,
+                                 struct sss_cli_req_data *rd,
+                                 uint8_t **repbuf, size_t *replen,
+                                 int *errnop,
+                                 const char *socket_name)
+{
+    enum sss_status ret = SSS_STATUS_UNAVAIL;
+
+    ret = sss_cli_check_socket(errnop, socket_name);
+    if (ret != SSS_STATUS_SUCCESS) {
+        return SSS_STATUS_UNAVAIL;
+    }
+
+    ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+
+    return ret;
+}
+
 int sss_sudo_make_request(enum sss_cli_command cmd,
                           struct sss_cli_req_data *rd,
                           uint8_t **repbuf, size_t *replen,
@@ -917,12 +937,8 @@ int sss_sudo_make_request(enum sss_cli_command cmd,
 {
     enum sss_status ret = SSS_STATUS_UNAVAIL;
 
-    ret = sss_cli_check_socket(errnop, SSS_SUDO_SOCKET_NAME);
-    if (ret != SSS_STATUS_SUCCESS) {
-        return SSS_STATUS_UNAVAIL;
-    }
-
-    ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    ret = sss_cli_make_request_with_checks(cmd, rd, repbuf, replen, errnop,
+                                           SSS_SUDO_SOCKET_NAME);
 
     return ret;
 }
@@ -934,12 +950,8 @@ int sss_autofs_make_request(enum sss_cli_command cmd,
 {
     enum sss_status ret = SSS_STATUS_UNAVAIL;
 
-    ret = sss_cli_check_socket(errnop, SSS_AUTOFS_SOCKET_NAME);
-    if (ret != SSS_STATUS_SUCCESS) {
-        return SSS_STATUS_UNAVAIL;
-    }
-
-    ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    ret = sss_cli_make_request_with_checks(cmd, rd, repbuf, replen, errnop,
+                                           SSS_AUTOFS_SOCKET_NAME);
 
     return ret;
 }
@@ -951,12 +963,8 @@ int sss_ssh_make_request(enum sss_cli_command cmd,
 {
     enum sss_status ret = SSS_STATUS_UNAVAIL;
 
-    ret = sss_cli_check_socket(errnop, SSS_SSH_SOCKET_NAME);
-    if (ret != SSS_STATUS_SUCCESS) {
-        return SSS_STATUS_UNAVAIL;
-    }
-
-    ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    ret = sss_cli_make_request_with_checks(cmd, rd, repbuf, replen, errnop,
+                                           SSS_SSH_SOCKET_NAME);
 
     return ret;
 }
-- 
2.5.0

>From 1469e21942f6791e744abfec3193c945f96f9af1 Mon Sep 17 00:00:00 2001
From: Lukas Slebodnik <[email protected]>
Date: Wed, 17 Feb 2016 15:21:55 +0100
Subject: [PATCH 2/2] CLIENT: Retry request after EPIPE

We have a function sss_cli_check_socket which checks
socket in client code. The socket is reopened in case of some
issues e.g. responder terminated connections ...

We use syscall poll for checking status of socket.
It's not 100% reliable method because there is still
chance that responder will terminate socket after this check.

Here is a schema of sss_*_make_request functions:
    sss_cli_check_socket
    sss_cli_make_request_nochecks {
       sss_cli_send_req {
           poll
           send
       }
       sss_cli_recv_rep {
           poll
           read
       }
    }

The syscall pool does not return EPIPE directly but we convert
special revents from poll to EPIPE. As it was mentioned earlier,
checking of socket in the sss_cli_check_socket is not 100% reliable.
It can happen very rarely due to TOCTOU issue (Time of check to time of use)

We can return EPIPE from the sss_cli_make_request_nochecks function
in case of failure in poll in sss_cli_send_req. The send function
in sss_cli_send_req can also return EPIPE is responder close socket
in the same time. The send function can succeed in sss_cli_send_req
but it does not mean that responder read the message. It can happen
that timer for closing socket can be handled before reading a message.
Therefore there is a still a chance that we might return EPIPE in case
of failure in poll in sss_cli_recv_rep.

Therefore we need to reconnect to responder(sss_cli_check_socket)
in case of EPIPE returned from sss_cli_make_request_nochecks and
try to do the same request one more time.

Resolves:
https://fedorahosted.org/sssd/ticket/2626
---
 src/sss_client/common.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/src/sss_client/common.c b/src/sss_client/common.c
index 
c1de25654df9f59462909c981741a2d02ade8383..4e3883e80ada21a430262753e43bb831f65e7c22
 100644
--- a/src/sss_client/common.c
+++ b/src/sss_client/common.c
@@ -734,6 +734,22 @@ enum nss_status sss_nss_make_request(enum sss_cli_command 
cmd,
     }
 
     ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    if (ret == SSS_STATUS_UNAVAIL && *errnop == EPIPE) {
+        /* try reopen socket */
+        ret = sss_cli_check_socket(errnop, SSS_NSS_SOCKET_NAME);
+        if (ret != SSS_STATUS_SUCCESS) {
+#ifdef NONSTANDARD_SSS_NSS_BEHAVIOUR
+            *errnop = 0;
+            errno = 0;
+            return NSS_STATUS_NOTFOUND;
+#else
+            return NSS_STATUS_UNAVAIL;
+#endif
+        }
+
+        /* and make request one more time */
+        ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    }
     switch (ret) {
     case SSS_STATUS_TRYAGAIN:
         return NSS_STATUS_TRYAGAIN;
@@ -784,6 +800,16 @@ int sss_pac_make_request(enum sss_cli_command cmd,
     }
 
     ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    if (ret == SSS_STATUS_UNAVAIL && *errnop == EPIPE) {
+        /* try reopen socket */
+        ret = sss_cli_check_socket(errnop, SSS_PAC_SOCKET_NAME);
+        if (ret != SSS_STATUS_SUCCESS) {
+            return NSS_STATUS_UNAVAIL;
+        }
+
+        /* and make request one more time */
+        ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    }
     switch (ret) {
     case SSS_STATUS_TRYAGAIN:
         return NSS_STATUS_TRYAGAIN;
@@ -888,6 +914,18 @@ int sss_pam_make_request(enum sss_cli_command cmd,
     }
 
     status = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    if (status == SSS_STATUS_UNAVAIL && *errnop == EPIPE) {
+        /* try reopen socket */
+        status = sss_cli_check_socket(errnop, socket_name);
+        if (status != SSS_STATUS_SUCCESS) {
+            ret = PAM_SERVICE_ERR;
+            goto out;
+        }
+
+        /* and make request one more time */
+        status = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, 
errnop);
+    }
+
     if (status == SSS_STATUS_SUCCESS) {
         ret = PAM_SUCCESS;
     } else {
@@ -926,6 +964,16 @@ sss_cli_make_request_with_checks(enum sss_cli_command cmd,
     }
 
     ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    if (ret == SSS_STATUS_UNAVAIL && *errnop == EPIPE) {
+        /* try reopen socket */
+        ret = sss_cli_check_socket(errnop, socket_name);
+        if (ret != SSS_STATUS_SUCCESS) {
+            return SSS_STATUS_UNAVAIL;
+        }
+
+        /* and make request one more time */
+        ret = sss_cli_make_request_nochecks(cmd, rd, repbuf, replen, errnop);
+    }
 
     return ret;
 }
-- 
2.5.0

_______________________________________________
sssd-devel mailing list
[email protected]
https://lists.fedorahosted.org/admin/lists/[email protected]

Reply via email to