This is an automated email from the ASF dual-hosted git repository.

jpeach pushed a commit to branch master
in repository https://git-dual.apache.org/repos/asf/trafficserver.git

The following commit(s) were added to refs/heads/master by this push:
       new  280eaa5   TS-4925: Manager pollMgmtProcessServer stuck with EBADF.
280eaa5 is described below

commit 280eaa59cc36c74c3f7d7a621e1244fe2a7fc287
Author: James Peach <jpe...@apache.org>
AuthorDate: Mon Oct 3 21:43:57 2016 -0700

    TS-4925: Manager pollMgmtProcessServer stuck with EBADF.
    
    If the select in pollMgmtProcessServer fails with a non-transient
    error (EBADF, for example), it simply went back into the select loop
    and was never able to recover. This can happen if the server file
    descriptor is -1, or otherwise stale.
    
    The fix is to not put -1 into the select set, and to break the loop
    if select returns an error.
---
 mgmt/LocalManager.cc    | 42 +++++++++++++++++++++++++++++++-----------
 mgmt/utils/MgmtSocket.h |  6 ------
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/mgmt/LocalManager.cc b/mgmt/LocalManager.cc
index 67cad23..f90b342 100644
--- a/mgmt/LocalManager.cc
+++ b/mgmt/LocalManager.cc
@@ -384,16 +384,21 @@ LocalManager::pollMgmtProcessServer()
   int num;
   struct timeval timeout;
   fd_set fdlist;
+
+  while (1) {
 #if TS_HAS_WCCP
-  int wccp_fd = wccp_cache.getSocket();
+    int wccp_fd = wccp_cache.getSocket();
 #endif
 
-  while (1) {
-    // poll only
     timeout.tv_sec  = process_server_timeout_secs;
     timeout.tv_usec = process_server_timeout_msecs * 1000;
+
     FD_ZERO(&fdlist);
-    FD_SET(process_server_sockfd, &fdlist);
+
+    if (process_server_sockfd != ts::NO_FD) {
+      FD_SET(process_server_sockfd, &fdlist);
+    }
+
     if (watched_process_fd != ts::NO_FD) {
       FD_SET(watched_process_fd, &fdlist);
     }
@@ -406,20 +411,36 @@ LocalManager::pollMgmtProcessServer()
       time_t wccp_wait = wccp_cache.waitTime();
       if (wccp_wait < process_server_timeout_secs)
         timeout.tv_sec = wccp_wait;
-      FD_SET(wccp_cache.getSocket(), &fdlist);
+
+      if (wccp_fd != ts::NO_FD) {
+        FD_SET(wccp_fd, &fdlist);
+      }
     }
 #endif
 
     num = mgmt_select(FD_SETSIZE, &fdlist, NULL, NULL, &timeout);
-    if (num == 0) { /* Have nothing */
-      break;
-    } else if (num > 0) { /* Have something */
+
+    switch (num) {
+    case 0:
+      // Timed out, nothing to do.
+      return;
+    case -1:
+      if (mgmt_transient_error()) {
+        continue;
+      }
+
+      mgmt_log("[LocalManager::pollMgmtProcessServer] select failed: %s (%d)\n", ::strerror(errno), errno);
+      return;
+
+    default:
+
 #if TS_HAS_WCCP
       if (wccp_fd != ts::NO_FD && FD_ISSET(wccp_fd, &fdlist)) {
         wccp_cache.handleMessage();
         --num;
       }
 #endif
+
       if (FD_ISSET(process_server_sockfd, &fdlist)) { /* New connection */
         struct sockaddr_in clientAddr;
         socklen_t clientLen = sizeof(clientAddr);
@@ -456,6 +477,7 @@ LocalManager::pollMgmtProcessServer()
         } else if (res < 0) {
           mgmt_fatal(0, "[LocalManager::pollMgmtProcessServer] Error in read (errno: %d)\n", -res);
         }
+
         // handle EOF
         if (res == 0) {
           int estatus;
@@ -488,10 +510,8 @@ LocalManager::pollMgmtProcessServer()
 
         num--;
       }
-      ink_assert(num == 0); /* Invariant */
 
-    } else if (num < 0) { /* Error */
-      mgmt_log("[LocalManager::pollMgmtProcessServer] select failed or was interrupted (%d)\n", errno);
+      ink_assert(num == 0); /* Invariant */
     }
   }
 }
diff --git a/mgmt/utils/MgmtSocket.h b/mgmt/utils/MgmtSocket.h
index eaac8be..ab10027 100644
--- a/mgmt/utils/MgmtSocket.h
+++ b/mgmt/utils/MgmtSocket.h
@@ -27,12 +27,6 @@
 #include "ts/ink_platform.h"
 
 //-------------------------------------------------------------------------
-// defines
-//-------------------------------------------------------------------------
-
-#define MGMT_MAX_TRANSIENT_ERRORS 64
-
-//-------------------------------------------------------------------------
 // transient_error
 //-------------------------------------------------------------------------
 

-- 
To stop receiving notification emails like this one, please contact
commits@trafficserver.apache.org.

Reply via email to