This is an automated email from the ASF dual-hosted git repository.

vmamidi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new b321f01  Add new sub command 'host' to traffic_ctl so that hosts used 
as parents in parent.config may be manually marked down and up so that they may 
be excluded or included for use as determined by the selected status, up or 
down.
b321f01 is described below

commit b321f011bb4b13b2e9011d60f62442050ae53565
Author: jrushford <[email protected]>
AuthorDate: Fri Mar 16 21:02:54 2018 +0000

    Add new sub command 'host' to traffic_ctl so that hosts used as
    parents in parent.config may be manually marked down and up so
    that they may be excluded or included for use as determined
    by the selected status, up or down.
---
 cmd/traffic_ctl/Makefile.am                    |   1 +
 cmd/traffic_ctl/host.cc                        |  98 ++++++++++++++++++++++++
 cmd/traffic_ctl/traffic_ctl.cc                 |   1 +
 cmd/traffic_ctl/traffic_ctl.h                  |   1 +
 doc/appendices/command-line/traffic_ctl.en.rst |  19 +++++
 mgmt/BaseManager.h                             |   2 +
 mgmt/LocalManager.cc                           |  14 ++++
 mgmt/LocalManager.h                            |   2 +
 mgmt/ProcessManager.cc                         |   6 ++
 mgmt/api/CoreAPI.cc                            |  24 ++++++
 mgmt/api/CoreAPI.h                             |   2 +
 mgmt/api/CoreAPIRemote.cc                      |  22 ++++++
 mgmt/api/EventControlMain.cc                   |   2 +
 mgmt/api/INKMgmtAPI.cc                         |  13 ++++
 mgmt/api/NetworkMessage.cc                     |   6 ++
 mgmt/api/NetworkMessage.h                      |   2 +
 mgmt/api/TSControlMain.cc                      |  45 +++++++++++
 mgmt/api/include/mgmtapi.h                     |   3 +-
 proxy/EventName.cc                             |   4 +
 proxy/HostStatus.cc                            | 101 +++++++++++++++++++++----
 proxy/HostStatus.h                             |  19 +++--
 proxy/Main.cc                                  |   2 +
 proxy/ParentConsistentHash.cc                  |  13 +++-
 proxy/ParentRoundRobin.cc                      |  27 ++++---
 proxy/ParentSelection.cc                       |  75 ++++++++++++++++--
 25 files changed, 464 insertions(+), 40 deletions(-)

diff --git a/cmd/traffic_ctl/Makefile.am b/cmd/traffic_ctl/Makefile.am
index 2767097..ddc51a5 100644
--- a/cmd/traffic_ctl/Makefile.am
+++ b/cmd/traffic_ctl/Makefile.am
@@ -34,6 +34,7 @@ traffic_ctl_SOURCES = \
   plugin.cc \
   server.cc \
   storage.cc \
+  host.cc \
   traffic_ctl.cc
 
 traffic_ctl_LDADD = \
diff --git a/cmd/traffic_ctl/host.cc b/cmd/traffic_ctl/host.cc
new file mode 100644
index 0000000..2272ce8
--- /dev/null
+++ b/cmd/traffic_ctl/host.cc
@@ -0,0 +1,98 @@
+/** @file
+
+  host.cc
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+#include "traffic_ctl.h"
+#include <P_RecUtils.h>
+const std::string stat_prefix = "host_status.";
+static int
+status_get(unsigned argc, const char **argv) // 'host status' handler: print the stat record of each HOST argument
+{
+  if (!CtrlProcessArguments(argc, argv, nullptr, 0) || n_file_arguments < 1) { // at least one HOST is required
+    return CtrlCommandUsage("host status HOST  [HOST  ...]", nullptr, 0);
+  }
+
+  for (unsigned i = 0; i < n_file_arguments; ++i) {
+    CtrlMgmtRecord record;
+    TSMgmtError error;
+    std::string str = stat_prefix + file_arguments[i]; // record name is "host_status." followed by the host
+
+    error = record.fetch(str.c_str());
+    if (error != TS_ERR_OKAY) {
+      CtrlMgmtError(error, "failed to fetch %s", file_arguments[i]);
+      return CTRL_EX_ERROR; // stop at the first host that cannot be fetched
+    }
+
+    if (REC_TYPE_IS_STAT(record.rclass())) { // records that are not stats are silently skipped
+      printf("%s %s\n", record.name(), CtrlMgmtRecordValue(record).c_str());
+    }
+  }
+
+  return CTRL_EX_OK;
+}
+
+// Handler for 'traffic_ctl host down': asks the manager to mark every HOST
+// argument down via TSHostStatusSetDown().
+//
+// Returns CTRL_EX_OK when every host was processed. Processing stops at the
+// first host that fails; the error is reported and CTRL_EX_ERROR is returned.
+// When option parsing fails or no HOST is given, the usage text is printed and
+// the result of CtrlCommandUsage() is returned.
+static int
+status_down(unsigned argc, const char **argv)
+{
+  if (!CtrlProcessArguments(argc, argv, nullptr, 0) || n_file_arguments < 1) {
+    // One or more host names are expected; there is no value argument.
+    return CtrlCommandUsage("host down HOST  [HOST  ...]", nullptr, 0);
+  }
+  TSMgmtError error = TS_ERR_OKAY;
+  for (unsigned i = 0; i < n_file_arguments; ++i) {
+    error = TSHostStatusSetDown(file_arguments[i]);
+    if (error != TS_ERR_OKAY) {
+      CtrlMgmtError(error, "failed to set %s", file_arguments[i]);
+      return CTRL_EX_ERROR;
+    }
+  }
+
+  return CTRL_EX_OK;
+}
+// Handler for 'traffic_ctl host up': asks the manager to mark every HOST
+// argument up via TSHostStatusSetUp().
+//
+// Returns CTRL_EX_OK when every host was processed. Processing stops at the
+// first host that fails; the error is reported and CTRL_EX_ERROR is returned.
+// When option parsing fails or no HOST is given, the usage text is printed and
+// the result of CtrlCommandUsage() is returned.
+static int
+status_up(unsigned argc, const char **argv)
+{
+  if (!CtrlProcessArguments(argc, argv, nullptr, 0) || n_file_arguments < 1) {
+    // One or more host names are expected; there is no METRIC or value argument.
+    return CtrlCommandUsage("host up HOST  [HOST  ...]", nullptr, 0);
+  }
+  TSMgmtError error = TS_ERR_OKAY;
+  for (unsigned i = 0; i < n_file_arguments; ++i) {
+    error = TSHostStatusSetUp(file_arguments[i]);
+    if (error != TS_ERR_OKAY) {
+      CtrlMgmtError(error, "failed to set %s", file_arguments[i]);
+      return CTRL_EX_ERROR;
+    }
+  }
+
+  return CTRL_EX_OK;
+}
+
+int
+subcommand_host(unsigned argc, const char **argv) // entry point of 'traffic_ctl host'
+{
+  const subcommand commands[] = { // each entry: handler, sub command name, help text
+    {status_get, "status", "Get one or more host statuses"},
+    {status_down, "down", "Set down one or more host(s) "},
+    {status_up, "up", "Set up one or more host(s) "},
+
+  };
+
+  return CtrlGenericSubcommand("host", commands, countof(commands), argc, 
argv);
+}
diff --git a/cmd/traffic_ctl/traffic_ctl.cc b/cmd/traffic_ctl/traffic_ctl.cc
index f8c4588..af75351 100644
--- a/cmd/traffic_ctl/traffic_ctl.cc
+++ b/cmd/traffic_ctl/traffic_ctl.cc
@@ -218,6 +218,7 @@ static const subcommand commands[] = {
   {subcommand_server, "server", "Stop, restart and examine the server"},
   {subcommand_storage, "storage", "Manipulate cache storage"},
   {subcommand_plugin, "plugin", "Interact with plugins"},
+  {subcommand_host, "host", "Interact with host status"},
 };
 
 int
diff --git a/cmd/traffic_ctl/traffic_ctl.h b/cmd/traffic_ctl/traffic_ctl.h
index 845df11..f2d19b4 100644
--- a/cmd/traffic_ctl/traffic_ctl.h
+++ b/cmd/traffic_ctl/traffic_ctl.h
@@ -204,6 +204,7 @@ int subcommand_metric(unsigned argc, const char **argv);
 int subcommand_server(unsigned argc, const char **argv);
 int subcommand_storage(unsigned argc, const char **argv);
 int subcommand_plugin(unsigned argc, const char **argv);
+int subcommand_host(unsigned argc, const char **argv);
 
 // Exit status codes, following BSD's sysexits(3)
 #define CTRL_EX_OK 0
diff --git a/doc/appendices/command-line/traffic_ctl.en.rst 
b/doc/appendices/command-line/traffic_ctl.en.rst
index 9edfbf2..991ba14 100644
--- a/doc/appendices/command-line/traffic_ctl.en.rst
+++ b/doc/appendices/command-line/traffic_ctl.en.rst
@@ -50,6 +50,8 @@ of subcommands that control different aspects of Traffic 
Server:
     Manipulate cache storage
 :program:`traffic_ctl plugin`
     Interact with plugins.
+:program:`traffic_ctl host`
+    Manipulate host status. Currently this applies to parents only, but it will be expanded to origins.
 
 To use :program:`traffic_ctl`, :ref:`traffic_manager` needs to be running.
 
@@ -271,6 +273,23 @@ traffic_ctl plugin
     plugin hook processing. It is expected that plugins will use :arg:`TAG` to 
select relevant messages
     and determine the format of the :arg:`DATA`.
 
+traffic_ctl host
+----------------
+.. program:: traffic_ctl host
+.. option:: status HOSTNAME [HOSTNAME ...]
+
+    Get the current status of the hosts used in parent.config as a next hop in 
a multi-tiered cache hierarchy.  The value 0 or 1 is returned, indicating that 
the host is marked as down ('0') or marked as up ('1').  If a host is marked as 
down, it will not be used as the next hop parent; another host marked as up 
will be chosen instead.
+
+.. program:: traffic_ctl host
+.. option:: down HOSTNAME [HOSTNAME ...]
+
+    Marks the listed hosts as down so that they will not be chosen as a next 
hop parent.
+
+.. program:: traffic_ctl host
+.. option:: up HOSTNAME [HOSTNAME ...]
+
+    Marks the listed hosts as up so that they will be available for use as a 
next hop parent.
+
 Examples
 ========
 
diff --git a/mgmt/BaseManager.h b/mgmt/BaseManager.h
index c71be3d..12608ea 100644
--- a/mgmt/BaseManager.h
+++ b/mgmt/BaseManager.h
@@ -71,6 +71,8 @@
 #define MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE 10011
 #define MGMT_EVENT_LIFECYCLE_MESSAGE 10012
 #define MGMT_EVENT_DRAIN 10013
+#define MGMT_EVENT_HOST_STATUS_UP 10014
+#define MGMT_EVENT_HOST_STATUS_DOWN 10015
 
 /***********************************************************************
  *
diff --git a/mgmt/LocalManager.cc b/mgmt/LocalManager.cc
index c4a08c0..66ebad1 100644
--- a/mgmt/LocalManager.cc
+++ b/mgmt/LocalManager.cc
@@ -122,6 +122,20 @@ LocalManager::rollLogFiles()
 }
 
 void
+LocalManager::hostStatusSetDown(const char *name) // name: host to be marked down
+{
+  signalEvent(MGMT_EVENT_HOST_STATUS_DOWN, name); // the host name is sent as the event payload
+  return;
+}
+
+void
+LocalManager::hostStatusSetUp(const char *name) // name: host to be marked up
+{
+  signalEvent(MGMT_EVENT_HOST_STATUS_UP, name); // the host name is sent as the event payload
+  return;
+}
+
+void
 LocalManager::clearStats(const char *name)
 {
   // Clear our records and then send the signal.  There is a race condition
diff --git a/mgmt/LocalManager.h b/mgmt/LocalManager.h
index 082e134..bc33474 100644
--- a/mgmt/LocalManager.h
+++ b/mgmt/LocalManager.h
@@ -94,6 +94,8 @@ public:
   void processDrain(int to_drain = 1);
   void rollLogFiles();
   void clearStats(const char *name = NULL);
+  void hostStatusSetDown(const char *name);
+  void hostStatusSetUp(const char *name);
 
   bool processRunning();
 
diff --git a/mgmt/ProcessManager.cc b/mgmt/ProcessManager.cc
index 7fd5cc8..63766c4 100644
--- a/mgmt/ProcessManager.cc
+++ b/mgmt/ProcessManager.cc
@@ -432,6 +432,12 @@ ProcessManager::handleMgmtMsgFromLM(MgmtMessageHdr *mh)
   case MGMT_EVENT_CLEAR_STATS:
     executeMgmtCallback(MGMT_EVENT_CLEAR_STATS, nullptr, 0);
     break;
+  case MGMT_EVENT_HOST_STATUS_UP:
+    executeMgmtCallback(MGMT_EVENT_HOST_STATUS_UP, data_raw, mh->data_len);
+    break;
+  case MGMT_EVENT_HOST_STATUS_DOWN:
+    executeMgmtCallback(MGMT_EVENT_HOST_STATUS_DOWN, data_raw, mh->data_len);
+    break;
   case MGMT_EVENT_ROLL_LOG_FILES:
     executeMgmtCallback(MGMT_EVENT_ROLL_LOG_FILES, nullptr, 0);
     break;
diff --git a/mgmt/api/CoreAPI.cc b/mgmt/api/CoreAPI.cc
index 154636f..6cffcca 100644
--- a/mgmt/api/CoreAPI.cc
+++ b/mgmt/api/CoreAPI.cc
@@ -881,6 +881,30 @@ EventSignalCbUnregister(const char *event_name, 
TSEventSignalFunc func)
 }
 
 /*-------------------------------------------------------------------------
+ * HostStatusSetDown
+ *-------------------------------------------------------------------------
+ * Sets the HOST status to Down
+ *
+ * name: host to mark down; passed unchanged to lmgmt->hostStatusSetDown().
+ * Always returns TS_ERR_OKAY: the local manager call returns nothing, so no
+ * failure can be reported from here.
+ */
+TSMgmtError
+HostStatusSetDown(const char *name)
+{
+  lmgmt->hostStatusSetDown(name);
+  return TS_ERR_OKAY;
+}
+
+/*-------------------------------------------------------------------------
+ * HostStatusSetUp
+ *-------------------------------------------------------------------------
+ * Sets the HOST status to Up
+ *
+ * name: host to mark up; passed unchanged to lmgmt->hostStatusSetUp().
+ * Always returns TS_ERR_OKAY: the local manager call returns nothing, so no
+ * failure can be reported from here.
+ */
+TSMgmtError
+HostStatusSetUp(const char *name)
+{
+  lmgmt->hostStatusSetUp(name);
+  return TS_ERR_OKAY;
+}
+
+/*-------------------------------------------------------------------------
  * StatsReset
  *-------------------------------------------------------------------------
  * Iterates through the RecordsConfig table, and for all stats
diff --git a/mgmt/api/CoreAPI.h b/mgmt/api/CoreAPI.h
index 84639b5..cf8733e 100644
--- a/mgmt/api/CoreAPI.h
+++ b/mgmt/api/CoreAPI.h
@@ -83,6 +83,8 @@ TSMgmtError EventIsActive(const char *event_name, bool 
*is_current);
 TSMgmtError EventSignalCbRegister(const char *event_name, TSEventSignalFunc 
func, void *data);
 TSMgmtError EventSignalCbUnregister(const char *event_name, TSEventSignalFunc 
func);
 
+TSMgmtError HostStatusSetDown(const char *name);
+TSMgmtError HostStatusSetUp(const char *name);
 TSMgmtError StatsReset(const char *name = NULL);
 
 #endif
diff --git a/mgmt/api/CoreAPIRemote.cc b/mgmt/api/CoreAPIRemote.cc
index cdab53f..a6250f0 100644
--- a/mgmt/api/CoreAPIRemote.cc
+++ b/mgmt/api/CoreAPIRemote.cc
@@ -1023,6 +1023,28 @@ EventSignalCbUnregister(const char *event_name, 
TSEventSignalFunc func)
 }
 
 TSMgmtError
+HostStatusSetDown(const char *host_name) // remote variant: sends a HOST_STATUS_DOWN request over main_socket_fd
+{
+  TSMgmtError ret         = TS_ERR_PARAMS;
+  OpType op               = OpType::HOST_STATUS_DOWN;
+  MgmtMarshallString name = const_cast<MgmtMarshallString>(host_name); // const is cast away for the marshalling type
+
+  ret = MGMTAPI_SEND_MESSAGE(main_socket_fd, op, &op, &name); // fields sent: op type, then host name
+  return (ret == TS_ERR_OKAY) ? parse_generic_response(op, main_socket_fd) : 
ret;
+}
+
+TSMgmtError
+HostStatusSetUp(const char *host_name) // remote variant: sends a HOST_STATUS_UP request over main_socket_fd
+{
+  TSMgmtError ret         = TS_ERR_PARAMS;
+  OpType op               = OpType::HOST_STATUS_UP;
+  MgmtMarshallString name = const_cast<MgmtMarshallString>(host_name); // const is cast away for the marshalling type
+
+  ret = MGMTAPI_SEND_MESSAGE(main_socket_fd, op, &op, &name); // fields sent: op type, then host name
+  return (ret == TS_ERR_OKAY) ? parse_generic_response(op, main_socket_fd) : 
ret;
+}
+
+TSMgmtError
 StatsReset(const char *stat_name)
 {
   TSMgmtError ret;
diff --git a/mgmt/api/EventControlMain.cc b/mgmt/api/EventControlMain.cc
index ba76d10..eaa5f99 100644
--- a/mgmt/api/EventControlMain.cc
+++ b/mgmt/api/EventControlMain.cc
@@ -537,6 +537,8 @@ static const event_message_handler handlers[] = {
   nullptr,                     // STORAGE_DEVICE_CMD_OFFLINE
   nullptr,                     // RECORD_MATCH_GET
   nullptr,                     // LIFECYCLE_MESSAGE
+  nullptr,                     // HOST_STATUS_UP
+  nullptr,                     // HOST_STATUS_DOWN
 };
 
 static TSMgmtError
diff --git a/mgmt/api/INKMgmtAPI.cc b/mgmt/api/INKMgmtAPI.cc
index b0b8189..1800d98 100644
--- a/mgmt/api/INKMgmtAPI.cc
+++ b/mgmt/api/INKMgmtAPI.cc
@@ -414,6 +414,19 @@ TSRecordEleDestroy(TSRecordEle *ele)
  * API Core
  ***************************************************************************/
 
+/*--- host status operations ----------------------------------------------- */
+tsapi TSMgmtError
+TSHostStatusSetUp(const char *name) // name: host to mark up
+{
+  return HostStatusSetUp(name); // thin public wrapper; the result of HostStatusSetUp() is returned as is
+}
+
+tsapi TSMgmtError
+TSHostStatusSetDown(const char *name) // name: host to mark down
+{
+  return HostStatusSetDown(name); // thin public wrapper; the result of HostStatusSetDown() is returned as is
+}
+
 /*--- statistics operations ----------------------------------------------- */
 tsapi TSMgmtError
 TSStatsReset(const char *name)
diff --git a/mgmt/api/NetworkMessage.cc b/mgmt/api/NetworkMessage.cc
index 46261ff..6d2cb54 100644
--- a/mgmt/api/NetworkMessage.cc
+++ b/mgmt/api/NetworkMessage.cc
@@ -61,6 +61,8 @@ static const struct NetCmdOperation requests[] = {
   /* SERVER_BACKTRACE           */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}},
   /* RECORD_DESCRIBE_CONFIG     */ {3, {MGMT_MARSHALL_INT, 
MGMT_MARSHALL_STRING, MGMT_MARSHALL_INT}},
   /* LIFECYCLE_MESSAGE          */ {3, {MGMT_MARSHALL_INT, 
MGMT_MARSHALL_STRING, MGMT_MARSHALL_DATA}},
+  /* HOST_STATUS_UP             */ {2, {MGMT_MARSHALL_INT, 
MGMT_MARSHALL_STRING}},
+  /* HOST_STATUS_DOWN           */ {2, {MGMT_MARSHALL_INT, 
MGMT_MARSHALL_STRING}},
 };
 
 // Responses always begin with a TSMgmtError code, followed by additional 
fields.
@@ -95,6 +97,8 @@ static const struct NetCmdOperation responses[] = {
     MGMT_MARSHALL_INT /* updatetype */, MGMT_MARSHALL_INT /* checktype */, 
MGMT_MARSHALL_INT /* source */,
     MGMT_MARSHALL_STRING /* checkexpr */}},
   /* LIFECYCLE_MESSAGE          */ {1, {MGMT_MARSHALL_INT}},
+  /* HOST_STATUS_UP             */ {1, {MGMT_MARSHALL_INT}},
+  /* HOST_STATUS_DOWN           */ {1, {MGMT_MARSHALL_INT}},
 };
 
 #define GETCMD(ops, optype, cmd)                           \
@@ -202,6 +206,8 @@ send_mgmt_error(int fd, OpType optype, TSMgmtError error)
   case OpType::RECONFIGURE:
   case OpType::RESTART:
   case OpType::STATS_RESET_NODE:
+  case OpType::HOST_STATUS_UP:
+  case OpType::HOST_STATUS_DOWN:
   case OpType::STORAGE_DEVICE_CMD_OFFLINE:
     ink_release_assert(responses[static_cast<unsigned>(optype)].nfields == 1);
     return send_mgmt_response(fd, optype, &ecode);
diff --git a/mgmt/api/NetworkMessage.h b/mgmt/api/NetworkMessage.h
index d529661..e56cb29 100644
--- a/mgmt/api/NetworkMessage.h
+++ b/mgmt/api/NetworkMessage.h
@@ -55,6 +55,8 @@ enum class OpType : MgmtMarshallInt {
   SERVER_BACKTRACE,
   RECORD_DESCRIBE_CONFIG,
   LIFECYCLE_MESSAGE,
+  HOST_STATUS_UP,
+  HOST_STATUS_DOWN,
   UNDEFINED_OP /* This must be last */
 };
 
diff --git a/mgmt/api/TSControlMain.cc b/mgmt/api/TSControlMain.cc
index 58dddbf..28c9db0 100644
--- a/mgmt/api/TSControlMain.cc
+++ b/mgmt/api/TSControlMain.cc
@@ -799,6 +799,49 @@ handle_stats_reset(int fd, void *req, size_t reqlen)
 }
 
 /**************************************************************************
+ * handle_host_status_up
+ *
+ * purpose: handles a request to mark a host as up; the host name is read
+ *          from the request and passed to HostStatusSetUp()
+ * output: TS_ERR_xx (the result of sending the response; the outcome of the
+ *         request itself is carried inside the response)
+ * note: NOTE(review): optype is only assigned by recv_mgmt_request() --
+ *       confirm it is set even when parsing fails, since it is used for
+ *       the response either way
+ *************************************************************************/
+static TSMgmtError
+handle_host_status_up(int fd, void *req, size_t reqlen)
+{
+  OpType optype;
+  MgmtMarshallString name = nullptr;
+  MgmtMarshallInt err;
+
+  err = recv_mgmt_request(req, reqlen, OpType::HOST_STATUS_UP, &optype, &name);
+  if (err == TS_ERR_OKAY) {
+    err = HostStatusSetUp(name);
+  }
+
+  ats_free(name);
+  return send_mgmt_response(fd, (OpType)optype, &err);
+}
+
+/**************************************************************************
+ * handle_host_status_down
+ *
+ * purpose: handles a request to mark a host as down; the host name is read
+ *          from the request and passed to HostStatusSetDown()
+ * output: TS_ERR_xx (the result of sending the response; the outcome of the
+ *         request itself is carried inside the response)
+ * note: NOTE(review): optype is only assigned by recv_mgmt_request() --
+ *       confirm it is set even when parsing fails, since it is used for
+ *       the response either way
+ *************************************************************************/
+static TSMgmtError
+handle_host_status_down(int fd, void *req, size_t reqlen)
+{
+  OpType optype;
+  MgmtMarshallString name = nullptr;
+  MgmtMarshallInt err;
+
+  err = recv_mgmt_request(req, reqlen, OpType::HOST_STATUS_DOWN, &optype, 
&name);
+  if (err == TS_ERR_OKAY) {
+    err = HostStatusSetDown(name);
+  }
+
+  ats_free(name);
+  return send_mgmt_response(fd, (OpType)optype, &err);
+}
+/**************************************************************************
  * handle_api_ping
  *
  * purpose: handles the API_PING messaghat is sent by API clients to keep
@@ -1015,6 +1058,8 @@ static const control_message_handler handlers[] = {
   /* SERVER_BACKTRACE           */ {MGMT_API_PRIVILEGED, 
handle_server_backtrace},
   /* RECORD_DESCRIBE_CONFIG     */ {0, handle_record_describe},
   /* LIFECYCLE_MESSAGE          */ {MGMT_API_PRIVILEGED, 
handle_lifecycle_message},
+  /* HOST_STATUS_UP             */ {MGMT_API_PRIVILEGED, 
handle_host_status_up},
+  /* HOST_STATUS_DOWN           */ {MGMT_API_PRIVILEGED, 
handle_host_status_down},
 };
 
 // This should use countof(), but we need a constexpr :-/
diff --git a/mgmt/api/include/mgmtapi.h b/mgmt/api/include/mgmtapi.h
index c3567e9..34f3ad8 100644
--- a/mgmt/api/include/mgmtapi.h
+++ b/mgmt/api/include/mgmtapi.h
@@ -490,7 +490,8 @@ tsapi TSMgmtError TSReadFromUrl(char *url, char **header, 
int *headerSize, char
  * NOTE: header and headerSize can be NULL
  */
 tsapi TSMgmtError TSReadFromUrlEx(const char *url, char **header, int 
*headerSize, char **body, int *bodySize, int timeout);
-
+tsapi TSMgmtError TSHostStatusSetUp(const char *name);
+tsapi TSMgmtError TSHostStatusSetDown(const char *name);
 /*--- statistics operations -----------------------------------------------*/
 /* TSStatsReset: sets all the statistics variables to their default values
  * Outpue: TSErrr
diff --git a/proxy/EventName.cc b/proxy/EventName.cc
index 3d95417..cac03e9 100644
--- a/proxy/EventName.cc
+++ b/proxy/EventName.cc
@@ -130,6 +130,10 @@ event_int_to_string(int event, int blen, char *buffer)
     return "MGMT_EVENT_CONFIG_FILE_UPDATE_NO_INC_VERSION";
   case MGMT_EVENT_CLEAR_STATS:
     return "MGMT_EVENT_CLEAR_STATS";
+  case MGMT_EVENT_HOST_STATUS_UP:
+    return "MGMT_EVENT_HOST_STATUS_UP";
+  case MGMT_EVENT_HOST_STATUS_DOWN:
+    return "MGMT_EVENT_HOST_STATUS_DOWN";
 
   default:
     if (buffer != nullptr) {
diff --git a/proxy/HostStatus.cc b/proxy/HostStatus.cc
index 33bee19..0366feb 100644
--- a/proxy/HostStatus.cc
+++ b/proxy/HostStatus.cc
@@ -21,39 +21,114 @@
   limitations under the License.
  */
 #include "HostStatus.h"
+#include "ProcessManager.h"
 
-HostStatus::HostStatus() : 
hosts_statuses(ink_hash_table_create(InkHashTableKeyType_String))
+static RecRawStatBlock *host_status_rsb = nullptr;
+
+static void *
+mgmt_host_status_up_callback(void *x, char *data, int len)
 {
-  ink_mutex_init(&hosts_statuses_mutex);
+  if (data != nullptr) {
+    Debug("host_statuses", "marking up server %s", data);
+    HostStatus &hs = HostStatus::instance();
+    hs.setHostStatus(data, HostStatus_t::HOST_STATUS_UP);
+  }
+  return nullptr;
+}
+
+static void *
+mgmt_host_status_down_callback(void *x, char *data, int len) // x and len are not used; data is the host name
+{
+  if (data != nullptr) { // assumes data is NUL-terminated (it is printed with %s and len is ignored) -- TODO confirm
+    Debug("host_statuses", "marking down server %s", data);
+    HostStatus &hs = HostStatus::instance();
+    hs.setHostStatus(data, HostStatus_t::HOST_STATUS_DOWN);
+  }
+  return nullptr; // always nullptr, whether or not a host was updated
+}
+
+HostStatus::HostStatus()
+{
+  hosts_statuses  = ink_hash_table_create(InkHashTableKeyType_String); // host name -> HostStatus_t
+  hosts_stats_ids = ink_hash_table_create(InkHashTableKeyType_String); // host name -> stat id
+  ink_rwlock_init(&host_status_rwlock);
+  ink_rwlock_init(&host_statids_rwlock);
+  Debug("host_statuses", "registering ostas");
+  pmgmt->registerMgmtCallback(MGMT_EVENT_HOST_STATUS_UP, 
mgmt_host_status_up_callback, nullptr);
+  pmgmt->registerMgmtCallback(MGMT_EVENT_HOST_STATUS_DOWN, 
mgmt_host_status_down_callback, nullptr);
+  host_status_rsb = RecAllocateRawStatBlock((int)TS_MAX_API_STATS); // block that holds the per-host stats
 }
 
 HostStatus::~HostStatus()
 {
   ink_hash_table_destroy(hosts_statuses);
-
-  ink_mutex_destroy(&hosts_statuses_mutex);
+  ink_hash_table_destroy(hosts_stats_ids);
+  ink_rwlock_destroy(&host_status_rwlock);
+  ink_rwlock_destroy(&host_statids_rwlock);
 }
 
 void
-HostStatus::setHostStatus(const char *key, HostStatus_t status)
+HostStatus::setHostStatus(const char *name, HostStatus_t status)
 {
-  Debug("host_statuses", "HostStatus::setHostStatus():  key: %s, status: %d", 
key, status);
-  ink_mutex_acquire(&hosts_statuses_mutex);
+  int stat_id = getHostStatId(name);
+  if (stat_id != -1) {
+    if (status == HostStatus_t::HOST_STATUS_UP) {
+      Debug("host_statuses", "set stat for :  name: %s, status: %d", name, 
status);
+      RecSetRawStatCount(host_status_rsb, stat_id, 1);
+      RecSetRawStatSum(host_status_rsb, stat_id, 1);
+    } else {
+      RecSetRawStatCount(host_status_rsb, stat_id, 0);
+      RecSetRawStatSum(host_status_rsb, stat_id, 0);
+      Debug("host_statuses", "clear stat for :  name: %s, status: %d", name, 
status);
+    }
+  }
+  Debug("host_statuses", "name: %s, status: %d", name, status);
   // update / insert status.
   // using the hash table pointer to store the HostStatus_t value.
-  ink_hash_table_insert(hosts_statuses, key, reinterpret_cast<void *>(status));
-
-  ink_mutex_release(&hosts_statuses_mutex);
+  ink_rwlock_wrlock(&host_status_rwlock);
+  ink_hash_table_insert(hosts_statuses, name, reinterpret_cast<void 
*>(status));
+  ink_rwlock_unlock(&host_status_rwlock);
 }
 
 HostStatus_t
-HostStatus::getHostStatus(const char *key)
+HostStatus::getHostStatus(const char *name)
 {
   intptr_t _status = HostStatus_t::HOST_STATUS_INIT;
 
   // the hash table value pointer has the HostStatus_t value.
-  ink_hash_table_lookup(hosts_statuses, key, reinterpret_cast<void 
**>(&_status));
-  Debug("host_statuses", "HostStatus::getHostStatus():  key: %s, status: %d", 
key, static_cast<int>(_status));
+  ink_rwlock_rdlock(&host_status_rwlock);
+  ink_hash_table_lookup(hosts_statuses, name, reinterpret_cast<void 
**>(&_status));
+  ink_rwlock_unlock(&host_status_rwlock);
+  Debug("host_statuses", "name: %s, status: %d", name, 
static_cast<int>(_status));
 
   return static_cast<HostStatus_t>(_status);
 }
+
+// Registers a stat named "host_status.<name>" for a host that has no stat id
+// yet and marks that host up. A host that already has a stat id is left
+// untouched, so its current status is kept.
+//
+// The lookup, the registration and the insert all happen under the write lock
+// of the stat-id table, so the table is never read while another thread is
+// inserting into it and the same name cannot be registered twice.
+//
+// host_status_rsb is allocated with TS_MAX_API_STATS slots; once they are used
+// up no further stat is registered. Such a host is still marked up the first
+// time it is seen, so that parent selection can use it.
+void
+HostStatus::createHostStat(const char *name)
+{
+  bool created = false;
+
+  ink_rwlock_wrlock(&host_statids_rwlock);
+  if (ink_hash_table_lookup_entry(hosts_stats_ids, name) == nullptr) {
+    if (next_stat_id < (int)TS_MAX_API_STATS) {
+      RecRegisterRawStat(host_status_rsb, RECT_PROCESS, (stat_prefix + name).c_str(), RECD_INT, RECP_NON_PERSISTENT,
+                         (int)next_stat_id, RecRawStatSyncSum);
+      Debug("host_statuses", "name: %s, id: %d", name, next_stat_id);
+      // widen through intptr_t so the int id is stored in the pointer-sized value slot.
+      ink_hash_table_insert(hosts_stats_ids, name, reinterpret_cast<void *>(static_cast<intptr_t>(next_stat_id)));
+      next_stat_id++;
+      created = true;
+    } else {
+      Warning("host_statuses: no stat slot left for host %s, its status will not be exported as a stat", name);
+    }
+  }
+  ink_rwlock_unlock(&host_statids_rwlock);
+
+  // setHostStatus() takes the read lock through getHostStatId(), so it must only be called after unlocking.
+  if (created || getHostStatus(name) == HostStatus_t::HOST_STATUS_INIT) {
+    setHostStatus(name, HostStatus_t::HOST_STATUS_UP);
+  }
+}
+
+int
+HostStatus::getHostStatId(const char *name) // returns the stat id stored for name
+{
+  intptr_t _id = -1; // presumably left at -1 when the lookup finds nothing (setHostStatus() tests for -1) -- TODO confirm
+  ink_rwlock_rdlock(&host_statids_rwlock);
+  ink_hash_table_lookup(hosts_stats_ids, name, reinterpret_cast<void 
**>(&_id));
+  ink_rwlock_unlock(&host_statids_rwlock);
+  Debug("host_statuses", "name: %s, id: %d", name, static_cast<int>(_id));
+
+  return static_cast<int>(_id);
+}
diff --git a/proxy/HostStatus.h b/proxy/HostStatus.h
index 87ebba6..c8d80f7 100644
--- a/proxy/HostStatus.h
+++ b/proxy/HostStatus.h
@@ -37,10 +37,12 @@
 
 enum HostStatus_t {
   HOST_STATUS_INIT,
-  HOST_STATUS_UP,
   HOST_STATUS_DOWN,
+  HOST_STATUS_UP,
 };
 
+const std::string stat_prefix = "host_status.";
+
 /**
  * Singleton placeholder for next hop status.
  */
@@ -52,17 +54,22 @@ struct HostStatus {
   {
     static HostStatus instance;
     return instance;
-  } // return the signleton pointer.
-  void setHostStatus(const char *key, const HostStatus_t status);
-  HostStatus_t getHostStatus(const char *key);
+  }
+  void setHostStatus(const char *name, const HostStatus_t status);
+  HostStatus_t getHostStatus(const char *name);
+  void createHostStat(const char *name);
 
 private:
+  int next_stat_id = 1;
   HostStatus();
   HostStatus(const HostStatus &obj) = delete;
   HostStatus &operator=(HostStatus const &) = delete;
+  int getHostStatId(const char *name);
 
-  InkHashTable *hosts_statuses; // next hop status, key is hostname or ip 
string, data is bool (available).
-  ink_mutex hosts_statuses_mutex;
+  InkHashTable *hosts_statuses;  // next hop status, key is hostname or ip 
string, data is a HostStatus_t value.
+  InkHashTable *hosts_stats_ids; // next hop stat ids, key is hostname or ip 
string, data is int stat id.
+  ink_rwlock host_status_rwlock;
+  ink_rwlock host_statids_rwlock;
 };
 
 #endif
diff --git a/proxy/Main.cc b/proxy/Main.cc
index e58b6d0..0034f05 100644
--- a/proxy/Main.cc
+++ b/proxy/Main.cc
@@ -81,6 +81,7 @@ extern "C" int plock(int);
 #include "CacheControl.h"
 #include "IPAllow.h"
 #include "ParentSelection.h"
+#include "HostStatus.h"
 #include "MgmtUtils.h"
 #include "StatPages.h"
 #include "HTTP.h"
@@ -1859,6 +1860,7 @@ main(int /* argc ATS_UNUSED */, const char **argv)
     initCacheControl();
     IpAllow::startup();
     ParentConfig::startup();
+    HostStatus::instance();
 #ifdef SPLIT_DNS
     SplitDNSConfig::startup();
 #endif
diff --git a/proxy/ParentConsistentHash.cc b/proxy/ParentConsistentHash.cc
index f8185ed..72ada84 100644
--- a/proxy/ParentConsistentHash.cc
+++ b/proxy/ParentConsistentHash.cc
@@ -117,7 +117,8 @@ ParentConsistentHash::selectParent(bool first_call, 
ParentResult *result, Reques
   uint64_t path_hash            = 0;
   uint32_t last_lookup;
   pRecord *prtmp = nullptr, *pRec = nullptr;
-  HostStatus &pStatus = HostStatus::instance();
+  HostStatus &pStatus    = HostStatus::instance();
+  HostStatus_t host_stat = HostStatus_t::HOST_STATUS_INIT;
 
   Debug("parent_select", "ParentConsistentHash::%s(): Using a consistent hash 
parent selection strategy.", __func__);
   ink_assert(numParents(result) > 0 || result->rec->go_direct == true);
@@ -168,10 +169,13 @@ ParentConsistentHash::selectParent(bool first_call, 
ParentResult *result, Reques
       } while (prtmp && strcmp(prtmp->hostname, result->hostname) == 0);
     }
   }
+  host_stat = pStatus.getHostStatus(pRec->hostname);
   // didn't find a parent or the parent is marked unavailable.
-  if ((pRec && !pRec->available) || pStatus.getHostStatus(pRec->hostname) == 
HOST_STATUS_DOWN) {
+  if ((pRec && !pRec->available) || host_stat == HOST_STATUS_DOWN) {
     do {
-      if (pRec && !pRec->available) {
+      // check if the host is retryable.  It's retryable if the retry window 
has elapsed
+      // and the global host status is HOST_STATUS_UP
+      if (pRec && !pRec->available && host_stat == HOST_STATUS_UP) {
         Debug("parent_select", "Parent.failedAt = %u, retry = %u, xact_start = 
%u", (unsigned int)pRec->failedAt,
               (unsigned int)retry_time, (unsigned 
int)request_info->xact_start);
         if ((pRec->failedAt + retry_time) < request_info->xact_start) {
@@ -217,7 +221,8 @@ ParentConsistentHash::selectParent(bool first_call, 
ParentResult *result, Reques
         Debug("parent_select", "No available parents.");
         break;
       }
-    } while (!prtmp || !pRec->available || 
pStatus.getHostStatus(pRec->hostname) == HOST_STATUS_DOWN);
+      host_stat = pStatus.getHostStatus(pRec->hostname);
+    } while (!prtmp || !pRec->available || host_stat == HOST_STATUS_DOWN);
   }
 
   // use the available or marked for retry parent.
diff --git a/proxy/ParentRoundRobin.cc b/proxy/ParentRoundRobin.cc
index 52eecb7..b47ae2b 100644
--- a/proxy/ParentRoundRobin.cc
+++ b/proxy/ParentRoundRobin.cc
@@ -61,10 +61,11 @@ ParentRoundRobin::selectParent(bool first_call, 
ParentResult *result, RequestDat
                                unsigned int retry_time)
 {
   Debug("parent_select", "In ParentRoundRobin::selectParent(): Using a round 
robin parent selection strategy.");
-  int cur_index       = 0;
-  bool parentUp       = false;
-  bool parentRetry    = false;
-  HostStatus &pStatus = HostStatus::instance();
+  int cur_index          = 0;
+  bool parentUp          = false;
+  bool parentRetry       = false;
+  HostStatus &pStatus    = HostStatus::instance();
+  HostStatus_t host_stat = HostStatus_t::HOST_STATUS_INIT;
 
   HttpRequestData *request_info = static_cast<HttpRequestData *>(rdata);
 
@@ -137,15 +138,19 @@ ParentRoundRobin::selectParent(bool first_call, 
ParentResult *result, RequestDat
   // Loop through the array of parent seeing if any are up or
   //   should be retried
   do {
+    host_stat = pStatus.getHostStatus(parents[cur_index].hostname);
     Debug("parent_select", "cur_index: %d, result->start_parent: %d", 
cur_index, result->start_parent);
     // DNS ParentOnly inhibits bypassing the parent so always return that t
     if ((parents[cur_index].failedAt == 0) || (parents[cur_index].failCount < 
static_cast<int>(fail_threshold))) {
-      Debug("parent_select", "FailThreshold = %d", fail_threshold);
-      Debug("parent_select", "Selecting a parent due to little failCount 
(faileAt: %u failCount: %d)",
-            (unsigned)parents[cur_index].failedAt, 
parents[cur_index].failCount);
-      parentUp = true;
+      if (host_stat == HOST_STATUS_UP) {
+        Debug("parent_select", "FailThreshold = %d", fail_threshold);
+        Debug("parent_select", "Selecting a parent due to little failCount 
(faileAt: %u failCount: %d)",
+              (unsigned)parents[cur_index].failedAt, 
parents[cur_index].failCount);
+        parentUp = true;
+      }
     } else {
-      if ((result->wrap_around) || ((parents[cur_index].failedAt + retry_time) 
< request_info->xact_start)) {
+      if ((result->wrap_around) ||
+          ((parents[cur_index].failedAt + retry_time) < 
request_info->xact_start && host_stat == HOST_STATUS_UP)) {
         Debug("parent_select", "Parent[%d].failedAt = %u, retry = 
%u,xact_start = %" PRId64 " but wrap = %d", cur_index,
               (unsigned)parents[cur_index].failedAt, retry_time, 
(int64_t)request_info->xact_start, result->wrap_around);
         // Reuse the parent
@@ -157,8 +162,8 @@ ParentRoundRobin::selectParent(bool first_call, 
ParentResult *result, RequestDat
       }
     }
 
-    if (parentUp == true && pStatus.getHostStatus(parents[cur_index].hostname) 
!= HOST_STATUS_DOWN) {
-      Debug("parent_select", "status for %s: %d", parents[cur_index].hostname, 
pStatus.getHostStatus(parents[cur_index].hostname));
+    if (parentUp == true && host_stat != HOST_STATUS_DOWN) {
+      Debug("parent_select", "status for %s: %d", parents[cur_index].hostname, 
host_stat);
       result->result      = PARENT_SPECIFIED;
       result->hostname    = parents[cur_index].hostname;
       result->port        = parents[cur_index].port;
diff --git a/proxy/ParentSelection.cc b/proxy/ParentSelection.cc
index 4027e36..3ceeab9 100644
--- a/proxy/ParentSelection.cc
+++ b/proxy/ParentSelection.cc
@@ -416,6 +416,7 @@ ParentRecord::ProcessParents(char *val, bool isPrimary)
   if (numTok == 0) {
     return "No parents specified";
   }
+  HostStatus &hs = HostStatus::instance();
   // Allocate the parents array
   if (isPrimary) {
     this->parents = (pRecord *)ats_malloc(sizeof(pRecord) * numTok);
@@ -491,6 +492,7 @@ ParentRecord::ProcessParents(char *val, bool isPrimary)
       this->parents[i].name                    = this->parents[i].hostname;
       this->parents[i].available               = true;
       this->parents[i].weight                  = weight;
+      hs.createHostStat(this->parents[i].hostname);
     } else {
       memcpy(this->secondary_parents[i].hostname, current, tmp - current);
       this->secondary_parents[i].hostname[tmp - current] = '\0';
@@ -502,6 +504,7 @@ ParentRecord::ProcessParents(char *val, bool isPrimary)
       this->secondary_parents[i].name                    = 
this->secondary_parents[i].hostname;
       this->secondary_parents[i].available               = true;
       this->secondary_parents[i].weight                  = weight;
+      hs.createHostStat(this->secondary_parents[i].hostname);
     }
   }
 
@@ -1431,14 +1434,13 @@ 
EXCLUSIVE_REGRESSION_TEST(PARENTSELECTION)(RegressionTest * /* t ATS_UNUSED */,
   sleep(1);
   RE(verify(result, PARENT_SPECIFIED, "fuzzy", 80), 186);
 
-  // test
+  // Test 187
   // test the HostStatus API with ParentConsistent Hash.
   tbl[0] = '\0';
-  ST(178);
+  ST(187);
   T("dest_domain=rabbit.net 
parent=fuzzy:80|1.0;fluffy:80|1.0;furry:80|1.0;frisky:80|1.0 "
     "round_robin=consistent_hash go_direct=false\n");
   REBUILD;
-  REINIT;
 
   // mark all up.
   _st.setHostStatus("furry", HOST_STATUS_UP);
@@ -1446,8 +1448,6 @@ EXCLUSIVE_REGRESSION_TEST(PARENTSELECTION)(RegressionTest 
* /* t ATS_UNUSED */,
   _st.setHostStatus("frisky", HOST_STATUS_UP);
   _st.setHostStatus("fuzzy", HOST_STATUS_UP);
 
-  // Test 187
-  ST(187);
   REINIT;
   br(request, "i.am.rabbit.net");
   FP;
@@ -1476,6 +1476,71 @@ 
EXCLUSIVE_REGRESSION_TEST(PARENTSELECTION)(RegressionTest * /* t ATS_UNUSED */,
   sleep(1);
   RE(verify(result, PARENT_SPECIFIED, "fuzzy", 80), 189);
 
+  // Test 190
+  // mark fuzzy back down and set the host status down
+  // then wait for fuzzy to become available.
+  // even though fuzzy becomes retryable we should not select it
+  // because the host status is set to down.
+  params->markParentDown(result, fail_threshold, retry_time);
+  // set host status down
+  _st.setHostStatus("fuzzy", HOST_STATUS_DOWN);
+  // sleep long enough so that fuzzy is retryable
+  sleep(params->policy.ParentRetryTime + 1);
+  ST(190);
+  REINIT;
+  br(request, "i.am.rabbit.net");
+  FP;
+  RE(verify(result, PARENT_SPECIFIED, "frisky", 80), 190);
+
+  // now set the host status on fuzzy to up and it should now
+  // be retried.
+  _st.setHostStatus("fuzzy", HOST_STATUS_UP);
+  ST(191);
+  REINIT;
+  br(request, "i.am.rabbit.net");
+  FP;
+  RE(verify(result, PARENT_SPECIFIED, "fuzzy", 80), 191);
+
+  // Test 192
+  tbl[0] = '\0';
+  ST(192);
+  T("dest_domain=rabbit.net parent=fuzzy:80,fluffy:80,furry:80,frisky:80 
round_robin=false go_direct=true\n");
+  REBUILD;
+  // mark all up.
+  _st.setHostStatus("fuzzy", HOST_STATUS_UP);
+  _st.setHostStatus("fluffy", HOST_STATUS_UP);
+  _st.setHostStatus("furry", HOST_STATUS_UP);
+  _st.setHostStatus("frisky", HOST_STATUS_UP);
+  // fuzzy should be chosen.
+  sleep(1);
+  REINIT;
+  br(request, "i.am.rabbit.net");
+  FP;
+  RE(verify(result, PARENT_SPECIFIED, "fuzzy", 80), 192);
+
+  // Test 193
+  // mark fuzzy down and wait for it to become retryable
+  ST(193);
+  params->markParentDown(result, fail_threshold, retry_time);
+  sleep(params->policy.ParentRetryTime + 1);
+  // since the host status is down even though fuzzy is
+  // retryable, fluffy should be chosen
+  _st.setHostStatus("fuzzy", HOST_STATUS_DOWN);
+  REINIT;
+  br(request, "i.am.rabbit.net");
+  FP;
+  RE(verify(result, PARENT_SPECIFIED, "fluffy", 80), 193);
+
+  // Test 194
+  // set the host status for fuzzy back up and since it's
+  // retryable fuzzy should be chosen
+  ST(194);
+  _st.setHostStatus("fuzzy", HOST_STATUS_UP);
+  REINIT;
+  br(request, "i.am.rabbit.net");
+  FP;
+  RE(verify(result, PARENT_SPECIFIED, "fuzzy", 80), 194);
+
   delete request;
   delete result;
   delete params;

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to