bneradt commented on a change in pull request #7874:
URL: https://github.com/apache/trafficserver/pull/7874#discussion_r794870237



##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
 
-  static HostDBInfo *
-  alloc(int size = 0)
-  {
-    size += sizeof(HostDBInfo);
-    int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
-    ink_release_assert(iobuffer_index >= 0);
-    void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
-    memset(ptr, 0, size);
-    HostDBInfo *ret      = new (ptr) HostDBInfo();
-    ret->_iobuffer_index = iobuffer_index;
-    return ret;
-  }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+  using self_type = HostDBInfo; ///< Self reference type.
 
-  void
-  free() override
-  {
-    ink_release_assert(from_alloc());
-    Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), 
this);
-    ioBufAllocator[_iobuffer_index].free_void((void *)(this));
-  }
+  /// Default constructor.
+  HostDBInfo() = default;
 
-  /// Effectively the @c object_version for cache data.
-  /// This is used to indicate incompatible changes in the binary layout of 
HostDB records.
-  /// It must be updated if any such change is made, even if it is 
functionally equivalent.
-  static ts::VersionNumber
-  version()
-  {
-    /// - 1.0 Initial version.
-    /// - 1.1 tweak HostDBApplicationInfo::http_data.
-    return ts::VersionNumber(1, 1);
-  }
+  HostDBInfo &operator=(HostDBInfo const &that);

Review comment:
       Use self_type for `HostDBInfo`?

##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
 
-  static HostDBInfo *
-  alloc(int size = 0)
-  {
-    size += sizeof(HostDBInfo);
-    int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
-    ink_release_assert(iobuffer_index >= 0);
-    void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
-    memset(ptr, 0, size);
-    HostDBInfo *ret      = new (ptr) HostDBInfo();
-    ret->_iobuffer_index = iobuffer_index;
-    return ret;
-  }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+  using self_type = HostDBInfo; ///< Self reference type.
 
-  void
-  free() override
-  {
-    ink_release_assert(from_alloc());
-    Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), 
this);
-    ioBufAllocator[_iobuffer_index].free_void((void *)(this));
-  }
+  /// Default constructor.
+  HostDBInfo() = default;
 
-  /// Effectively the @c object_version for cache data.
-  /// This is used to indicate incompatible changes in the binary layout of 
HostDB records.
-  /// It must be updated if any such change is made, even if it is 
functionally equivalent.
-  static ts::VersionNumber
-  version()
-  {
-    /// - 1.0 Initial version.
-    /// - 1.1 tweak HostDBApplicationInfo::http_data.
-    return ts::VersionNumber(1, 1);
-  }
+  HostDBInfo &operator=(HostDBInfo const &that);
 
-  static HostDBInfo *
-  unmarshall(char *buf, unsigned int size)
-  {
-    if (size < sizeof(HostDBInfo)) {
-      return nullptr;
-    }
-    HostDBInfo *ret = HostDBInfo::alloc(size - sizeof(HostDBInfo));
-    int buf_index   = ret->_iobuffer_index;
-    memcpy((void *)ret, buf, size);
-    // Reset the refcount back to 0, this is a bit ugly-- but I'm not sure we 
want to expose a method
-    // to mess with the refcount, since this is a fairly unique use case
-    ret                  = new (ret) HostDBInfo();
-    ret->_iobuffer_index = buf_index;
-    return ret;
-  }
+  /// Absolute time of when this target failed.
+  /// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
+  ts_time last_fail_time() const;
 
-  // return expiry time (in seconds since epoch)
-  ink_time_t
-  expiry_time() const
-  {
-    return ip_timestamp + ip_timeout_interval + 
hostdb_serve_stale_but_revalidate;
-  }
+  /// Target is alive - no known failure.
+  bool is_alive();
 
-  sockaddr *
-  ip()
-  {
-    return &data.ip.sa;
-  }
+  /// Target has failed and is still in the blocked time window.
+  bool is_dead(ts_time now, ts_seconds fail_window);
 
-  sockaddr const *
-  ip() const
-  {
-    return &data.ip.sa;
-  }
+  /** Select this target.
+   *
+   * @param now Current time.
+   * @param fail_window Failure window.
+   * @return Status of the selection.
+   *

Review comment:
       Let's flesh out the description of the function, describing the 
intention of its use, and the semantics of the boolean returned (I think `true` 
means it's alive, but `false` means dead or zombie).

##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);

Review comment:
       I believe this can be a constexpr function.

##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
 
-  static HostDBInfo *
-  alloc(int size = 0)
-  {
-    size += sizeof(HostDBInfo);
-    int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
-    ink_release_assert(iobuffer_index >= 0);
-    void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
-    memset(ptr, 0, size);
-    HostDBInfo *ret      = new (ptr) HostDBInfo();
-    ret->_iobuffer_index = iobuffer_index;
-    return ret;
-  }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+  using self_type = HostDBInfo; ///< Self reference type.
 
-  void
-  free() override
-  {
-    ink_release_assert(from_alloc());
-    Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), 
this);
-    ioBufAllocator[_iobuffer_index].free_void((void *)(this));
-  }
+  /// Default constructor.
+  HostDBInfo() = default;
 
-  /// Effectively the @c object_version for cache data.
-  /// This is used to indicate incompatible changes in the binary layout of 
HostDB records.
-  /// It must be updated if any such change is made, even if it is 
functionally equivalent.
-  static ts::VersionNumber
-  version()
-  {
-    /// - 1.0 Initial version.
-    /// - 1.1 tweak HostDBApplicationInfo::http_data.
-    return ts::VersionNumber(1, 1);
-  }
+  HostDBInfo &operator=(HostDBInfo const &that);
 
-  static HostDBInfo *
-  unmarshall(char *buf, unsigned int size)
-  {
-    if (size < sizeof(HostDBInfo)) {
-      return nullptr;
-    }
-    HostDBInfo *ret = HostDBInfo::alloc(size - sizeof(HostDBInfo));
-    int buf_index   = ret->_iobuffer_index;
-    memcpy((void *)ret, buf, size);
-    // Reset the refcount back to 0, this is a bit ugly-- but I'm not sure we 
want to expose a method
-    // to mess with the refcount, since this is a fairly unique use case
-    ret                  = new (ret) HostDBInfo();
-    ret->_iobuffer_index = buf_index;
-    return ret;
-  }
+  /// Absolute time of when this target failed.
+  /// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
+  ts_time last_fail_time() const;
 
-  // return expiry time (in seconds since epoch)
-  ink_time_t
-  expiry_time() const
-  {
-    return ip_timestamp + ip_timeout_interval + 
hostdb_serve_stale_but_revalidate;
-  }
+  /// Target is alive - no known failure.
+  bool is_alive();
 
-  sockaddr *
-  ip()
-  {
-    return &data.ip.sa;
-  }
+  /// Target has failed and is still in the blocked time window.
+  bool is_dead(ts_time now, ts_seconds fail_window);

Review comment:
       Probably good to add an `is_zombie` function as well.

##########
File path: iocore/hostdb/P_HostDBProcessor.h
##########
@@ -169,221 +151,34 @@ extern RecRawStatBlock *hostdb_rsb;
 
 #define HOSTDB_DECREMENT_THREAD_DYN_STAT(_s, _t) RecIncrRawStatSum(hostdb_rsb, 
_t, (int)_s, -1);
 
-struct CmpConstBuffferCaseInsensitive {
-  bool
-  operator()(ts::ConstBuffer a, ts::ConstBuffer b) const
-  {
-    return ptr_len_casecmp(a._ptr, a._size, b._ptr, b._size) < 0;
-  }
+struct HostFileRecord {
+  HostDBRecord::Handle record_4;
+  HostDBRecord::Handle record_6;
 };
 
-// Our own typedef for the host file mapping
-typedef std::map<ts::ConstBuffer, IpAddr, CmpConstBuffferCaseInsensitive> 
HostsFileMap;
-// A to hold a ref-counted map
-struct RefCountedHostsFileMap : public RefCountObj {
-  HostsFileMap hosts_file_map;
-  ats_scoped_str HostFileText;
-};
+using HostFileMap = std::unordered_map<ts::TextView, HostFileRecord, 
std::hash<std::string_view>>;
 
 //
 // HostDBCache (Private)
 //
 struct HostDBCache {
   int start(int flags = 0);
   // Map to contain all of the host file overrides, initialize it to empty
-  Ptr<RefCountedHostsFileMap> hosts_file_ptr;
+  std::shared_ptr<HostFileMap> host_file;
+  std::shared_mutex host_file_mutex;
+
   // TODO: make ATS call a close() method or something on shutdown (it does 
nothing of the sort today)
-  RefCountCache<HostDBInfo> *refcountcache = nullptr;
+  RefCountCache<HostDBRecord> *refcountcache = nullptr;

Review comment:
       Since we're changing HostDBCache, should the `HOST_DB_CACHE_MAJOR_VERSION`
and `HOST_DB_CACHE_MINOR_VERSION` defines be updated?

https://github.com/apache/trafficserver/pull/7874/files#diff-944e3e1e73047c1013f2944bd7d5f4eb750efdee052d429ddfdbb799f319d1dbR88-R90

I'm guessing not, since they are never referenced. :) Can we simply remove
those defines?

##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
 
-  static HostDBInfo *
-  alloc(int size = 0)
-  {
-    size += sizeof(HostDBInfo);
-    int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
-    ink_release_assert(iobuffer_index >= 0);
-    void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
-    memset(ptr, 0, size);
-    HostDBInfo *ret      = new (ptr) HostDBInfo();
-    ret->_iobuffer_index = iobuffer_index;
-    return ret;
-  }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+  using self_type = HostDBInfo; ///< Self reference type.
 
-  void
-  free() override
-  {
-    ink_release_assert(from_alloc());
-    Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), 
this);
-    ioBufAllocator[_iobuffer_index].free_void((void *)(this));
-  }
+  /// Default constructor.
+  HostDBInfo() = default;
 
-  /// Effectively the @c object_version for cache data.
-  /// This is used to indicate incompatible changes in the binary layout of 
HostDB records.
-  /// It must be updated if any such change is made, even if it is 
functionally equivalent.
-  static ts::VersionNumber
-  version()
-  {
-    /// - 1.0 Initial version.
-    /// - 1.1 tweak HostDBApplicationInfo::http_data.
-    return ts::VersionNumber(1, 1);
-  }
+  HostDBInfo &operator=(HostDBInfo const &that);
 
-  static HostDBInfo *
-  unmarshall(char *buf, unsigned int size)
-  {
-    if (size < sizeof(HostDBInfo)) {
-      return nullptr;
-    }
-    HostDBInfo *ret = HostDBInfo::alloc(size - sizeof(HostDBInfo));
-    int buf_index   = ret->_iobuffer_index;
-    memcpy((void *)ret, buf, size);
-    // Reset the refcount back to 0, this is a bit ugly-- but I'm not sure we 
want to expose a method
-    // to mess with the refcount, since this is a fairly unique use case
-    ret                  = new (ret) HostDBInfo();
-    ret->_iobuffer_index = buf_index;
-    return ret;
-  }
+  /// Absolute time of when this target failed.
+  /// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
+  ts_time last_fail_time() const;
 
-  // return expiry time (in seconds since epoch)
-  ink_time_t
-  expiry_time() const
-  {
-    return ip_timestamp + ip_timeout_interval + 
hostdb_serve_stale_but_revalidate;
-  }
+  /// Target is alive - no known failure.
+  bool is_alive();
 
-  sockaddr *
-  ip()
-  {
-    return &data.ip.sa;
-  }
+  /// Target has failed and is still in the blocked time window.
+  bool is_dead(ts_time now, ts_seconds fail_window);
 
-  sockaddr const *
-  ip() const
-  {
-    return &data.ip.sa;
-  }
+  /** Select this target.
+   *
+   * @param now Current time.
+   * @param fail_window Failure window.
+   * @return Status of the selection.
+   *
+   * If a zombie is selected the failure time is updated to make it look dead 
to other threads in a thread safe
+   * manner. The caller should check @c last_fail_time to see if a zombie was 
selected.
+   */
+  bool select(ts_time now, ts_seconds fail_window);
 
-  char *hostname() const;
-  char *perm_hostname() const;
-  char *srvname(HostDBRoundRobin *rr) const;
+  /// Check if this info is valid.
+  bool is_valid() const;
 
-  /// Check if this entry is an element of a round robin entry.
-  /// If @c true then this entry is part of and was obtained from a round 
robin root. This is useful if the
-  /// address doesn't work - a retry can probably get a new address by doing 
another lookup and resolving to
-  /// a different element of the round robin.
-  bool
-  is_rr_elt() const
-  {
-    return 0 != round_robin_elt;
-  }
+  /// Mark this info as invalid.
+  void invalidate();
 
-  HostDBRoundRobin *rr();
+  /** Mark the entry as down.
+   *
+   * @param now Time of the failure.
+   * @return @c true if @a this was marked down, @c false if not.
+   *
+   * This can return @c false if the entry is already marked down, in which 
case the failure time is not updated.
+   */
+  bool mark_down(ts_time now);
 
-  unsigned int
-  ip_interval() const
-  {
-    return (hostdb_current_interval - ip_timestamp) & 0x7FFFFFFF;
-  }
+  /** Mark the target as up / alive.
+   *
+   * @return Previous alive state of the target.
+   */
+  bool mark_up();
 
-  int
-  ip_time_remaining() const
-  {
-    return static_cast<int>(ip_timeout_interval) - 
static_cast<int>(this->ip_interval());
-  }
+  char const *srvname() const;
 
-  bool
-  is_ip_stale() const
-  {
-    return ip_timeout_interval >= 2 * hostdb_ip_stale_interval && 
ip_interval() >= hostdb_ip_stale_interval;
-  }
+  /** Migrate data after a DNS update.
+   *
+   * @param that Source item.
+   *
+   * This moves only specific state information, it is not a generic copy.
+   */
+  void migrate_from(self_type const &that);
 
-  bool
-  is_ip_timeout() const
-  {
-    return ip_interval() >= ip_timeout_interval;
-  }
+  /// A target is either an IP address or an SRV record.
+  /// The type should be indicated by @c flags.f.is_srv;
+  union {
+    IpAddr ip;   ///< IP address / port data.
+    SRVInfo srv; ///< SRV record.
+  } data{IpAddr{}};
+
+  /// Data that migrates after updated DNS records are processed.
+  /// @see migrate_from
+  /// @{
+  /// Last time a failure was recorded.
+  std::atomic<ts_time> last_failure{TS_TIME_ZERO};
+  /// Count of connection failures
+  std::atomic<uint8_t> fail_count{0};
+  /// Expected HTTP version of the target based on earlier transactions.
+  HTTPVersion http_version = HTTP_INVALID;
+  /// @}
+
+  self_type &assign(IpAddr const &addr);
+
+protected:
+  self_type &assign(sa_family_t af, void const *addr);
+  self_type &assign(SRV const *srv, char const *name);
+
+  HostDBType type = HostDBType::UNSPEC; ///< Invalid data.
+
+  friend HostDBContinuation;
+};
 
-  bool
-  is_ip_fail_timeout() const
-  {
-    return ip_interval() >= hostdb_ip_fail_timeout_interval;
+inline HostDBInfo &
+HostDBInfo::operator=(HostDBInfo const &that)
+{
+  if (this != &that) {
+    memcpy(static_cast<void *>(this), static_cast<const void *>(&that), 
sizeof(*this));
   }
+  return *this;
+}
 
-  void
-  refresh_ip()
-  {
-    ip_timestamp = hostdb_current_interval;
-  }
+inline ts_time
+HostDBInfo::last_fail_time() const
+{
+  return last_failure;
+}
 
-  bool
-  serve_stale_but_revalidate() const
-  {
-    // the option is disabled
-    if (hostdb_serve_stale_but_revalidate <= 0) {
-      return false;
-    }
+inline bool
+HostDBInfo::is_alive()
+{
+  return this->last_fail_time() == TS_TIME_ZERO;
+}
 
-    // ip_timeout_interval == DNS TTL
-    // hostdb_serve_stale_but_revalidate == number of seconds
-    // ip_interval() is the number of seconds between now() and when the entry 
was inserted
-    if ((ip_timeout_interval + hostdb_serve_stale_but_revalidate) > 
ip_interval()) {
-      Debug("hostdb", "serving stale entry %d | %d | %d as requested by 
config", ip_timeout_interval,
-            hostdb_serve_stale_but_revalidate, ip_interval());
-      return true;
-    }
+inline bool
+HostDBInfo::is_dead(ts_time now, ts_seconds fail_window)
+{
+  auto last_fail = this->last_fail_time();
+  return (last_fail != TS_TIME_ZERO) && (last_fail + fail_window < now);
+}
+
+inline bool
+HostDBInfo::mark_up()
+{
+  auto t = last_failure.exchange(TS_TIME_ZERO);
+  return t != TS_TIME_ZERO;
+}
+
+inline bool
+HostDBInfo::mark_down(ts_time now)
+{
+  auto t0{TS_TIME_ZERO};
+  return last_failure.compare_exchange_strong(t0, now);
+}
 
-    // otherwise, the entry is too old
-    return false;
+inline bool
+HostDBInfo::select(ts_time now, ts_seconds fail_window)
+{
+  auto t0 = this->last_fail_time();
+  if (t0 == TS_TIME_ZERO) {
+    return true; // it's alive and so is valid for selection.
   }
+  // Success means this is a zombie and this thread updated the failure time.
+  return (t0 + fail_window < now) && last_failure.compare_exchange_strong(t0, 
now);
+}
+
+inline void
+HostDBInfo::migrate_from(HostDBInfo::self_type const &that)
+{
+  this->last_failure = that.last_failure.load();
+  this->http_version = that.http_version;
+}
 
-  /*
-   * Given the current time `now` and the fail_window, determine if this real 
is alive
+inline bool
+HostDBInfo::is_valid() const
+{
+  return type != HostDBType::UNSPEC;
+}
+
+inline void
+HostDBInfo::invalidate()
+{
+  type = HostDBType::UNSPEC;
+}
+
+// ----
+/** Root item for HostDB.
+ * This is the container for HostDB data. It is always an array of @c 
HostDBInfo instances plus metadata.
+ * All strings are C-strings and therefore don't need a distinct size.
+ *
+ */
+class HostDBRecord : public RefCountObj
+{
+  friend struct HostDBContinuation;
+  friend struct ShowHostDB;
+  using self_type = HostDBRecord;
+
+  /// Size of the IO buffer block owned by @a this.
+  /// If negative, @a this is not in allocated memory.
+  int _iobuffer_index{-1};
+  /// Actual size of the data.
+  unsigned _record_size = sizeof(self_type);
+
+public:
+  HostDBRecord()                      = default;
+  HostDBRecord(self_type const &that) = delete;
+
+  using Handle = Ptr<HostDBRecord>; ///< Shared pointer type to hold an 
instance.
+
+  /** Allocate an instance from the IOBuffers.
+   *
+   * @param query_name Name of the query for the record.
+   * @param rr_count Number of info instances.
+   * @param srv_name_size Storage for SRV names, if any.
+   * @return An instance sufficient to hold the specified data.
+   *
+   * The query name will be stored and initialized, and the info instances 
initialized.
    */
-  bool
-  is_alive(ink_time_t now, int32_t fail_window)
-  {
-    unsigned int last_failure = app.http_data.last_failure;
-
-    if (last_failure == 0 || (unsigned int)(now - fail_window) > last_failure) 
{
-      return true;
-    } else {
-      // Entry is marked down.  Make sure some nasty clock skew
-      //  did not occur.  Use the retry time to set an upper bound
-      //  as to how far in the future we should tolerate bogus last
-      //  failure times.  This sets the upper bound that we would ever
-      //  consider a server down to 2*down_server_timeout
-      if ((unsigned int)(now + fail_window) < last_failure) {
-        app.http_data.last_failure = 0;
-        return false;
-      }
-      return false;
-    }
-  }
+  static self_type *alloc(ts::TextView query_name, unsigned rr_count, size_t 
srv_name_size = 0);
 
-  bool
-  is_failed() const
-  {
-    return !((is_srv && data.srv.srv_offset) || (reverse_dns && 
data.hostname_offset) || ats_is_ip(ip()));
-  }
+  /// Type of data stored in this record.
+  HostDBType record_type = HostDBType::UNSPEC;
 
-  void
-  set_failed()
-  {
-    if (is_srv) {
-      data.srv.srv_offset = 0;
-    } else if (reverse_dns) {
-      data.hostname_offset = 0;
-    } else {
-      ats_ip_invalidate(ip());
-    }
-  }
+  /// IP family of this record.
+  sa_family_t af_family = AF_UNSPEC;
+
+  /// Offset from @a this to the VLA.
+  unsigned short rr_offset = 0;
+
+  /// Number of @c HostDBInfo instances.
+  unsigned short rr_count = 0;
+
+  /// Timing data for switch records in the RR.
+  std::atomic<ts_time> rr_ctime{TS_TIME_ZERO};
 
+  /// Hash key.
   uint64_t key;
 
-  // Application specific data. NOTE: We need an integral number of
-  // these per block. This structure is 32 bytes. (at 200k hosts =
-  // 8 Meg). Which gives us 7 bytes of application information.
-  HostDBApplicationInfo app;
+  /// When the data was received.
+  ts_time ip_timestamp;
 
-  union {
-    IpEndpoint ip;                ///< IP address / port data.
-    unsigned int hostname_offset; ///< Some hostname thing.
-    SRVInfo srv;
-  } data;
+  /// Valid duration of the data.
+  ts_seconds ip_timeout_interval;
 
-  unsigned int hostname_offset; // always maintain a permanent copy of the 
hostname for non-rev dns records.
+  /** Atomically advance the round robin index.
+   *
+   * If multiple threads call this simultaneously each thread will get a 
distinct return value.
+   *
+   * @return The new round robin index.
+   */
+  unsigned next_rr();
+
+  /** Pick the next round robin and update the record atomically.
+   *
+   * @note This may select a zombie server and reserve it for the caller, 
therefore the caller must
+   * attempt to connect to the selected target if possible.
+   *
+   * @param now Current time to use for aliveness calculations.
+   * @param fail_window Blackout time for dead servers.
+   * @return Status of the updated target.
+   *
+   * If the return value is @c HostDBInfo::Status::DEAD this means all targets 
are dead and there is
+   * no valid upstream.
+   *
+   * @note Concurrency - this is not done under lock and depends on the caller 
for correct use.
+   * For strict round robin, it is a feature that every call will get a 
distinct index. For
+   * timed round robin, the caller must arrange to have only one thread call 
this per time interval.
+   */
+  HostDBInfo *select_next_rr(ts_time now, ts_seconds fail_window);
 
-  unsigned int ip_timestamp;
+  /// Check if this record is of SRV targets.
+  bool is_srv() const;
 
-  unsigned int ip_timeout_interval; // bounded between 1 and HOST_DB_MAX_TTL 
(0x1FFFFF, 24 days)
+  /** Query name for the record.
+   * @return A C-string.
+   * If this is a @c HOST record, this is the resolved name and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   */
+  char const *name() const;
+
+  /** Query name for the record.
+   * @return A view.
+   * If this is a @c HOST record, this is the resolved name and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   * @note Although not included in the view, the name is always nul 
terminated and the string can
+   * be used as a C-string.
+   */
+  ts::TextView name_view() const;
 
-  unsigned int is_srv : 1;
-  unsigned int reverse_dns : 1;
+  /// Get the array of info instances.
+  ts::MemSpan<HostDBInfo> rr_info();
 
-  unsigned int round_robin : 1;     // This is the root of a round robin block
-  unsigned int round_robin_elt : 1; // This is an address in a round robin 
block
+  /** Find a host record by IP address.
+   *
+   * @param addr Address key.
+   * @return A pointer to the info instance if a match is found, @c nullptr if 
not.
+   */
+  HostDBInfo *find(sockaddr const *addr);
+
+  /** Select an upstream target.
+   *
+   * @param now Current time.
+   * @param fail_window Dead server blackout time.
+   * @param hash_addr Inbound remote IP address.
+   * @return A selected target, or @c nullptr if there are no valid targets.
+   *
+   * This accounts for the round robin setting. The default is to use "client 
affinity" in
+   * which case @a hash_addr is used as a hash seed to select the target.
+   *
+   * This may select a zombie target, which can be detected by checking the 
target's last
+   * failure time. If it is not @c TS_TIME_ZERO the target is a zombie. Other 
transactions will
+   * be blocked from selecting that target until @a fail_window time has 
passed.
+   *
+   * In cases other than strict round robin, a base target is selected. If 
valid, that is returned,
+   * but if not then the targets in this record are searched until a valid one 
is found. The result
+   * is this can be called to select a target for failover when a previous 
target fails.
+   */
+  HostDBInfo *select_best_http(ts_time now, ts_seconds fail_window, sockaddr 
const *hash_addr);
+  HostDBInfo *select_best_srv(char *target, InkRand *rand, ts_time now, 
ts_seconds fail_window);
 
-  HostDBInfo() : _iobuffer_index{-1} {}
+  bool is_failed() const;
 
-  HostDBInfo(HostDBInfo const &src) : RefCountObj()
-  {
-    memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-    _iobuffer_index = -1;
-  }
+  void set_failed();
 
-  HostDBInfo &
-  operator=(HostDBInfo const &src)
-  {
-    if (this != &src) {
-      int iob_idx = _iobuffer_index;
-      memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-      _iobuffer_index = iob_idx;
-    }
-    return *this;
-  }
+  /// @return The time point when the item expires.
+  ts_time expiry_time() const;
 
-  bool
-  from_alloc() const
-  {
-    return _iobuffer_index >= 0;
-  }
+  ts_seconds ip_interval() const;
 
-private:
-  // The value of this will be -1 for objects that are not created by the 
alloc() static member function.
-  int _iobuffer_index;
-};
+  ts_seconds ip_time_remaining() const;
+
+  bool is_ip_stale() const;
+
+  bool is_ip_timeout() const;
+
+  bool is_ip_fail_timeout() const;
+
+  void refresh_ip();
+
+  bool serve_stale_but_revalidate() const;
+
+  /// Deallocate @a this.
+  void free() override;
 
-struct HostDBRoundRobin {
-  /** Total number (to compute space used). */
-  short rrcount = 0;
+  /** The current round robin index.
+   *
+   * @return The current index.
+   *
+   * @note The internal index may be out of range due to concurrency 
constraints - this insures the
+   * returned valu is in range.

Review comment:
       valu -> value

##########
File path: doc/developer-guide/core-architecture/hostdb.en.rst
##########
@@ -0,0 +1,191 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+.. include:: ../../common.defs
+
+.. highlight:: cpp
+.. default-domain:: cpp
+
+.. _developer-doc-hostdb:
+
+HostDB
+******
+
+HostDB is a cache of DNS results. It is used to increase performance by 
aggregating address
+resolution across transactions. HostDB also stores state information for 
specific IP addresses.
+
+Operation
+=========
+
+The primary operation for HostDB is to resolve a fully qualified domain name 
("FQDN"). As noted each
+FQDN is associated with a single record. Each record has an array of items. 
When a resolution
+request is made the database is checked to see if the record is already 
present. If so, it is
+served. Otherwise a DNS request is made. When the nameserver replies a record 
is created, added
+to the database, and then returned to the requestor.
+
+Each info tracks several status values for its corresponding upstream. These 
are
+
+*  HTTP version
+*  Last failure time
+
+The HTTP version is tracked from responses and provides a mechanism to make 
intelligent guesses
+about the protocol to use to the upstream.
+
+The last failure time tracks when the last connection failure to the info 
occurred and doubles as
+a flag, where a value of ``TS_TIME_ZERO`` indicates a live target and any 
other value indicates a
+dead info.
+
+If an info is marked dead (has a non-zero last failure time) there is a "fail 
window" during which
+no connections are permitted. After this time the info is considered to be a 
"zombie". If all infos
+for a record are dead then a specific error message is generated (body factory 
tag
+"connect#all_dead"). Otherwise if the selected info is a zombie, a request is 
permitted but the
+zombie is immediately marked dead again, preventing any additional requests 
until either the fail
+window has passed or the single connection succeeds. A successful connection 
clears the last file
+time and the info becomes alive.

Review comment:
       "last file time" -> "last fail time"

##########
File path: doc/developer-guide/core-architecture/hostdb.en.rst
##########
@@ -0,0 +1,191 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+.. include:: ../../common.defs
+
+.. highlight:: cpp
+.. default-domain:: cpp
+
+.. _developer-doc-hostdb:
+
+HostDB
+******
+
+HostDB is a cache of DNS results. It is used to increase performance by 
aggregating address
+resolution across transactions. HostDB also stores state information for 
specific IP addresses.
+
+Operation
+=========
+
+The primary operation for HostDB is to resolve a fully qualified domain name 
("FQDN"). As noted each
+FQDN is associated with a single record. Each record has an array of items. 
When a resolution
+request is made the database is checked to see if the record is already 
present. If so, it is
+served. Otherwise a DNS request is made. When the nameserver replies a record 
is created, added
+to the database, and then returned to the requestor.
+
+Each info tracks several status values for its corresponding upstream. These 
are
+
+*  HTTP version
+*  Last failure time
+
+The HTTP version is tracked from responses and provides a mechanism to make 
intelligent guesses
+about the protocol to use to the upstream.
+
+The last failure time tracks when the last connection failure to the info 
occurred and doubles as
+a flag, where a value of ``TS_TIME_ZERO`` indicates a live target and any 
other value indicates a
+dead info.
+
+If an info is marked dead (has a non-zero last failure time) there is a "fail 
window" during which
+no connections are permitted. After this time the info is considered to be a 
"zombie". If all infos
+for a record are dead then a specific error message is generated (body factory 
tag
+"connect#all_dead"). Otherwise if the selected info is a zombie, a request is 
permitted but the
+zombie is immediately marked dead again, preventing any additional requests 
until either the fail
+window has passed or the single connection succeeds. A successful connection 
clears the last file
+time and the info becomes alive.
+
+Runtime Structure
+=================
+
+DNS results are stored in a global hash table as instances of 
``HostDBRecord``. Each record stores
+the results of a single query. These records are not updated with new DNS 
results - instead a new
+record instance is created and replaces the previous instance in the table. 
The records are
+reference counted so such a replacement doesn't invalidate the old record if 
the latter is still
+being accessed. Some specific dynamic data is migrated from the old record to 
the new one, such as
+the failure status of the upstreams in the record.
+
+In each record is a variable length array of items, instances of 
``HostDBInfo``, one for each
+IP address in the record. This is called the "round robin" data for historical 
reasons. For SRV
+records there is an additional storage area in the record that is used to 
store the SRV names.
+
+.. figure:: HostDB-Data-Layout.svg
+
+The round robin data is accessed by using an offset and count in the base 
record. For SRV records
+each record has an offset, relative to that ``HostDBInfo`` instance, for its 
own name in the name
+storage area.
+
+State information for the outbound connection has been moved to a refurbished 
``DNSInfo`` class
+named ``ResolveInfo``. As much as possible relevant state information has been 
moved from the

Review comment:
       Referencing `DNSInfo` is helpful to us as reviewers, but probably not 
after this PR is merged in. Probably reword the first sentence to: 
   
   ```
   State information for the outbound connection is stored in an instance of a 
   class named ``ResolveInfo``. 
   ```

##########
File path: doc/developer-guide/core-architecture/hostdb.en.rst
##########
@@ -0,0 +1,191 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+.. include:: ../../common.defs
+
+.. highlight:: cpp
+.. default-domain:: cpp
+
+.. _developer-doc-hostdb:
+
+HostDB
+******
+
+HostDB is a cache of DNS results. It is used to increase performance by 
aggregating address
+resolution across transactions. HostDB also stores state information for 
specific IP addresses.
+
+Operation
+=========
+
+The primary operation for HostDB is to resolve a fully qualified domain name 
("FQDN"). As noted each
+FQDN is associated with a single record. Each record has an array of items. 
When a resolution
+request is made the database is checked to see if the record is already 
present. If so, it is
+served. Otherwise a DNS request is made. When the nameserver replies a record 
is created, added
+to the database, and then returned to the requestor.
+
+Each info tracks several status values for its corresponding upstream. These 
are
+
+*  HTTP version
+*  Last failure time
+
+The HTTP version is tracked from responses and provides a mechanism to make 
intelligent guesses
+about the protocol to use to the upstream.
+
+The last failure time tracks when the last connection failure to the info 
occurred and doubles as
+a flag, where a value of ``TS_TIME_ZERO`` indicates a live target and any 
other value indicates a
+dead info.
+
+If an info is marked dead (has a non-zero last failure time) there is a "fail 
window" during which
+no connections are permitted. After this time the info is considered to be a 
"zombie". If all infos
+for a record are dead then a specific error message is generated (body factory 
tag
+"connect#all_dead"). Otherwise if the selected info is a zombie, a request is 
permitted but the
+zombie is immediately marked dead again, preventing any additional requests 
until either the fail
+window has passed or the single connection succeeds. A successful connection 
clears the last file
+time and the info becomes alive.
+
+Runtime Structure
+=================
+
+DNS results are stored in a global hash table as instances of 
``HostDBRecord``. Each record stores
+the results of a single query. These records are not updated with new DNS 
results - instead a new
+record instance is created and replaces the previous instance in the table. 
The records are
+reference counted so such a replacement doesn't invalidate the old record if 
the latter is still
+being accessed. Some specific dynamic data is migrated from the old record to 
the new one, such as
+the failure status of the upstreams in the record.
+
+In each record is a variable length array of items, instances of 
``HostDBInfo``, one for each
+IP address in the record. This is called the "round robin" data for historical 
reasons. For SRV
+records there is an additional storage area in the record that is used to 
store the SRV names.
+
+.. figure:: HostDB-Data-Layout.svg
+
+The round robin data is accessed by using an offset and count in the base 
record. For SRV records
+each record has an offset, relative to that ``HostDBInfo`` instance, for its 
own name in the name
+storage area.
+
+State information for the outbound connection has been moved to a refurbished 
``DNSInfo`` class
+named ``ResolveInfo``. As much as possible relevant state information has been 
moved from the
+``HttpSM`` to this structure. This is intended for future work where the state 
machine deals only
+with upstream transactions and not sessions.
+
+``ResolveInfo`` may contain a reference to a HostDB record, which preserves 
the record even if it is
+replaced due to DNS queries in other transactions. The record is not required 
as the resolution
+information can be supplied directly without DNS or HostDB, e.g. a plugin sets 
the upstream address
+explicitly. The ``resolved_p`` flag indicates if the current information is 
valid and ready to be
+used or not. A result of this is there is no longer a specific holder for API 
provided addresses -
+the interface now puts the address in the ``ResolveInfo`` and marks it as 
resolved. This prevents
+further DNS / HostDB lookups and the address is used as is.
+
+The upstream port is a bit tricky and should be cleaned up. Currently the value in 
``srv_port``
+determines the port if set. If not, then the port in ``addr`` is used.
+
+Resolution Style
+----------------
+
+.. cpp:enum:: OS_Addr
+
+   Metadata about the source of the resolved address.
+
+   .. cpp:enumerator:: TRY_DEFAULT
+
+      Use default resolution. This is the initial state.
+
+   .. cpp:enumerator:: TRY_HOSTDB
+
+      Use HostDB to resolve the target key.
+
+   .. cpp:enumerator:: TRY_CLIENT
+
+      Use the client supplied target address. This is used for transparent 
connections - the upstream
+      address is obtained from the inbound connection. May fail over to HostDB.
+
+   .. cpp:enumerator:: USE_HOSTDB
+
+      Use HostDB to resolve the target key.
+
+   .. cpp:enumerator:: USE_CLIENT
+
+      Use the client supplied target address.
+
+   .. cpp:enumerator:: USE_API
+
+      Use the address provided via the plugin API.
+
+   The parallel values for using HostDB and the client target address are to 
control fail over on
+   connection failure. The ``TRY_`` values can fail over to another style, but 
the ``USE_`` values
+   cannot. This prevents cycles of style changes by having any ``TRY_`` value 
fail over to a
+   ``USE_`` value, at which point it can no longer change. Note there is no 
``TRY_API`` - if a
+   plugin sets the upstream address that is locked in.
+
+Issues
+======
+
+Currently if an upstream is marked down connections are still permitted, the 
only change is the
+number of retries. This has caused operational problems where dead systems are 
flooded with requests
+which, despite the timeouts, accumulate in ATS until ATS runs out of memory 
(there were instances of

Review comment:
       These sentences use `down` and `dead` interchangeably. #7283 records the 
desire to generally unify and clarify the wording, but for new docs let's make 
the wording consistent within the doc.

##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
 
-  static HostDBInfo *
-  alloc(int size = 0)
-  {
-    size += sizeof(HostDBInfo);
-    int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
-    ink_release_assert(iobuffer_index >= 0);
-    void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
-    memset(ptr, 0, size);
-    HostDBInfo *ret      = new (ptr) HostDBInfo();
-    ret->_iobuffer_index = iobuffer_index;
-    return ret;
-  }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+  using self_type = HostDBInfo; ///< Self reference type.
 
-  void
-  free() override
-  {
-    ink_release_assert(from_alloc());
-    Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), 
this);
-    ioBufAllocator[_iobuffer_index].free_void((void *)(this));
-  }
+  /// Default constructor.
+  HostDBInfo() = default;
 
-  /// Effectively the @c object_version for cache data.
-  /// This is used to indicate incompatible changes in the binary layout of 
HostDB records.
-  /// It must be updated if any such change is made, even if it is 
functionally equivalent.
-  static ts::VersionNumber
-  version()
-  {
-    /// - 1.0 Initial version.
-    /// - 1.1 tweak HostDBApplicationInfo::http_data.
-    return ts::VersionNumber(1, 1);
-  }
+  HostDBInfo &operator=(HostDBInfo const &that);
 
-  static HostDBInfo *
-  unmarshall(char *buf, unsigned int size)
-  {
-    if (size < sizeof(HostDBInfo)) {
-      return nullptr;
-    }
-    HostDBInfo *ret = HostDBInfo::alloc(size - sizeof(HostDBInfo));
-    int buf_index   = ret->_iobuffer_index;
-    memcpy((void *)ret, buf, size);
-    // Reset the refcount back to 0, this is a bit ugly-- but I'm not sure we 
want to expose a method
-    // to mess with the refcount, since this is a fairly unique use case
-    ret                  = new (ret) HostDBInfo();
-    ret->_iobuffer_index = buf_index;
-    return ret;
-  }
+  /// Absolute time of when this target failed.
+  /// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
+  ts_time last_fail_time() const;
 
-  // return expiry time (in seconds since epoch)
-  ink_time_t
-  expiry_time() const
-  {
-    return ip_timestamp + ip_timeout_interval + 
hostdb_serve_stale_but_revalidate;
-  }
+  /// Target is alive - no known failure.
+  bool is_alive();
 
-  sockaddr *
-  ip()
-  {
-    return &data.ip.sa;
-  }
+  /// Target has failed and is still in the blocked time window.
+  bool is_dead(ts_time now, ts_seconds fail_window);
 
-  sockaddr const *
-  ip() const
-  {
-    return &data.ip.sa;
-  }
+  /** Select this target.
+   *
+   * @param now Current time.
+   * @param fail_window Failure window.
+   * @return Status of the selection.
+   *
+   * If a zombie is selected the failure time is updated to make it look dead 
to other threads in a thread safe
+   * manner. The caller should check @c last_fail_time to see if a zombie was 
selected.
+   */
+  bool select(ts_time now, ts_seconds fail_window);
 
-  char *hostname() const;
-  char *perm_hostname() const;
-  char *srvname(HostDBRoundRobin *rr) const;
+  /// Check if this info is valid.
+  bool is_valid() const;
 
-  /// Check if this entry is an element of a round robin entry.
-  /// If @c true then this entry is part of and was obtained from a round 
robin root. This is useful if the
-  /// address doesn't work - a retry can probably get a new address by doing 
another lookup and resolving to
-  /// a different element of the round robin.
-  bool
-  is_rr_elt() const
-  {
-    return 0 != round_robin_elt;
-  }
+  /// Mark this info as invalid.
+  void invalidate();
 
-  HostDBRoundRobin *rr();
+  /** Mark the entry as down.
+   *
+   * @param now Time of the failure.
+   * @return @c true if @a this was marked down, @c false if not.
+   *
+   * This can return @c false if the entry is already marked down, in which 
case the failure time is not updated.
+   */
+  bool mark_down(ts_time now);
 
-  unsigned int
-  ip_interval() const
-  {
-    return (hostdb_current_interval - ip_timestamp) & 0x7FFFFFFF;
-  }
+  /** Mark the target as up / alive.
+   *
+   * @return Previous alive state of the target.
+   */
+  bool mark_up();
 
-  int
-  ip_time_remaining() const
-  {
-    return static_cast<int>(ip_timeout_interval) - 
static_cast<int>(this->ip_interval());
-  }
+  char const *srvname() const;
 
-  bool
-  is_ip_stale() const
-  {
-    return ip_timeout_interval >= 2 * hostdb_ip_stale_interval && 
ip_interval() >= hostdb_ip_stale_interval;
-  }
+  /** Migrate data after a DNS update.
+   *
+   * @param that Source item.
+   *
+   * This moves only specific state information, it is not a generic copy.
+   */
+  void migrate_from(self_type const &that);
 
-  bool
-  is_ip_timeout() const
-  {
-    return ip_interval() >= ip_timeout_interval;
-  }
+  /// A target is either an IP address or an SRV record.
+  /// The type should be indicated by @c flags.f.is_srv;
+  union {
+    IpAddr ip;   ///< IP address / port data.
+    SRVInfo srv; ///< SRV record.
+  } data{IpAddr{}};
+
+  /// Data that migrates after updated DNS records are processed.
+  /// @see migrate_from
+  /// @{
+  /// Last time a failure was recorded.
+  std::atomic<ts_time> last_failure{TS_TIME_ZERO};
+  /// Count of connection failures
+  std::atomic<uint8_t> fail_count{0};
+  /// Expected HTTP version of the target based on earlier transactions.
+  HTTPVersion http_version = HTTP_INVALID;
+  /// @}
+
+  self_type &assign(IpAddr const &addr);
+
+protected:
+  self_type &assign(sa_family_t af, void const *addr);
+  self_type &assign(SRV const *srv, char const *name);
+
+  HostDBType type = HostDBType::UNSPEC; ///< Invalid data.
+
+  friend HostDBContinuation;
+};
 
-  bool
-  is_ip_fail_timeout() const
-  {
-    return ip_interval() >= hostdb_ip_fail_timeout_interval;
+inline HostDBInfo &
+HostDBInfo::operator=(HostDBInfo const &that)
+{
+  if (this != &that) {
+    memcpy(static_cast<void *>(this), static_cast<const void *>(&that), 
sizeof(*this));
   }
+  return *this;
+}
 
-  void
-  refresh_ip()
-  {
-    ip_timestamp = hostdb_current_interval;
-  }
+inline ts_time
+HostDBInfo::last_fail_time() const
+{
+  return last_failure;
+}
 
-  bool
-  serve_stale_but_revalidate() const
-  {
-    // the option is disabled
-    if (hostdb_serve_stale_but_revalidate <= 0) {
-      return false;
-    }
+inline bool
+HostDBInfo::is_alive()
+{
+  return this->last_fail_time() == TS_TIME_ZERO;
+}
 
-    // ip_timeout_interval == DNS TTL
-    // hostdb_serve_stale_but_revalidate == number of seconds
-    // ip_interval() is the number of seconds between now() and when the entry 
was inserted
-    if ((ip_timeout_interval + hostdb_serve_stale_but_revalidate) > 
ip_interval()) {
-      Debug("hostdb", "serving stale entry %d | %d | %d as requested by 
config", ip_timeout_interval,
-            hostdb_serve_stale_but_revalidate, ip_interval());
-      return true;
-    }
+inline bool
+HostDBInfo::is_dead(ts_time now, ts_seconds fail_window)
+{
+  auto last_fail = this->last_fail_time();
+  return (last_fail != TS_TIME_ZERO) && (last_fail + fail_window < now);
+}
+
+inline bool
+HostDBInfo::mark_up()
+{
+  auto t = last_failure.exchange(TS_TIME_ZERO);
+  return t != TS_TIME_ZERO;
+}
+
+inline bool
+HostDBInfo::mark_down(ts_time now)
+{
+  auto t0{TS_TIME_ZERO};
+  return last_failure.compare_exchange_strong(t0, now);
+}
 
-    // otherwise, the entry is too old
-    return false;
+inline bool
+HostDBInfo::select(ts_time now, ts_seconds fail_window)
+{
+  auto t0 = this->last_fail_time();
+  if (t0 == TS_TIME_ZERO) {
+    return true; // it's alive and so is valid for selection.
   }
+  // Success means this is a zombie and this thread updated the failure time.
+  return (t0 + fail_window < now) && last_failure.compare_exchange_strong(t0, 
now);
+}
+
+inline void
+HostDBInfo::migrate_from(HostDBInfo::self_type const &that)
+{
+  this->last_failure = that.last_failure.load();
+  this->http_version = that.http_version;
+}
 
-  /*
-   * Given the current time `now` and the fail_window, determine if this real 
is alive
+inline bool
+HostDBInfo::is_valid() const
+{
+  return type != HostDBType::UNSPEC;
+}
+
+inline void
+HostDBInfo::invalidate()
+{
+  type = HostDBType::UNSPEC;
+}
+
+// ----
+/** Root item for HostDB.
+ * This is the container for HostDB data. It is always an array of @c 
HostDBInfo instances plus metadata.
+ * All strings are C-strings and therefore don't need a distinct size.
+ *
+ */
+class HostDBRecord : public RefCountObj
+{
+  friend struct HostDBContinuation;
+  friend struct ShowHostDB;
+  using self_type = HostDBRecord;
+
+  /// Size of the IO buffer block owned by @a this.
+  /// If negative @a this is not in allocated memory.
+  int _iobuffer_index{-1};
+  /// Actual size of the data.
+  unsigned _record_size = sizeof(self_type);
+
+public:
+  HostDBRecord()                      = default;
+  HostDBRecord(self_type const &that) = delete;
+
+  using Handle = Ptr<HostDBRecord>; ///< Shared pointer type to hold an 
instance.
+
+  /** Allocate an instance from the IOBuffers.
+   *
+   * @param query_name Name of the query for the record.
+   * @param rr_count Number of info instances.
+   * @param srv_name_size Storage for SRV names, if any.
+   * @return An instance sufficient to hold the specified data.
+   *
+   * The query name will be stored and initialized, and the info instances 
initialized.
    */
-  bool
-  is_alive(ink_time_t now, int32_t fail_window)
-  {
-    unsigned int last_failure = app.http_data.last_failure;
-
-    if (last_failure == 0 || (unsigned int)(now - fail_window) > last_failure) 
{
-      return true;
-    } else {
-      // Entry is marked down.  Make sure some nasty clock skew
-      //  did not occur.  Use the retry time to set an upper bound
-      //  as to how far in the future we should tolerate bogus last
-      //  failure times.  This sets the upper bound that we would ever
-      //  consider a server down to 2*down_server_timeout
-      if ((unsigned int)(now + fail_window) < last_failure) {
-        app.http_data.last_failure = 0;
-        return false;
-      }
-      return false;
-    }
-  }
+  static self_type *alloc(ts::TextView query_name, unsigned rr_count, size_t 
srv_name_size = 0);
 
-  bool
-  is_failed() const
-  {
-    return !((is_srv && data.srv.srv_offset) || (reverse_dns && 
data.hostname_offset) || ats_is_ip(ip()));
-  }
+  /// Type of data stored in this record.
+  HostDBType record_type = HostDBType::UNSPEC;
 
-  void
-  set_failed()
-  {
-    if (is_srv) {
-      data.srv.srv_offset = 0;
-    } else if (reverse_dns) {
-      data.hostname_offset = 0;
-    } else {
-      ats_ip_invalidate(ip());
-    }
-  }
+  /// IP family of this record.
+  sa_family_t af_family = AF_UNSPEC;
+
+  /// Offset from @a this to the VLA.
+  unsigned short rr_offset = 0;
+
+  /// Number of @c HostDBInfo instances.
+  unsigned short rr_count = 0;
+
+  /// Timing data for switch records in the RR.
+  std::atomic<ts_time> rr_ctime{TS_TIME_ZERO};
 
+  /// Hash key.
   uint64_t key;
 
-  // Application specific data. NOTE: We need an integral number of
-  // these per block. This structure is 32 bytes. (at 200k hosts =
-  // 8 Meg). Which gives us 7 bytes of application information.
-  HostDBApplicationInfo app;
+  /// When the data was received.
+  ts_time ip_timestamp;
 
-  union {
-    IpEndpoint ip;                ///< IP address / port data.
-    unsigned int hostname_offset; ///< Some hostname thing.
-    SRVInfo srv;
-  } data;
+  /// Valid duration of the data.
+  ts_seconds ip_timeout_interval;
 
-  unsigned int hostname_offset; // always maintain a permanent copy of the 
hostname for non-rev dns records.
+  /** Atomically advance the round robin index.
+   *
+   * If multiple threads call this simultaneously each thread will get a 
distinct return value.
+   *
+   * @return The new round robin index.
+   */
+  unsigned next_rr();
+
+  /** Pick the next round robin and update the record atomically.
+   *
+   * @note This may select a zombie server and reserve it for the caller, 
therefore the caller must
+   * attempt to connect to the selected target if possible.
+   *
+   * @param now Current time to use for aliveness calculations.
+   * @param fail_window Blackout time for dead servers.
+   * @return Status of the updated target.
+   *
+   * If the return value is @c HostDBInfo::Status::DEAD this means all targets 
are dead and there is
+   * no valid upstream.
+   *
+   * @note Concurrency - this is not done under lock and depends on the caller 
for correct use.
+   * For strict round robin, it is a feature that every call will get a 
distinct index. For
+   * timed round robin, the caller must arrange to have only one thread call 
this per time interval.
+   */
+  HostDBInfo *select_next_rr(ts_time now, ts_seconds fail_window);
 
-  unsigned int ip_timestamp;
+  /// Check if this record is of SRV targets.
+  bool is_srv() const;
 
-  unsigned int ip_timeout_interval; // bounded between 1 and HOST_DB_MAX_TTL 
(0x1FFFFF, 24 days)
+  /** Query name for the record.
+   * @return A C-string.
+   * If this is a @c HOST record, this is the resolved name and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   */
+  char const *name() const;
+
+  /** Query name for the record.
+   * @return A view.
+   * If this is a @c HOST record, this is the resolved name and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   * @note Although not included in the view, the name is always nul 
terminated and the string can
+   * be used as a C-string.
+   */
+  ts::TextView name_view() const;
 
-  unsigned int is_srv : 1;
-  unsigned int reverse_dns : 1;
+  /// Get the array of info instances.
+  ts::MemSpan<HostDBInfo> rr_info();
 
-  unsigned int round_robin : 1;     // This is the root of a round robin block
-  unsigned int round_robin_elt : 1; // This is an address in a round robin 
block
+  /** Find a host record by IP address.
+   *
+   * @param addr Address key.
+   * @return A pointer to the info instance if a match is found, @c nullptr if 
not.
+   */
+  HostDBInfo *find(sockaddr const *addr);
+
+  /** Select an upstream target.
+   *
+   * @param now Current time.
+   * @param fail_window Dead server blackout time.
+   * @param hash_addr Inbound remote IP address.
+   * @return A selected target, or @c nullptr if there are no valid targets.
+   *
+   * This accounts for the round robin setting. The default is to use "client 
affinity" in
+   * which case @a hash_addr is used as a hash seed to select the target.
+   *
+   * This may select a zombie target, which can be detected by checking the 
target's last
+   * failure time. If it is not @c TS_TIME_ZERO the target is a zombie. Other 
transactions will
+   * be blocked from selecting that target until @a fail_window time has 
passed.
+   *
+   * In cases other than strict round robin, a base target is selected. If 
valid, that is returned,
+   * but if not then the targets in this record are searched until a valid one 
is found. The result
+   * is this can be called to select a target for failover when a previous 
target fails.
+   */
+  HostDBInfo *select_best_http(ts_time now, ts_seconds fail_window, sockaddr 
const *hash_addr);
+  HostDBInfo *select_best_srv(char *target, InkRand *rand, ts_time now, 
ts_seconds fail_window);
 
-  HostDBInfo() : _iobuffer_index{-1} {}
+  bool is_failed() const;
 
-  HostDBInfo(HostDBInfo const &src) : RefCountObj()
-  {
-    memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-    _iobuffer_index = -1;
-  }
+  void set_failed();
 
-  HostDBInfo &
-  operator=(HostDBInfo const &src)
-  {
-    if (this != &src) {
-      int iob_idx = _iobuffer_index;
-      memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-      _iobuffer_index = iob_idx;
-    }
-    return *this;
-  }
+  /// @return The time point when the item expires.
+  ts_time expiry_time() const;
 
-  bool
-  from_alloc() const
-  {
-    return _iobuffer_index >= 0;
-  }
+  ts_seconds ip_interval() const;
 
-private:
-  // The value of this will be -1 for objects that are not created by the 
alloc() static member function.
-  int _iobuffer_index;
-};
+  ts_seconds ip_time_remaining() const;
+
+  bool is_ip_stale() const;
+
+  bool is_ip_timeout() const;
+
+  bool is_ip_fail_timeout() const;
+
+  void refresh_ip();
+
+  bool serve_stale_but_revalidate() const;
+
+  /// Deallocate @a this.
+  void free() override;
 
-struct HostDBRoundRobin {
-  /** Total number (to compute space used). */
-  short rrcount = 0;
+  /** The current round robin index.
+   *
+   * @return The current index.
+   *
+   * @note The internal index may be out of range due to concurrency 
constraints - this insures the
+   * returned valu is in range.
+   */
+  unsigned short rr_idx() const;
 
-  /** Number which have not failed a connect. */
-  short good = 0;
+  /** Offset from the current round robin index.
+   *
+   * @param delta Distance from the current index.
+   * @return The effective index.
+   */
+  unsigned short rr_idx(unsigned short delta) const;
 
-  unsigned short current    = 0;
-  ink_time_t timed_rr_ctime = 0;
+  /// The index of @a target in this record.
+  int index_of(HostDBInfo const *target) const;
 
-  // This is the equivalent of a variable length array, we can't use a VLA 
because
-  // HostDBInfo is a non-POD type-- so this is the best we can do.
-  HostDBInfo &
-  info(short n)
+  /** Allocation and initialize an instance from a serialized buffer.

Review comment:
       Typo in the doc comment: "Allocation" -> "Allocate" (i.e. "Allocate and initialize an instance ...").

##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
 
-  static HostDBInfo *
-  alloc(int size = 0)
-  {
-    size += sizeof(HostDBInfo);
-    int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
-    ink_release_assert(iobuffer_index >= 0);
-    void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
-    memset(ptr, 0, size);
-    HostDBInfo *ret      = new (ptr) HostDBInfo();
-    ret->_iobuffer_index = iobuffer_index;
-    return ret;
-  }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+  using self_type = HostDBInfo; ///< Self reference type.
 
-  void
-  free() override
-  {
-    ink_release_assert(from_alloc());
-    Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), 
this);
-    ioBufAllocator[_iobuffer_index].free_void((void *)(this));
-  }
+  /// Default constructor.
+  HostDBInfo() = default;
 
-  /// Effectively the @c object_version for cache data.
-  /// This is used to indicate incompatible changes in the binary layout of 
HostDB records.
-  /// It must be updated if any such change is made, even if it is 
functionally equivalent.
-  static ts::VersionNumber
-  version()
-  {
-    /// - 1.0 Initial version.
-    /// - 1.1 tweak HostDBApplicationInfo::http_data.
-    return ts::VersionNumber(1, 1);
-  }
+  HostDBInfo &operator=(HostDBInfo const &that);
 
-  static HostDBInfo *
-  unmarshall(char *buf, unsigned int size)
-  {
-    if (size < sizeof(HostDBInfo)) {
-      return nullptr;
-    }
-    HostDBInfo *ret = HostDBInfo::alloc(size - sizeof(HostDBInfo));
-    int buf_index   = ret->_iobuffer_index;
-    memcpy((void *)ret, buf, size);
-    // Reset the refcount back to 0, this is a bit ugly-- but I'm not sure we 
want to expose a method
-    // to mess with the refcount, since this is a fairly unique use case
-    ret                  = new (ret) HostDBInfo();
-    ret->_iobuffer_index = buf_index;
-    return ret;
-  }
+  /// Absolute time of when this target failed.
+  /// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
+  ts_time last_fail_time() const;
 
-  // return expiry time (in seconds since epoch)
-  ink_time_t
-  expiry_time() const
-  {
-    return ip_timestamp + ip_timeout_interval + 
hostdb_serve_stale_but_revalidate;
-  }
+  /// Target is alive - no known failure.
+  bool is_alive();
 
-  sockaddr *
-  ip()
-  {
-    return &data.ip.sa;
-  }
+  /// Target has failed and is still in the blocked time window.
+  bool is_dead(ts_time now, ts_seconds fail_window);
 
-  sockaddr const *
-  ip() const
-  {
-    return &data.ip.sa;
-  }
+  /** Select this target.
+   *
+   * @param now Current time.
+   * @param fail_window Failure window.
+   * @return Status of the selection.
+   *
+   * If a zombie is selected the failure time is updated to make it look dead 
to other threads in a thread safe
+   * manner. The caller should check @c last_fail_time to see if a zombie was 
selected.
+   */
+  bool select(ts_time now, ts_seconds fail_window);
 
-  char *hostname() const;
-  char *perm_hostname() const;
-  char *srvname(HostDBRoundRobin *rr) const;
+  /// Check if this info is valid.
+  bool is_valid() const;
 
-  /// Check if this entry is an element of a round robin entry.
-  /// If @c true then this entry is part of and was obtained from a round 
robin root. This is useful if the
-  /// address doesn't work - a retry can probably get a new address by doing 
another lookup and resolving to
-  /// a different element of the round robin.
-  bool
-  is_rr_elt() const
-  {
-    return 0 != round_robin_elt;
-  }
+  /// Mark this info as invalid.
+  void invalidate();
 
-  HostDBRoundRobin *rr();
+  /** Mark the entry as down.
+   *
+   * @param now Time of the failure.
+   * @return @c true if @a this was marked down, @c false if not.
+   *
+   * This can return @c false if the entry is already marked down, in which 
case the failure time is not updated.
+   */
+  bool mark_down(ts_time now);
 
-  unsigned int
-  ip_interval() const
-  {
-    return (hostdb_current_interval - ip_timestamp) & 0x7FFFFFFF;
-  }
+  /** Mark the target as up / alive.
+   *
+   * @return Previous alive state of the target.
+   */
+  bool mark_up();
 
-  int
-  ip_time_remaining() const
-  {
-    return static_cast<int>(ip_timeout_interval) - 
static_cast<int>(this->ip_interval());
-  }
+  char const *srvname() const;
 
-  bool
-  is_ip_stale() const
-  {
-    return ip_timeout_interval >= 2 * hostdb_ip_stale_interval && 
ip_interval() >= hostdb_ip_stale_interval;
-  }
+  /** Migrate data after a DNS update.
+   *
+   * @param that Source item.
+   *
+   * This moves only specific state information, it is not a generic copy.
+   */
+  void migrate_from(self_type const &that);
 
-  bool
-  is_ip_timeout() const
-  {
-    return ip_interval() >= ip_timeout_interval;
-  }
+  /// A target is either an IP address or an SRV record.
+  /// The type should be indicated by @c flags.f.is_srv;
+  union {
+    IpAddr ip;   ///< IP address / port data.
+    SRVInfo srv; ///< SRV record.
+  } data{IpAddr{}};
+
+  /// Data that migrates after updated DNS records are processed.
+  /// @see migrate_from
+  /// @{
+  /// Last time a failure was recorded.
+  std::atomic<ts_time> last_failure{TS_TIME_ZERO};
+  /// Count of connection failures
+  std::atomic<uint8_t> fail_count{0};
+  /// Expected HTTP version of the target based on earlier transactions.
+  HTTPVersion http_version = HTTP_INVALID;
+  /// @}
+
+  self_type &assign(IpAddr const &addr);
+
+protected:
+  self_type &assign(sa_family_t af, void const *addr);
+  self_type &assign(SRV const *srv, char const *name);
+
+  HostDBType type = HostDBType::UNSPEC; ///< Invalid data.
+
+  friend HostDBContinuation;
+};
 
-  bool
-  is_ip_fail_timeout() const
-  {
-    return ip_interval() >= hostdb_ip_fail_timeout_interval;
+inline HostDBInfo &
+HostDBInfo::operator=(HostDBInfo const &that)
+{
+  if (this != &that) {
+    memcpy(static_cast<void *>(this), static_cast<const void *>(&that), 
sizeof(*this));
   }
+  return *this;
+}
 
-  void
-  refresh_ip()
-  {
-    ip_timestamp = hostdb_current_interval;
-  }
+inline ts_time
+HostDBInfo::last_fail_time() const
+{
+  return last_failure;
+}
 
-  bool
-  serve_stale_but_revalidate() const
-  {
-    // the option is disabled
-    if (hostdb_serve_stale_but_revalidate <= 0) {
-      return false;
-    }
+inline bool
+HostDBInfo::is_alive()
+{
+  return this->last_fail_time() == TS_TIME_ZERO;
+}
 
-    // ip_timeout_interval == DNS TTL
-    // hostdb_serve_stale_but_revalidate == number of seconds
-    // ip_interval() is the number of seconds between now() and when the entry 
was inserted
-    if ((ip_timeout_interval + hostdb_serve_stale_but_revalidate) > 
ip_interval()) {
-      Debug("hostdb", "serving stale entry %d | %d | %d as requested by 
config", ip_timeout_interval,
-            hostdb_serve_stale_but_revalidate, ip_interval());
-      return true;
-    }
+inline bool
+HostDBInfo::is_dead(ts_time now, ts_seconds fail_window)
+{
+  auto last_fail = this->last_fail_time();
+  return (last_fail != TS_TIME_ZERO) && (last_fail + fail_window < now);
+}
+
+inline bool
+HostDBInfo::mark_up()
+{
+  auto t = last_failure.exchange(TS_TIME_ZERO);
+  return t != TS_TIME_ZERO;
+}
+
+inline bool
+HostDBInfo::mark_down(ts_time now)
+{
+  auto t0{TS_TIME_ZERO};
+  return last_failure.compare_exchange_strong(t0, now);
+}
 
-    // otherwise, the entry is too old
-    return false;
+inline bool
+HostDBInfo::select(ts_time now, ts_seconds fail_window)
+{
+  auto t0 = this->last_fail_time();
+  if (t0 == TS_TIME_ZERO) {
+    return true; // it's alive and so is valid for selection.
   }
+  // Success means this is a zombie and this thread updated the failure time.
+  return (t0 + fail_window < now) && last_failure.compare_exchange_strong(t0, 
now);
+}
+
+inline void
+HostDBInfo::migrate_from(HostDBInfo::self_type const &that)
+{
+  this->last_failure = that.last_failure.load();
+  this->http_version = that.http_version;
+}
 
-  /*
-   * Given the current time `now` and the fail_window, determine if this real 
is alive
+inline bool
+HostDBInfo::is_valid() const
+{
+  return type != HostDBType::UNSPEC;
+}
+
+inline void
+HostDBInfo::invalidate()
+{
+  type = HostDBType::UNSPEC;
+}
+
+// ----
+/** Root item for HostDB.
+ * This is the container for HostDB data. It is always an array of @c 
HostDBInfo instances plus metadata.
+ * All strings are C-strings and therefore don't need a distinct size.
+ *
+ */
+class HostDBRecord : public RefCountObj
+{
+  friend struct HostDBContinuation;
+  friend struct ShowHostDB;
+  using self_type = HostDBRecord;
+
+  /// Size of the IO buffer block owned by @a this.
+  /// If negative @a this is in not allocated memory.
+  int _iobuffer_index{-1};
+  /// Actual size of the data.
+  unsigned _record_size = sizeof(self_type);
+
+public:
+  HostDBRecord()                      = default;
+  HostDBRecord(self_type const &that) = delete;
+
+  using Handle = Ptr<HostDBRecord>; ///< Shared pointer type to hold an 
instance.
+
+  /** Allocate an instance from the IOBuffers.
+   *
+   * @param query_name Name of the query for the record.
+   * @param rr_count Number of info instances.
+   * @param srv_name_size Storage for SRV names, if any.
+   * @return An instance sufficient to hold the specified data.
+   *
+   * The query name will stored and initialized, and the info instances 
initialized.
    */
-  bool
-  is_alive(ink_time_t now, int32_t fail_window)
-  {
-    unsigned int last_failure = app.http_data.last_failure;
-
-    if (last_failure == 0 || (unsigned int)(now - fail_window) > last_failure) 
{
-      return true;
-    } else {
-      // Entry is marked down.  Make sure some nasty clock skew
-      //  did not occur.  Use the retry time to set an upper bound
-      //  as to how far in the future we should tolerate bogus last
-      //  failure times.  This sets the upper bound that we would ever
-      //  consider a server down to 2*down_server_timeout
-      if ((unsigned int)(now + fail_window) < last_failure) {
-        app.http_data.last_failure = 0;
-        return false;
-      }
-      return false;
-    }
-  }
+  static self_type *alloc(ts::TextView query_name, unsigned rr_count, size_t 
srv_name_size = 0);
 
-  bool
-  is_failed() const
-  {
-    return !((is_srv && data.srv.srv_offset) || (reverse_dns && 
data.hostname_offset) || ats_is_ip(ip()));
-  }
+  /// Type of data stored in this record.
+  HostDBType record_type = HostDBType::UNSPEC;
 
-  void
-  set_failed()
-  {
-    if (is_srv) {
-      data.srv.srv_offset = 0;
-    } else if (reverse_dns) {
-      data.hostname_offset = 0;
-    } else {
-      ats_ip_invalidate(ip());
-    }
-  }
+  /// IP family of this record.
+  sa_family_t af_family = AF_UNSPEC;
+
+  /// Offset from @a this to the VLA.
+  unsigned short rr_offset = 0;
+
+  /// Number of @c HostDBInfo instances.
+  unsigned short rr_count = 0;
+
+  /// Timing data for switch records in the RR.
+  std::atomic<ts_time> rr_ctime{TS_TIME_ZERO};
 
+  /// Hash key.
   uint64_t key;
 
-  // Application specific data. NOTE: We need an integral number of
-  // these per block. This structure is 32 bytes. (at 200k hosts =
-  // 8 Meg). Which gives us 7 bytes of application information.
-  HostDBApplicationInfo app;
+  /// When the data was received.
+  ts_time ip_timestamp;
 
-  union {
-    IpEndpoint ip;                ///< IP address / port data.
-    unsigned int hostname_offset; ///< Some hostname thing.
-    SRVInfo srv;
-  } data;
+  /// Valid duration of the data.
+  ts_seconds ip_timeout_interval;
 
-  unsigned int hostname_offset; // always maintain a permanent copy of the 
hostname for non-rev dns records.
+  /** Atomically advance the round robin index.
+   *
+   * If multiple threads call this simultaneously each thread will get a 
distinct return value.
+   *
+   * @return The new round robin index.
+   */
+  unsigned next_rr();
+
+  /** Pick the next round robin and update the record atomically.
+   *
+   * @note This may select a zombie server and reserve it for the caller, 
therefore the caller must
+   * attempt to connect to the selected target if possible.
+   *
+   * @param now Current time to use for aliveness calculations.
+   * @param fail_window Blackout time for dead servers.
+   * @return Status of the updated target.
+   *
+   * If the return value is @c HostDBInfo::Status::DEAD this means all targets 
are dead and there is
+   * no valid upstream.
+   *
+   * @note Concurrency - this is not done under lock and depends on the caller 
for correct use.
+   * For strict round robin, it is a feature that every call will get a 
distinct index. For
+   * timed round robin, the caller must arrange to have only one thread call 
this per time interval.
+   */
+  HostDBInfo *select_next_rr(ts_time now, ts_seconds fail_window);
 
-  unsigned int ip_timestamp;
+  /// Check if this record is of SRV targets.
+  bool is_srv() const;
 
-  unsigned int ip_timeout_interval; // bounded between 1 and HOST_DB_MAX_TTL 
(0x1FFFFF, 24 days)
+  /** Query name for the record.
+   * @return A C-string.
+   * If this is a @c HOST record, this is the resolved named and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   */
+  char const *name() const;
+
+  /** Query name for the record.
+   * @return A view.
+   * If this is a @c HOST record, this is the resolved named and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   * @note Although not included in the view, the name is always nul 
terminated and the string can
+   * be used as a C-string.
+   */
+  ts::TextView name_view() const;
 
-  unsigned int is_srv : 1;
-  unsigned int reverse_dns : 1;
+  /// Get the array of info instances.
+  ts::MemSpan<HostDBInfo> rr_info();
 
-  unsigned int round_robin : 1;     // This is the root of a round robin block
-  unsigned int round_robin_elt : 1; // This is an address in a round robin 
block
+  /** Find a host record by IP address.
+   *
+   * @param addr Address key.
+   * @return A pointer to the info instance if a match is found, @c nullptr if 
not.
+   */
+  HostDBInfo *find(sockaddr const *addr);
+
+  /** Select an upstream target.
+   *
+   * @param now Current time.
+   * @param fail_window Dead server blackout time.
+   * @param hash_addr Inbound remote IP address.
+   * @return A selected target, or @c nullptr if there are no valid targets.
+   *
+   * This accounts for the round robin setting. The default is to use "client 
affinity" in
+   * which case @a hash_addr is as a hash seed to select the target.
+   *
+   * This may select a zombie target, which can be detected by checking the 
target's last
+   * failure time. If it is not @c TS_TIME_ZERO the target is a zombie. Other 
transactions will
+   * be blocked from selecting that target until @a fail_window time has 
passed.
+   *
+   * In cases other than strict round robin, a base target is selected. If 
valid, that is returned,
+   * but if not then the targets in this record are searched until a valid one 
is found. The result
+   * is this can be called to select a target for failover when a previous 
target fails.
+   */
+  HostDBInfo *select_best_http(ts_time now, ts_seconds fail_window, sockaddr 
const *hash_addr);
+  HostDBInfo *select_best_srv(char *target, InkRand *rand, ts_time now, 
ts_seconds fail_window);
 
-  HostDBInfo() : _iobuffer_index{-1} {}
+  bool is_failed() const;
 
-  HostDBInfo(HostDBInfo const &src) : RefCountObj()
-  {
-    memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-    _iobuffer_index = -1;
-  }
+  void set_failed();
 
-  HostDBInfo &
-  operator=(HostDBInfo const &src)
-  {
-    if (this != &src) {
-      int iob_idx = _iobuffer_index;
-      memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-      _iobuffer_index = iob_idx;
-    }
-    return *this;
-  }
+  /// @return The time point when the item expires.
+  ts_time expiry_time() const;
 
-  bool
-  from_alloc() const
-  {
-    return _iobuffer_index >= 0;
-  }
+  ts_seconds ip_interval() const;
 
-private:
-  // The value of this will be -1 for objects that are not created by the 
alloc() static member function.
-  int _iobuffer_index;
-};
+  ts_seconds ip_time_remaining() const;
+
+  bool is_ip_stale() const;
+
+  bool is_ip_timeout() const;
+
+  bool is_ip_fail_timeout() const;
+
+  void refresh_ip();

Review comment:
       It would be good to document what `refresh_ip()` does.

##########
File path: doc/developer-guide/core-architecture/hostdb.en.rst
##########
@@ -0,0 +1,191 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+.. include:: ../../common.defs
+
+.. highlight:: cpp
+.. default-domain:: cpp
+
+.. _developer-doc-hostdb:
+
+HostDB
+******
+
+HostDB is a cache of DNS results. It is used to increase performance by 
aggregating address
+resolution across transactions. HostDB also stores state information for 
specific IP addresses.
+
+Operation
+=========
+
+The primary operation for HostDB is to resolve a fully qualified domain name 
("FQDN"). As noted each
+FQDN is associated with a single record. Each record has an array of items. 
When a resolution
+request is made the database is checked to see if the record is already 
present. If so, it is
+served. Otherwise a DNS request is made. When the nameserver replies a record 
is created, added
+to the database, and then returned to the requestor.
+
+Each info tracks several status values for its corresponding upstream. These 
are
+
+*  HTTP version
+*  Last failure time
+

Review comment:
       Each HostDBInfo also holds the IP address or SRV name, depending upon its 
type. That should probably be mentioned here too.

##########
File path: iocore/hostdb/I_HostDBProcessor.h
##########
@@ -84,350 +89,539 @@ makeHostHash(const char *string)
 // Types
 //
 
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on 
context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any 
change is made,
- * the @c object_version for the cache must be updated by modifying @c 
HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
-  /// Generic storage. This is verified to be the size of the union.
-  struct application_data_allotment {
-    unsigned int application1;
-    unsigned int application2;
-  } allotment;
-
-  //////////////////////////////////////////////////////////
-  // http server attributes in the host database          //
-  //                                                      //
-  // http_version       - one of HTTPVersion              //
-  // last_failure       - UNIX time for the last time     //
-  //                      we tried the server & failed    //
-  // fail_count         - Number of times we tried and    //
-  //                       and failed to contact the host //
-  //////////////////////////////////////////////////////////
-  struct http_server_attr {
-    uint32_t last_failure;
-    HTTPVersion http_version;
-    uint8_t fail_count;
-    http_server_attr() : http_version() {}
-  } http_data;
-
-  struct application_data_rr {
-    unsigned int offset;
-  } rr;
-  HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
 
+/// Information for an SRV record.
 struct SRVInfo {
-  unsigned int srv_offset : 16;
+  unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
   unsigned int srv_weight : 16;
   unsigned int srv_priority : 16;
   unsigned int srv_port : 16;
   unsigned int key;
 };
 
-struct HostDBInfo : public RefCountObj {
-  /** Internal IP address data.
-      This is at least large enough to hold an IPv6 address.
-  */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+  UNSPEC, ///< No valid data.
+  ADDR,   ///< IP address.
+  SRV,    ///< SRV record.
+  HOST    ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
 
-  static HostDBInfo *
-  alloc(int size = 0)
-  {
-    size += sizeof(HostDBInfo);
-    int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
-    ink_release_assert(iobuffer_index >= 0);
-    void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
-    memset(ptr, 0, size);
-    HostDBInfo *ret      = new (ptr) HostDBInfo();
-    ret->_iobuffer_index = iobuffer_index;
-    return ret;
-  }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+  using self_type = HostDBInfo; ///< Self reference type.
 
-  void
-  free() override
-  {
-    ink_release_assert(from_alloc());
-    Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), 
this);
-    ioBufAllocator[_iobuffer_index].free_void((void *)(this));
-  }
+  /// Default constructor.
+  HostDBInfo() = default;
 
-  /// Effectively the @c object_version for cache data.
-  /// This is used to indicate incompatible changes in the binary layout of 
HostDB records.
-  /// It must be updated if any such change is made, even if it is 
functionally equivalent.
-  static ts::VersionNumber
-  version()
-  {
-    /// - 1.0 Initial version.
-    /// - 1.1 tweak HostDBApplicationInfo::http_data.
-    return ts::VersionNumber(1, 1);
-  }
+  HostDBInfo &operator=(HostDBInfo const &that);
 
-  static HostDBInfo *
-  unmarshall(char *buf, unsigned int size)
-  {
-    if (size < sizeof(HostDBInfo)) {
-      return nullptr;
-    }
-    HostDBInfo *ret = HostDBInfo::alloc(size - sizeof(HostDBInfo));
-    int buf_index   = ret->_iobuffer_index;
-    memcpy((void *)ret, buf, size);
-    // Reset the refcount back to 0, this is a bit ugly-- but I'm not sure we 
want to expose a method
-    // to mess with the refcount, since this is a fairly unique use case
-    ret                  = new (ret) HostDBInfo();
-    ret->_iobuffer_index = buf_index;
-    return ret;
-  }
+  /// Absolute time of when this target failed.
+  /// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
+  ts_time last_fail_time() const;
 
-  // return expiry time (in seconds since epoch)
-  ink_time_t
-  expiry_time() const
-  {
-    return ip_timestamp + ip_timeout_interval + 
hostdb_serve_stale_but_revalidate;
-  }
+  /// Target is alive - no known failure.
+  bool is_alive();
 
-  sockaddr *
-  ip()
-  {
-    return &data.ip.sa;
-  }
+  /// Target has failed and is still in the blocked time window.
+  bool is_dead(ts_time now, ts_seconds fail_window);
 
-  sockaddr const *
-  ip() const
-  {
-    return &data.ip.sa;
-  }
+  /** Select this target.
+   *
+   * @param now Current time.
+   * @param fail_window Failure window.
+   * @return Status of the selection.
+   *
+   * If a zombie is selected the failure time is updated to make it look dead 
to other threads in a thread safe
+   * manner. The caller should check @c last_fail_time to see if a zombie was 
selected.
+   */
+  bool select(ts_time now, ts_seconds fail_window);
 
-  char *hostname() const;
-  char *perm_hostname() const;
-  char *srvname(HostDBRoundRobin *rr) const;
+  /// Check if this info is valid.
+  bool is_valid() const;
 
-  /// Check if this entry is an element of a round robin entry.
-  /// If @c true then this entry is part of and was obtained from a round 
robin root. This is useful if the
-  /// address doesn't work - a retry can probably get a new address by doing 
another lookup and resolving to
-  /// a different element of the round robin.
-  bool
-  is_rr_elt() const
-  {
-    return 0 != round_robin_elt;
-  }
+  /// Mark this info as invalid.
+  void invalidate();
 
-  HostDBRoundRobin *rr();
+  /** Mark the entry as down.
+   *
+   * @param now Time of the failure.
+   * @return @c true if @a this was marked down, @c false if not.
+   *
+   * This can return @c false if the entry is already marked down, in which 
case the failure time is not updated.
+   */
+  bool mark_down(ts_time now);
 
-  unsigned int
-  ip_interval() const
-  {
-    return (hostdb_current_interval - ip_timestamp) & 0x7FFFFFFF;
-  }
+  /** Mark the target as up / alive.
+   *
+   * @return Previous alive state of the target.
+   */
+  bool mark_up();
 
-  int
-  ip_time_remaining() const
-  {
-    return static_cast<int>(ip_timeout_interval) - 
static_cast<int>(this->ip_interval());
-  }
+  char const *srvname() const;
 
-  bool
-  is_ip_stale() const
-  {
-    return ip_timeout_interval >= 2 * hostdb_ip_stale_interval && 
ip_interval() >= hostdb_ip_stale_interval;
-  }
+  /** Migrate data after a DNS update.
+   *
+   * @param that Source item.
+   *
+   * This moves only specific state information, it is not a generic copy.
+   */
+  void migrate_from(self_type const &that);
 
-  bool
-  is_ip_timeout() const
-  {
-    return ip_interval() >= ip_timeout_interval;
-  }
+  /// A target is either an IP address or an SRV record.
+  /// The type should be indicated by @c flags.f.is_srv;
+  union {
+    IpAddr ip;   ///< IP address / port data.
+    SRVInfo srv; ///< SRV record.
+  } data{IpAddr{}};
+
+  /// Data that migrates after updated DNS records are processed.
+  /// @see migrate_from
+  /// @{
+  /// Last time a failure was recorded.
+  std::atomic<ts_time> last_failure{TS_TIME_ZERO};
+  /// Count of connection failures
+  std::atomic<uint8_t> fail_count{0};
+  /// Expected HTTP version of the target based on earlier transactions.
+  HTTPVersion http_version = HTTP_INVALID;
+  /// @}
+
+  self_type &assign(IpAddr const &addr);
+
+protected:
+  self_type &assign(sa_family_t af, void const *addr);
+  self_type &assign(SRV const *srv, char const *name);
+
+  HostDBType type = HostDBType::UNSPEC; ///< Invalid data.
+
+  friend HostDBContinuation;
+};
 
-  bool
-  is_ip_fail_timeout() const
-  {
-    return ip_interval() >= hostdb_ip_fail_timeout_interval;
+inline HostDBInfo &
+HostDBInfo::operator=(HostDBInfo const &that)
+{
+  if (this != &that) {
+    memcpy(static_cast<void *>(this), static_cast<const void *>(&that), 
sizeof(*this));
   }
+  return *this;
+}
 
-  void
-  refresh_ip()
-  {
-    ip_timestamp = hostdb_current_interval;
-  }
+inline ts_time
+HostDBInfo::last_fail_time() const
+{
+  return last_failure;
+}
 
-  bool
-  serve_stale_but_revalidate() const
-  {
-    // the option is disabled
-    if (hostdb_serve_stale_but_revalidate <= 0) {
-      return false;
-    }
+inline bool
+HostDBInfo::is_alive()
+{
+  return this->last_fail_time() == TS_TIME_ZERO;
+}
 
-    // ip_timeout_interval == DNS TTL
-    // hostdb_serve_stale_but_revalidate == number of seconds
-    // ip_interval() is the number of seconds between now() and when the entry 
was inserted
-    if ((ip_timeout_interval + hostdb_serve_stale_but_revalidate) > 
ip_interval()) {
-      Debug("hostdb", "serving stale entry %d | %d | %d as requested by 
config", ip_timeout_interval,
-            hostdb_serve_stale_but_revalidate, ip_interval());
-      return true;
-    }
+inline bool
+HostDBInfo::is_dead(ts_time now, ts_seconds fail_window)
+{
+  auto last_fail = this->last_fail_time();
+  return (last_fail != TS_TIME_ZERO) && (last_fail + fail_window < now);
+}
+
+inline bool
+HostDBInfo::mark_up()
+{
+  auto t = last_failure.exchange(TS_TIME_ZERO);
+  return t != TS_TIME_ZERO;
+}
+
+inline bool
+HostDBInfo::mark_down(ts_time now)
+{
+  auto t0{TS_TIME_ZERO};
+  return last_failure.compare_exchange_strong(t0, now);
+}
 
-    // otherwise, the entry is too old
-    return false;
+inline bool
+HostDBInfo::select(ts_time now, ts_seconds fail_window)
+{
+  auto t0 = this->last_fail_time();
+  if (t0 == TS_TIME_ZERO) {
+    return true; // it's alive and so is valid for selection.
   }
+  // Success means this is a zombie and this thread updated the failure time.
+  return (t0 + fail_window < now) && last_failure.compare_exchange_strong(t0, 
now);
+}
+
+inline void
+HostDBInfo::migrate_from(HostDBInfo::self_type const &that)
+{
+  this->last_failure = that.last_failure.load();
+  this->http_version = that.http_version;
+}
 
-  /*
-   * Given the current time `now` and the fail_window, determine if this real 
is alive
+inline bool
+HostDBInfo::is_valid() const
+{
+  return type != HostDBType::UNSPEC;
+}
+
+inline void
+HostDBInfo::invalidate()
+{
+  type = HostDBType::UNSPEC;
+}
+
+// ----
+/** Root item for HostDB.
+ * This is the container for HostDB data. It is always an array of @c 
HostDBInfo instances plus metadata.
+ * All strings are C-strings and therefore don't need a distinct size.
+ *
+ */
+class HostDBRecord : public RefCountObj
+{
+  friend struct HostDBContinuation;
+  friend struct ShowHostDB;
+  using self_type = HostDBRecord;
+
+  /// Size of the IO buffer block owned by @a this.
+  /// If negative, @a this is not in allocated memory.
+  int _iobuffer_index{-1};
+  /// Actual size of the data.
+  unsigned _record_size = sizeof(self_type);
+
+public:
+  HostDBRecord()                      = default;
+  HostDBRecord(self_type const &that) = delete;
+
+  using Handle = Ptr<HostDBRecord>; ///< Shared pointer type to hold an 
instance.
+
+  /** Allocate an instance from the IOBuffers.
+   *
+   * @param query_name Name of the query for the record.
+   * @param rr_count Number of info instances.
+   * @param srv_name_size Storage for SRV names, if any.
+   * @return An instance sufficient to hold the specified data.
+   *
+   * The query name will be stored and initialized, and the info instances 
initialized.
    */
-  bool
-  is_alive(ink_time_t now, int32_t fail_window)
-  {
-    unsigned int last_failure = app.http_data.last_failure;
-
-    if (last_failure == 0 || (unsigned int)(now - fail_window) > last_failure) 
{
-      return true;
-    } else {
-      // Entry is marked down.  Make sure some nasty clock skew
-      //  did not occur.  Use the retry time to set an upper bound
-      //  as to how far in the future we should tolerate bogus last
-      //  failure times.  This sets the upper bound that we would ever
-      //  consider a server down to 2*down_server_timeout
-      if ((unsigned int)(now + fail_window) < last_failure) {
-        app.http_data.last_failure = 0;
-        return false;
-      }
-      return false;
-    }
-  }
+  static self_type *alloc(ts::TextView query_name, unsigned rr_count, size_t 
srv_name_size = 0);
 
-  bool
-  is_failed() const
-  {
-    return !((is_srv && data.srv.srv_offset) || (reverse_dns && 
data.hostname_offset) || ats_is_ip(ip()));
-  }
+  /// Type of data stored in this record.
+  HostDBType record_type = HostDBType::UNSPEC;
 
-  void
-  set_failed()
-  {
-    if (is_srv) {
-      data.srv.srv_offset = 0;
-    } else if (reverse_dns) {
-      data.hostname_offset = 0;
-    } else {
-      ats_ip_invalidate(ip());
-    }
-  }
+  /// IP family of this record.
+  sa_family_t af_family = AF_UNSPEC;
+
+  /// Offset from @a this to the VLA.
+  unsigned short rr_offset = 0;
+
+  /// Number of @c HostDBInfo instances.
+  unsigned short rr_count = 0;
+
+  /// Timing data for switch records in the RR.
+  std::atomic<ts_time> rr_ctime{TS_TIME_ZERO};
 
+  /// Hash key.
   uint64_t key;
 
-  // Application specific data. NOTE: We need an integral number of
-  // these per block. This structure is 32 bytes. (at 200k hosts =
-  // 8 Meg). Which gives us 7 bytes of application information.
-  HostDBApplicationInfo app;
+  /// When the data was received.
+  ts_time ip_timestamp;
 
-  union {
-    IpEndpoint ip;                ///< IP address / port data.
-    unsigned int hostname_offset; ///< Some hostname thing.
-    SRVInfo srv;
-  } data;
+  /// Valid duration of the data.
+  ts_seconds ip_timeout_interval;
 
-  unsigned int hostname_offset; // always maintain a permanent copy of the 
hostname for non-rev dns records.
+  /** Atomically advance the round robin index.
+   *
+   * If multiple threads call this simultaneously each thread will get a 
distinct return value.
+   *
+   * @return The new round robin index.
+   */
+  unsigned next_rr();
+
+  /** Pick the next round robin and update the record atomically.
+   *
+   * @note This may select a zombie server and reserve it for the caller, 
therefore the caller must
+   * attempt to connect to the selected target if possible.
+   *
+   * @param now Current time to use for aliveness calculations.
+   * @param fail_window Blackout time for dead servers.
+   * @return Status of the updated target.
+   *
+   * If the return value is @c HostDBInfo::Status::DEAD this means all targets 
are dead and there is
+   * no valid upstream.
+   *
+   * @note Concurrency - this is not done under lock and depends on the caller 
for correct use.
+   * For strict round robin, it is a feature that every call will get a 
distinct index. For
+   * timed round robin, the caller must arrange to have only one thread call 
this per time interval.
+   */
+  HostDBInfo *select_next_rr(ts_time now, ts_seconds fail_window);
 
-  unsigned int ip_timestamp;
+  /// Check if this record is of SRV targets.
+  bool is_srv() const;
 
-  unsigned int ip_timeout_interval; // bounded between 1 and HOST_DB_MAX_TTL 
(0x1FFFFF, 24 days)
+  /** Query name for the record.
+   * @return A C-string.
+   * If this is a @c HOST record, this is the resolved name and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   */
+  char const *name() const;
+
+  /** Query name for the record.
+   * @return A view.
+   * If this is a @c HOST record, this is the resolved name and the query was 
based on the IP address.
+   * Otherwise this is the name used in the DNS query.
+   * @note Although not included in the view, the name is always nul 
terminated and the string can
+   * be used as a C-string.
+   */
+  ts::TextView name_view() const;
 
-  unsigned int is_srv : 1;
-  unsigned int reverse_dns : 1;
+  /// Get the array of info instances.
+  ts::MemSpan<HostDBInfo> rr_info();
 
-  unsigned int round_robin : 1;     // This is the root of a round robin block
-  unsigned int round_robin_elt : 1; // This is an address in a round robin 
block
+  /** Find a host record by IP address.
+   *
+   * @param addr Address key.
+   * @return A pointer to the info instance if a match is found, @c nullptr if 
not.
+   */
+  HostDBInfo *find(sockaddr const *addr);
+
+  /** Select an upstream target.
+   *
+   * @param now Current time.
+   * @param fail_window Dead server blackout time.
+   * @param hash_addr Inbound remote IP address.
+   * @return A selected target, or @c nullptr if there are no valid targets.
+   *
+   * This accounts for the round robin setting. The default is to use "client 
affinity" in
+   * which case @a hash_addr is used as a hash seed to select the target.
+   *
+   * This may select a zombie target, which can be detected by checking the 
target's last
+   * failure time. If it is not @c TS_TIME_ZERO the target is a zombie. Other 
transactions will
+   * be blocked from selecting that target until @a fail_window time has 
passed.
+   *
+   * In cases other than strict round robin, a base target is selected. If 
valid, that is returned,
+   * but if not then the targets in this record are searched until a valid one 
is found. The result
+   * is this can be called to select a target for failover when a previous 
target fails.
+   */
+  HostDBInfo *select_best_http(ts_time now, ts_seconds fail_window, sockaddr 
const *hash_addr);
+  HostDBInfo *select_best_srv(char *target, InkRand *rand, ts_time now, 
ts_seconds fail_window);
 
-  HostDBInfo() : _iobuffer_index{-1} {}
+  bool is_failed() const;
 
-  HostDBInfo(HostDBInfo const &src) : RefCountObj()
-  {
-    memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-    _iobuffer_index = -1;
-  }
+  void set_failed();
 
-  HostDBInfo &
-  operator=(HostDBInfo const &src)
-  {
-    if (this != &src) {
-      int iob_idx = _iobuffer_index;
-      memcpy(static_cast<void *>(this), static_cast<const void *>(&src), 
sizeof(*this));
-      _iobuffer_index = iob_idx;
-    }
-    return *this;
-  }
+  /// @return The time point when the item expires.
+  ts_time expiry_time() const;
 
-  bool
-  from_alloc() const
-  {
-    return _iobuffer_index >= 0;
-  }
+  ts_seconds ip_interval() const;
 
-private:
-  // The value of this will be -1 for objects that are not created by the 
alloc() static member function.
-  int _iobuffer_index;
-};
+  ts_seconds ip_time_remaining() const;
+
+  bool is_ip_stale() const;
+
+  bool is_ip_timeout() const;
+
+  bool is_ip_fail_timeout() const;
+
+  void refresh_ip();
+
+  bool serve_stale_but_revalidate() const;
+
+  /// Deallocate @a this.
+  void free() override;
 
-struct HostDBRoundRobin {
-  /** Total number (to compute space used). */
-  short rrcount = 0;
+  /** The current round robin index.
+   *
+   * @return The current index.
+   *
+   * @note The internal index may be out of range due to concurrency 
constraints - this ensures the
+   * returned value is in range.
+   */
+  unsigned short rr_idx() const;
 
-  /** Number which have not failed a connect. */
-  short good = 0;
+  /** Offset from the current round robin index.
+   *
+   * @param delta Distance from the current index.
+   * @return The effective index.
+   */
+  unsigned short rr_idx(unsigned short delta) const;
 
-  unsigned short current    = 0;
-  ink_time_t timed_rr_ctime = 0;
+  /// The index of @a target in this record.
+  int index_of(HostDBInfo const *target) const;

Review comment:
       Should this take a HostDBInfo reference instead of pointer?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to