This is an automated email from the ASF dual-hosted git repository.
eze pushed a commit to branch 7.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/7.1.x by this push:
new 1da4fbc This is a backport patch for ATS 7 that fixes issue #5642.
1da4fbc is described below
commit 1da4fbcd5fd32ff86888ed401e0c297b22e00c52
Author: John Rushford <[email protected]>
AuthorDate: Wed Aug 7 20:43:23 2019 +0000
This is a backport patch for ATS 7 that fixes issue #5642.
Origin_max_connections can create state machine loops
when using parent selection to select a parent
for redundancy and/or load balancing.
The parent selection handling in the state machine did not
handle the case when origin max connections is reached or
exceeded when using throttling. This led to state machine
looping and a crash when the stack is overflowed due to the
looping. Now this is handled by either trying a new uncongested
origin or sending a negative 503 response if all available
origins are congested.
---
proxy/http/HttpSM.cc | 5 +-
proxy/http/HttpTransact.cc | 220 +++++++++++++++++++++++----------------------
2 files changed, 117 insertions(+), 108 deletions(-)
diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc
index 962c5bb..a0e25d8 100644
--- a/proxy/http/HttpSM.cc
+++ b/proxy/http/HttpSM.cc
@@ -4720,8 +4720,9 @@ HttpSM::do_cache_prepare_action(HttpCacheSM *c_sm,
CacheHTTPInfo *object_read_in
void
HttpSM::send_origin_throttled_response()
{
- t_state.current.attempts = t_state.txn_conf->connect_attempts_max_retries;
- // t_state.current.state = HttpTransact::CONNECTION_ERROR;
+ if (t_state.current.request_to != HttpTransact::PARENT_PROXY) {
+ t_state.current.attempts = t_state.txn_conf->connect_attempts_max_retries;
+ }
t_state.current.state = HttpTransact::CONGEST_CONTROL_CONGESTED_ON_F;
call_transact_and_set_next_state(HttpTransact::HandleResponse);
}
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index 114d52c..073587d 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -260,8 +260,13 @@ find_server_and_update_current_info(HttpTransact::State *s)
// wanted it for all requests to local_host.
s->parent_result.result = PARENT_DIRECT;
} else if (s->method == HTTP_WKSIDX_CONNECT &&
s->http_config_param->disable_ssl_parenting) {
- s->parent_params->findParent(&s->request_data, &s->parent_result,
s->txn_conf->parent_fail_threshold,
- s->txn_conf->parent_retry_time);
+ if (s->parent_result.result == PARENT_SPECIFIED) {
+ s->parent_params->nextParent(&s->request_data, &s->parent_result,
s->txn_conf->parent_fail_threshold,
+ s->txn_conf->parent_retry_time);
+ } else {
+ s->parent_params->findParent(&s->request_data, &s->parent_result,
s->txn_conf->parent_fail_threshold,
+ s->txn_conf->parent_retry_time);
+ }
if (!s->parent_result.is_some() || s->parent_result.is_api_result() ||
s->parent_result.parent_is_proxy()) {
DebugTxn("http_trans", "request not cacheable, so bypass parent");
s->parent_result.result = PARENT_DIRECT;
@@ -274,8 +279,13 @@ find_server_and_update_current_info(HttpTransact::State *s)
// we are assuming both child and parent have similar configuration
// with respect to whether a request is cacheable or not.
// For example, the cache_urls_that_look_dynamic variable.
- s->parent_params->findParent(&s->request_data, &s->parent_result,
s->txn_conf->parent_fail_threshold,
- s->txn_conf->parent_retry_time);
+ if (s->parent_result.result == PARENT_SPECIFIED) {
+ s->parent_params->nextParent(&s->request_data, &s->parent_result,
s->txn_conf->parent_fail_threshold,
+ s->txn_conf->parent_retry_time);
+ } else {
+ s->parent_params->findParent(&s->request_data, &s->parent_result,
s->txn_conf->parent_fail_threshold,
+ s->txn_conf->parent_retry_time);
+ }
if (!s->parent_result.is_some() || s->parent_result.is_api_result() ||
s->parent_result.parent_is_proxy()) {
DebugTxn("http_trans", "request not cacheable, so bypass parent");
s->parent_result.result = PARENT_DIRECT;
@@ -3600,6 +3610,7 @@
HttpTransact::handle_response_from_icp_suggested_host(State *s)
void
HttpTransact::handle_response_from_parent(State *s)
{
+ LookingUp_t next_lookup = UNDEFINED_LOOKUP;
DebugTxn("http_trans", "[handle_response_from_parent] (hrfp)");
HTTP_RELEASE_ASSERT(s->current.server == &s->parent_info);
@@ -3623,122 +3634,116 @@ HttpTransact::handle_response_from_parent(State *s)
}
handle_forward_server_connection_open(s);
break;
- default: {
- LookingUp_t next_lookup = UNDEFINED_LOOKUP;
-
- if (s->current.state == PARENT_ORIGIN_RETRY) {
- if (s->current.retry_type == PARENT_RETRY_SIMPLE) {
- if (s->current.simple_retry_attempts >=
s->parent_result.max_retries(PARENT_RETRY_SIMPLE)) {
- DebugTxn("http_trans", "PARENT_RETRY_SIMPLE: retried all parents,
send error to client.");
- s->current.retry_type = PARENT_RETRY_NONE;
- } else {
- s->current.simple_retry_attempts++;
- DebugTxn("http_trans", "PARENT_RETRY_SIMPLE: try another parent.");
- s->current.retry_type = PARENT_RETRY_NONE;
- next_lookup = find_server_and_update_current_info(s);
- }
- } else if (s->current.retry_type == PARENT_RETRY_UNAVAILABLE_SERVER) {
- if (s->current.unavailable_server_retry_attempts >=
s->parent_result.max_retries(PARENT_RETRY_UNAVAILABLE_SERVER)) {
- DebugTxn("http_trans", "PARENT_RETRY_UNAVAILABLE_SERVER: retried all
parents, send error to client.");
- s->current.retry_type = PARENT_RETRY_NONE;
- } else {
- s->current.unavailable_server_retry_attempts++;
- DebugTxn("http_trans", "PARENT_RETRY_UNAVAILABLE_SERVER: marking
parent down and trying another.");
- s->current.retry_type = PARENT_RETRY_NONE;
- HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count);
- s->parent_params->markParentDown(&s->parent_result,
s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time);
- next_lookup = find_server_and_update_current_info(s);
- }
+ case PARENT_ORIGIN_RETRY:
+ if (s->current.retry_type == PARENT_RETRY_SIMPLE) {
+ if (s->current.simple_retry_attempts >=
s->parent_result.max_retries(PARENT_RETRY_SIMPLE)) {
+ DebugTxn("http_trans", "PARENT_RETRY_SIMPLE: retried all parents, send
error to client.");
+ s->current.retry_type = PARENT_RETRY_NONE;
+ } else {
+ s->current.simple_retry_attempts++;
+ DebugTxn("http_trans", "PARENT_RETRY_SIMPLE: try another parent.");
+ s->current.retry_type = PARENT_RETRY_NONE;
+ next_lookup = find_server_and_update_current_info(s);
}
- } else {
- DebugTxn("http_trans", "[hrfp] connection not alive");
- SET_VIA_STRING(VIA_DETAIL_PP_CONNECT, VIA_DETAIL_PP_FAILURE);
+ } else if (s->current.retry_type == PARENT_RETRY_UNAVAILABLE_SERVER) {
+ if (s->current.unavailable_server_retry_attempts >=
s->parent_result.max_retries(PARENT_RETRY_UNAVAILABLE_SERVER)) {
+ DebugTxn("http_trans", "PARENT_RETRY_UNAVAILABLE_SERVER: retried all
parents, send error to client.");
+ s->current.retry_type = PARENT_RETRY_NONE;
+ } else {
+ s->current.unavailable_server_retry_attempts++;
+ DebugTxn("http_trans", "PARENT_RETRY_UNAVAILABLE_SERVER: marking
parent down and trying another.");
+ s->current.retry_type = PARENT_RETRY_NONE;
+ HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count);
+ s->parent_params->markParentDown(&s->parent_result,
s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time);
+ next_lookup = find_server_and_update_current_info(s);
+ }
+ }
+ break;
+ default:
+ DebugTxn("http_trans", "[hrfp] connection not alive");
+ SET_VIA_STRING(VIA_DETAIL_PP_CONNECT, VIA_DETAIL_PP_FAILURE);
- ink_assert(s->hdr_info.server_request.valid());
+ ink_assert(s->hdr_info.server_request.valid());
- s->current.server->connect_result = ENOTCONN;
- // only mark the parent down in hostdb if the configuration allows it,
- // see proxy.config.http.parent_proxy.mark_down_hostdb in records.config.
- if (s->txn_conf->parent_failures_update_hostdb) {
- s->state_machine->do_hostdb_update_if_necessary();
- }
+ s->current.server->connect_result = ENOTCONN;
+ // only mark the parent down in hostdb if the configuration allows it,
+ // see proxy.config.http.parent_proxy.mark_down_hostdb in records.config.
+ if (s->txn_conf->parent_failures_update_hostdb && s->current.state !=
CONGEST_CONTROL_CONGESTED_ON_F) {
+ s->state_machine->do_hostdb_update_if_necessary();
+ }
- char addrbuf[INET6_ADDRSTRLEN];
- DebugTxn("http_trans", "[%d] failed to connect to parent %s",
s->current.attempts,
- ats_ip_ntop(&s->current.server->dst_addr.sa, addrbuf,
sizeof(addrbuf)));
+ char addrbuf[INET6_ADDRSTRLEN];
+ DebugTxn("http_trans", "[%d] failed to connect to parent %s",
s->current.attempts,
+ ats_ip_ntop(&s->current.server->dst_addr.sa, addrbuf,
sizeof(addrbuf)));
+
+ // If the request is not retryable, just give up!
+ if (!is_request_retryable(s) && s->current.state !=
CONGEST_CONTROL_CONGESTED_ON_F) {
+ HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count);
+ s->parent_params->markParentDown(&s->parent_result,
s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time);
+ s->parent_result.result = PARENT_FAIL;
+ handle_parent_died(s);
+ return;
+ }
- // If the request is not retryable, just give up!
- if (!is_request_retryable(s)) {
- HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count);
- s->parent_params->markParentDown(&s->parent_result,
s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time);
- s->parent_result.result = PARENT_FAIL;
- handle_parent_died(s);
- return;
- }
+ if (s->current.attempts < s->txn_conf->parent_connect_attempts) {
+ HTTP_INCREMENT_DYN_STAT(http_total_parent_retries_stat);
+ s->current.attempts++;
- if (s->current.attempts < s->txn_conf->parent_connect_attempts) {
- HTTP_INCREMENT_DYN_STAT(http_total_parent_retries_stat);
- s->current.attempts++;
-
- // Are we done with this particular parent?
- if ((s->current.attempts - 1) %
s->txn_conf->per_parent_connect_attempts != 0) {
- // No we are not done with this parent so retry
- HTTP_INCREMENT_DYN_STAT(http_total_parent_switches_stat);
- s->next_action = how_to_open_connection(s);
- DebugTxn("http_trans", "%s Retrying parent for attempt %d, max %"
PRId64, "[handle_response_from_parent]",
- s->current.attempts,
s->txn_conf->per_parent_connect_attempts);
- return;
- } else {
- DebugTxn("http_trans", "%s %d per parent attempts exhausted",
"[handle_response_from_parent]", s->current.attempts);
- HTTP_INCREMENT_DYN_STAT(http_total_parent_retries_exhausted_stat);
-
- // Only mark the parent down if we failed to connect
- // to the parent otherwise slow origin servers cause
- // us to mark the parent down
- if (s->current.state == CONNECTION_ERROR) {
- HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count);
- s->parent_params->markParentDown(&s->parent_result,
s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time);
- }
- // We are done so look for another parent if any
- next_lookup = find_server_and_update_current_info(s);
- }
+ // Are we done with this particular parent?
+ if ((s->current.attempts - 1) % s->txn_conf->per_parent_connect_attempts
!= 0) {
+ // No we are not done with this parent so retry
+ HTTP_INCREMENT_DYN_STAT(http_total_parent_switches_stat);
+ s->next_action = how_to_open_connection(s);
+ DebugTxn("http_trans", "%s Retrying parent for attempt %d, max %"
PRId64, "[handle_response_from_parent]",
+ s->current.attempts,
s->txn_conf->per_parent_connect_attempts);
+ return;
} else {
- // Done trying parents... fail over to origin server if that is
- // appropriate
+ DebugTxn("http_trans", "%s %d per parent attempts exhausted",
"[handle_response_from_parent]", s->current.attempts);
HTTP_INCREMENT_DYN_STAT(http_total_parent_retries_exhausted_stat);
- DebugTxn("http_trans", "[handle_response_from_parent] Error. No more
retries.");
+
+ // Only mark the parent down if we failed to connect
+ // to the parent otherwise slow origin servers cause
+ // us to mark the parent down
if (s->current.state == CONNECTION_ERROR) {
HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count);
s->parent_params->markParentDown(&s->parent_result,
s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time);
}
- s->parent_result.result = PARENT_FAIL;
- next_lookup = find_server_and_update_current_info(s);
+ // We are done so look for another parent if any
+ next_lookup = find_server_and_update_current_info(s);
}
+ } else {
+ // Done trying parents... fail over to origin server if that is
+ // appropriate
+ HTTP_INCREMENT_DYN_STAT(http_total_parent_retries_exhausted_stat);
+ DebugTxn("http_trans", "[handle_response_from_parent] Error. No more
retries.");
+ if (s->current.state == CONNECTION_ERROR) {
+ HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count);
+ s->parent_params->markParentDown(&s->parent_result,
s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time);
+ }
+ s->parent_result.result = PARENT_FAIL;
+ next_lookup = HOST_NONE;
}
+ }
- // We have either tried to find a new parent or failed over to the
- // origin server
- switch (next_lookup) {
- case PARENT_PROXY:
- ink_assert(s->current.request_to == PARENT_PROXY);
- TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup);
- break;
- case ORIGIN_SERVER:
- // Next lookup is Origin Server, try DNS for Origin Server
- TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup);
- break;
- case HOST_NONE:
- handle_parent_died(s);
- break;
- default:
- // This handles:
- // UNDEFINED_LOOKUP, ICP_SUGGESTED_HOST,
- // INCOMING_ROUTER
- break;
- }
-
+ // We have either tried to find a new parent or failed over to the
+ // origin server
+ switch (next_lookup) {
+ case PARENT_PROXY:
+ ink_assert(s->current.request_to == PARENT_PROXY);
+ TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup);
+ break;
+ case ORIGIN_SERVER:
+ // Next lookup is Origin Server, try DNS for Origin Server
+ TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup);
+ break;
+ case HOST_NONE:
+ handle_parent_died(s);
+ break;
+ default:
+ // This handles:
+ // UNDEFINED_LOOKUP, ICP_SUGGESTED_HOST,
+ // INCOMING_ROUTER
break;
- }
}
}
@@ -7705,8 +7710,11 @@ void
HttpTransact::handle_parent_died(State *s)
{
ink_assert(s->parent_result.result == PARENT_FAIL);
-
- build_error_response(s, HTTP_STATUS_BAD_GATEWAY, "Next Hop Connection
Failed", "connect#failed_connect", nullptr);
+ if (s->current.state == CONGEST_CONTROL_CONGESTED_ON_F) {
+ build_error_response(s, HTTP_STATUS_SERVICE_UNAVAILABLE, "Next Hop
Congested ", "congestion#retryAfter", nullptr);
+ } else {
+ build_error_response(s, HTTP_STATUS_BAD_GATEWAY, "Next Hop Connection
Failed", "connect#failed_connect", nullptr);
+ }
TRANSACT_RETURN(SM_ACTION_SEND_ERROR_CACHE_NOOP, nullptr);
}