Updated Branches: refs/heads/master 14c9cf2c8 -> f63b27d52
TS-1424: Fix KeepAlive 4-tuple collision for transparent proxy Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/f63b27d5 Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/f63b27d5 Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/f63b27d5 Branch: refs/heads/master Commit: f63b27d5220c95cd95764fb0ca0cb4e648b2f055 Parents: 14c9cf2 Author: Alan M. Carroll <a...@network-geographics.com> Authored: Fri Aug 23 10:44:08 2013 -0500 Committer: Alan M. Carroll <a...@network-geographics.com> Committed: Fri Aug 23 22:35:51 2013 -0500 ---------------------------------------------------------------------- proxy/http/HttpSM.cc | 43 +++++++++++++++++++++++++++++++++-------- proxy/http/HttpTransact.cc | 33 +++++++++++++++++-------------- proxy/http/HttpTransact.h | 20 ++++++++++++------- 3 files changed, 66 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f63b27d5/proxy/http/HttpSM.cc ---------------------------------------------------------------------- diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc index 92bd92c..ab120db 100644 --- a/proxy/http/HttpSM.cc +++ b/proxy/http/HttpSM.cc @@ -1712,7 +1712,34 @@ HttpSM::state_http_server_open(int event, void *data) case VC_EVENT_ERROR: case NET_EVENT_OPEN_FAILED: t_state.current.state = HttpTransact::CONNECTION_ERROR; - call_transact_and_set_next_state(HttpTransact::HandleResponse); + // save the errno from the connect fail for future use (passed as negative value, flip back) + t_state.current.server->set_connect_fail(event == NET_EVENT_OPEN_FAILED ? -reinterpret_cast<intptr_t>(data) : EREMOTEIO); + + /* If we get this error, then we simply can't bind to the 4-tuple to make the connection. There's no hope of + retries succeeding in the near future. The best option is to just shut down the connection without further + comment. The only known cause for this is outbound transparency combined with use client target address / source + port, as noted in TS-1424. If the keep alives desync the current connection can be attempting to rebind the 4 + tuple simultaneously with the shut down of an existing connection. Dropping the client side will cause it to pick + a new source port and recover from this issue. + */ + if (EADDRNOTAVAIL == t_state.current.server->connect_result) { + if (is_debug_tag_set("http_tproxy")) { + ip_port_text_buffer ip_c, ip_s; + Debug("http_tproxy", "Force close of client connect (%s->%s) due to EADDRNOTAVAIL [%" PRId64 "]" + , ats_ip_nptop(&t_state.client_info.addr.sa, ip_c, sizeof ip_c) + , ats_ip_nptop(&t_state.server_info.addr.sa, ip_s, sizeof ip_s) + , sm_id + ); + } + t_state.client_info.keep_alive = HTTP_NO_KEEPALIVE; // part of the problem, clear it. + if (ua_entry && ua_entry->vc) { + vc_table.cleanup_entry(ua_entry); + ua_entry = NULL; + ua_session = NULL; + } + } else { + call_transact_and_set_next_state(HttpTransact::HandleResponse); + } return 0; case CONGESTION_EVENT_CONGESTED_ON_F: t_state.current.state = HttpTransact::CONGEST_CONTROL_CONGESTED_ON_F; @@ -2048,7 +2075,6 @@ HttpSM::process_hostdb_info(HostDBInfo * r) // current time when calling select_best_http(). HostDBRoundRobin *rr = r->rr(); ret = rr->select_best_http(&t_state.client_info.addr.sa, now, (int) t_state.txn_conf->down_server_timeout); - t_state.dns_info.round_robin = true; // set the srv target`s last_failure if (t_state.dns_info.srv_lookup_success) { @@ -2068,7 +2094,6 @@ HttpSM::process_hostdb_info(HostDBInfo * r) } } else { ret = r; - t_state.dns_info.round_robin = false; } if (ret) { t_state.host_db_info = *ret; @@ -2079,9 +2104,9 @@ HttpSM::process_hostdb_info(HostDBInfo * r) DebugSM("http", "[%" PRId64 "] DNS lookup failed for '%s'", sm_id, t_state.dns_info.lookup_name); t_state.dns_info.lookup_success = false; - t_state.dns_info.round_robin = false; t_state.host_db_info.app.allotment.application1 = 0; t_state.host_db_info.app.allotment.application2 = 0; + ink_assert(!t_state.host_db_info.round_robin); } milestones.dns_lookup_end = ink_get_hrtime(); @@ -2923,10 +2948,12 @@ HttpSM::tunnel_handler_server(int event, HttpTunnelProducer * p) p->read_vio = NULL; /* TS-1424: if we're outbound transparent and using the client source port for the outbound connection we must effectively - propagate server closes back to the client. + propagate server closes back to the client. Part of that is + disabling KeepAlive if the server closes. */ - if (ua_session && ua_session->f_outbound_transparent && t_state.http_config_param->use_client_source_port) + if (ua_session && ua_session->f_outbound_transparent && t_state.http_config_param->use_client_source_port) { t_state.client_info.keep_alive = HTTP_NO_KEEPALIVE; + } } else { server_session->attach_hostname(t_state.current.server->name); server_session->server_trans_stat--; @@ -3991,7 +4018,7 @@ HttpSM::do_hostdb_update_if_necessary() t_state.updated_server_version = HostDBApplicationInfo::HTTP_VERSION_UNDEFINED; } // Check to see if we need to report or clear a connection failure - if (t_state.current.server->connect_failure) { + if (t_state.current.server->had_connect_fail()) { issue_update |= 1; mark_host_failure(&t_state.host_db_info, t_state.client_request_time); } else { @@ -4887,7 +4914,7 @@ HttpSM::mark_server_down_on_client_abort() wait = 0; } if (ink_hrtime_to_sec(wait) > t_state.txn_conf->client_abort_threshold) { - t_state.current.server->connect_failure = true; + t_state.current.server->set_connect_fail(ETIMEDOUT); do_hostdb_update_if_necessary(); } } http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f63b27d5/proxy/http/HttpTransact.cc ---------------------------------------------------------------------- diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index ac074ed..e39e7e9 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -1408,7 +1408,7 @@ HttpTransact::PPDNSLookup(State* s) // lookup succeeded, open connection to p.p. ats_ip_copy(&s->parent_info.addr, s->host_db_info.ip()); get_ka_info_from_host_db(s, &s->parent_info, &s->client_info, &s->host_db_info); - s->parent_info.dns_round_robin = s->dns_info.round_robin; + s->parent_info.dns_round_robin = s->host_db_info.round_robin; char addrbuf[INET6_ADDRSTRLEN]; DebugTxn("http_trans", "[PPDNSLookup] DNS lookup for sm_id[%" PRId64"] successful IP: %s", @@ -1452,19 +1452,19 @@ void HttpTransact::ReDNSRoundRobin(State* s) { ink_assert(s->current.server == &s->server_info); - ink_assert(s->current.server->connect_failure); + ink_assert(s->current.server->had_connect_fail()); if (s->dns_info.lookup_success) { // We using a new server now so clear the connection // failure mark - s->current.server->connect_failure = 0; + s->current.server->clear_connect_fail(); // Our ReDNS of the server succeeeded so update the necessary // information and try again ats_ip_copy(&s->server_info.addr, s->host_db_info.ip()); ats_ip_copy(&s->request_data.dest_ip, &s->server_info.addr); get_ka_info_from_host_db(s, &s->server_info, &s->client_info, &s->host_db_info); - s->server_info.dns_round_robin = s->dns_info.round_robin; + s->server_info.dns_round_robin = s->host_db_info.round_robin; char addrbuf[INET6_ADDRSTRLEN]; DebugTxn("http_trans", "[ReDNSRoundRobin] DNS lookup for O.S. successful IP: %s", @@ -1612,7 +1612,7 @@ HttpTransact::OSDNSLookup(State* s) // We've backed off from a client supplied address and found some // HostDB addresses. We use those if they're different from the CTA. // In all cases we now commit to client or HostDB for our source. - if (s->dns_info.round_robin) { + if (s->host_db_info.round_robin) { HostDBInfo* cta = s->host_db_info.rr()->select_next(&s->current.server->addr.sa); if (cta) { // found another addr, lock in host DB. @@ -1635,7 +1635,7 @@ HttpTransact::OSDNSLookup(State* s) ats_ip_copy(&s->server_info.addr, s->host_db_info.ip()); ats_ip_copy(&s->request_data.dest_ip, &s->server_info.addr); get_ka_info_from_host_db(s, &s->server_info, &s->client_info, &s->host_db_info); - s->server_info.dns_round_robin = s->dns_info.round_robin; + s->server_info.dns_round_robin = s->host_db_info.round_robin; char addrbuf[INET6_ADDRSTRLEN]; DebugTxn("http_trans", "[OSDNSLookup] DNS lookup for O.S. successful " @@ -3356,7 +3356,7 @@ HttpTransact::handle_response_from_parent(State* s) switch (s->current.state) { case CONNECTION_ALIVE: DebugTxn("http_trans", "[hrfp] connection alive"); - s->current.server->connect_failure = 0; + s->current.server->connect_result = 0; SET_VIA_STRING(VIA_DETAIL_PP_CONNECT, VIA_DETAIL_PP_SUCCESS); if (s->parent_result.retry) { s->parent_params->recordRetrySuccess(&s->parent_result); @@ -3371,7 +3371,7 @@ HttpTransact::handle_response_from_parent(State* s) ink_assert(s->hdr_info.server_request.valid()); - s->current.server->connect_failure = 1; + s->current.server->connect_result = ENOTCONN; char addrbuf[INET6_ADDRSTRLEN]; DebugTxn("http_trans", "[%d] failed to connect to parent %s", s->current.attempts, @@ -3481,14 +3481,14 @@ HttpTransact::handle_response_from_server(State* s) case CONNECTION_ALIVE: DebugTxn("http_trans", "[hrfs] connection alive"); SET_VIA_STRING(VIA_DETAIL_SERVER_CONNECT, VIA_DETAIL_SERVER_SUCCESS); - s->current.server->connect_failure = 0; + s->current.server->clear_connect_fail(); handle_forward_server_connection_open(s); break; case CONGEST_CONTROL_CONGESTED_ON_F: case CONGEST_CONTROL_CONGESTED_ON_M: DebugTxn("http_trans", "[handle_response_from_server] Error. congestion control -- congested."); SET_VIA_STRING(VIA_DETAIL_SERVER_CONNECT, VIA_DETAIL_SERVER_FAILURE); - s->current.server->connect_failure = 1; + s->current.server->set_connect_fail(EUSERS); // too many users handle_server_connection_not_open(s); break; case OPEN_RAW_ERROR: @@ -3504,7 +3504,10 @@ HttpTransact::handle_response_from_server(State* s) case CONNECTION_CLOSED: /* fall through */ case BAD_INCOMING_RESPONSE: - s->current.server->connect_failure = 1; + // Set to generic I/O error if not already set specifically. + if (!s->current.server->had_connect_fail()) + s->current.server->set_connect_fail(EIO); + if (is_server_negative_cached(s)) { max_connect_retries = s->txn_conf->connect_attempts_max_retries_dead_server; } else { @@ -3549,7 +3552,7 @@ HttpTransact::handle_response_from_server(State* s) case ACTIVE_TIMEOUT: DebugTxn("http_trans", "[hrfs] connection not alive"); SET_VIA_STRING(VIA_DETAIL_SERVER_CONNECT, VIA_DETAIL_SERVER_FAILURE); - s->current.server->connect_failure = 1; + s->current.server->set_connect_fail(ETIMEDOUT); handle_server_connection_not_open(s); break; default: @@ -3582,7 +3585,7 @@ HttpTransact::delete_server_rr_entry(State* s, int max_retries) DebugTxn("http_trans", "[%d] failed to connect to %s", s->current.attempts, ats_ip_ntop(&s->current.server->addr.sa, addrbuf, sizeof(addrbuf))); DebugTxn("http_trans", "[delete_server_rr_entry] marking rr entry " "down and finding next one"); - ink_assert(s->current.server->connect_failure); + ink_assert(s->current.server->had_connect_fail()); ink_assert(s->current.request_to == ORIGIN_SERVER); ink_assert(s->current.server == &s->server_info); update_dns_info(&s->dns_info, &s->current, 0, &s->arena); @@ -3609,7 +3612,7 @@ HttpTransact::retry_server_connection_not_open(State* s, ServerState_t conn_stat ink_assert(s->current.state != CONNECTION_ALIVE); ink_assert(s->current.state != ACTIVE_TIMEOUT); ink_assert(s->current.attempts <= max_retries); - ink_assert(s->current.server->connect_failure != 0); + ink_assert(s->current.server->had_connect_fail()); char addrbuf[INET6_ADDRSTRLEN]; char *url_string = s->hdr_info.client_request.url_string_get(&s->arena); @@ -3660,7 +3663,7 @@ HttpTransact::handle_server_connection_not_open(State* s) DebugTxn("http_trans", "[handle_server_connection_not_open] (hscno)"); DebugTxn("http_seq", "[HttpTransact::handle_server_connection_not_open] "); ink_assert(s->current.state != CONNECTION_ALIVE); - ink_assert(s->current.server->connect_failure != 0); + ink_assert(s->current.server->had_connect_fail()); SET_VIA_STRING(VIA_SERVER_RESULT, VIA_SERVER_ERROR); HTTP_INCREMENT_TRANS_STAT(http_broken_server_connections_stat); http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f63b27d5/proxy/http/HttpTransact.h ---------------------------------------------------------------------- diff --git a/proxy/http/HttpTransact.h b/proxy/http/HttpTransact.h index 27e4ffb..6a66fd1 100644 --- a/proxy/http/HttpTransact.h +++ b/proxy/http/HttpTransact.h @@ -677,7 +677,7 @@ public: { } } RedirectInfo; - typedef struct _ConnectionAttributes + struct ConnectionAttributes { HTTPVersion http_version; HTTPKeepAlive keep_alive; @@ -687,9 +687,11 @@ public: bool receive_chunked_response; bool pipeline_possible; bool proxy_connect_hdr; + /// @c errno from the most recent attempt to connect. + /// zero means no failure (not attempted, succeeded). + int connect_result; char *name; bool dns_round_robin; - bool connect_failure; TransferEncoding_t transfer_encoding; IpEndpoint addr; // replaces 'ip' field @@ -709,15 +711,19 @@ public: /// @c true if the connection is transparent. bool is_transparent; - _ConnectionAttributes() + bool had_connect_fail() const { return 0 != connect_result; } + void clear_connect_fail() { connect_result = 0; } + void set_connect_fail(int e) { connect_result = e; } + + ConnectionAttributes() : http_version(), keep_alive(HTTP_KEEPALIVE_UNDEFINED), receive_chunked_response(false), pipeline_possible(false), proxy_connect_hdr(false), + connect_result(0), name(NULL), dns_round_robin(false), - connect_failure(false), transfer_encoding(NO_TRANSFER_ENCODING), port(0), state(STATE_UNDEFINED), @@ -727,7 +733,8 @@ public: { memset(&addr, 0, sizeof(addr)); } - } ConnectionAttributes; + + }; typedef struct _CurrentInfo { @@ -770,14 +777,13 @@ public: char *lookup_name; char srv_hostname[MAXDNAME]; LookingUp_t looking_up; - bool round_robin; bool srv_lookup_success; short srv_port; HostDBApplicationInfo srv_app; _DNSLookupInfo() : attempts(0), os_addr_style(OS_ADDR_TRY_DEFAULT), - lookup_success(false), lookup_name(NULL), looking_up(UNDEFINED_LOOKUP), round_robin(false), + lookup_success(false), lookup_name(NULL), looking_up(UNDEFINED_LOOKUP), srv_lookup_success(false), srv_port(0) { srv_hostname[0] = '\0';