This is an automated email from the ASF dual-hosted git repository. asherman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 17b16f8ab5a89db1f3f18b5f7fd7f8a200c34fd5 Author: Andrew Sherman <[email protected]> AuthorDate: Fri Dec 6 16:36:51 2024 -0800 IMPALA-13335: ignore duplicate ‘X-Forwarded-For’ headers When using the hs2-http protocol, Impala clients communicate with Impala by sending and receiving http messages. In a modern deployment environment like Kubernetes these http messages may travel through a series of http proxies. These proxies may record information about the path through the system in one or more ‘X-Forwarded-For’ http headers. At present Impala uses this header in various ways, (1) to skip Authentication for connection from a trusted domain, and (2) to record information about the origin of a query in the runtime profile. If there are multiple copies of the ‘X-Forwarded-For’ header then Impala should only use the first of these headers that it sees. For reference see details in https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For We have to be careful to reset the 'origin_' field in THttpServer after headers have been processed. This allows the field to reused when a new HTTP message is processed. As part of this a variable in headersDone() is renamed to avoid multiple variables called 'origin'. TESTING Add a new test which uses a new Impyla mechanism from release 0.21a1 to add multiple headers to its http messages. Change-Id: Iee7b452842aa391d285bd445d6a9e6cbbedd7fbb Reviewed-on: http://gerrit.cloudera.org:8080/22186 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/transport/THttpServer.cpp | 25 +++++++++++++++---------- tests/hs2/test_hs2.py | 27 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/be/src/transport/THttpServer.cpp b/be/src/transport/THttpServer.cpp index a6697fb77..43ea10d70 100644 --- a/be/src/transport/THttpServer.cpp +++ b/be/src/transport/THttpServer.cpp @@ -174,7 +174,10 @@ void THttpServer::parseHeader(char* header) { chunked_ = false; contentLength_ = atoi(value); } else if (MatchesHeader(header, HEADER_X_FORWARDED_FOR, sz)) { - origin_ = value; + // Only set the origin field the first time that we see the 'X-Forwarded-For' header. + if (origin_.empty()) { + origin_ = value; + } } else if ((has_ldap_ || has_kerberos_ || has_saml_ || has_jwt_ || has_oauth_) && MatchesHeader(header, HEADER_AUTHORIZATION, sz)) { auth_value_ = string(value); @@ -279,6 +282,8 @@ void THttpServer::headersDone() { // Trim and truncate the value of the 'X-Forwarded-For' header. string origin = origin_; + // After copying origin, reset the value so that it can be reused for the next message. + origin_ = ""; StripWhiteSpace(&origin); if (origin.length() > MAX_X_FORWARDED_HEADER_LENGTH) { origin = origin.substr(0, MAX_X_FORWARDED_HEADER_LENGTH); @@ -394,21 +399,21 @@ void THttpServer::headersDone() { // the client started the SAML workflow then it doesn't expect Impala to succeed with // another mechanism. if (!authorized && check_trusted_domain_) { - string origin; + string transport_origin; if (FLAGS_trusted_domain_use_xff_header) { - impala::Status status = impala::GetXFFOriginClientAddress(origin_, origin); + impala::Status status = impala::GetXFFOriginClientAddress(origin, transport_origin); if (!status.ok()) LOG(WARNING) << status.GetDetail(); } else { - origin = transport_->getOrigin(); + transport_origin = transport_->getOrigin(); } - StripWhiteSpace(&origin); - if (origin.empty() && FLAGS_trusted_domain_use_xff_header && + StripWhiteSpace(&transport_origin); + if (transport_origin.empty() && FLAGS_trusted_domain_use_xff_header && FLAGS_trusted_domain_empty_xff_header_use_origin) { - origin = transport_->getOrigin(); - StripWhiteSpace(&origin); + transport_origin = transport_->getOrigin(); + StripWhiteSpace(&transport_origin); } - if (!origin.empty()) { - if (callbacks_.trusted_domain_check_fn(origin, auth_value_)) { + if (!transport_origin.empty()) { + if (callbacks_.trusted_domain_check_fn(transport_origin, auth_value_)) { authorized = true; if (metrics_enabled_) { http_metrics_->total_trusted_domain_check_success_->Increment(1); diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py index 6c47287a5..672cb4f52 100644 --- a/tests/hs2/test_hs2.py +++ b/tests/hs2/test_hs2.py @@ -28,6 +28,9 @@ import random import threading import time import uuid +import impala.dbapi as impyla + +from tests.common.impala_test_suite import IMPALAD_HOSTNAME, IMPALAD_HS2_HTTP_PORT try: from urllib.request import urlopen @@ -1189,3 +1192,27 @@ class TestHS2(HS2TestSuite): # Run another query, which should fail since the session is closed. self.execute_statement("select 3", expected_error_prefix="Invalid session id", expected_status_code=TCLIService.TStatusCode.ERROR_STATUS) + + def test_duplicate_headers(self): + """Test that if duplicate X-Forwarded-For headers are sent to Impala, + only the first is used""" + impyla_conn = impyla.connect(host=IMPALAD_HOSTNAME, port=IMPALAD_HS2_HTTP_PORT, + use_http_transport=True, + http_path="cliservice", + get_user_custom_headers_func=get_user_custom_headers) + cursor = impyla_conn.cursor(convert_types=False) + cursor.execute('select 1') + rows = cursor.fetchall() + assert rows == [(1,)] + profile = cursor.get_profile() + assert profile is not None + assert "Http Origin: value1" in profile + assert "Http Origin: value2" not in profile + + +def get_user_custom_headers(): + """Add duplicate X-Forwarded-For headers.""" + return [ + ("X-Forwarded-For", 'value1'), + ("X-Forwarded-For", 'value2') + ]
