This is an automated email from the ASF dual-hosted git repository.

asherman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 17b16f8ab5a89db1f3f18b5f7fd7f8a200c34fd5
Author: Andrew Sherman <[email protected]>
AuthorDate: Fri Dec 6 16:36:51 2024 -0800

    IMPALA-13335: ignore duplicate ‘X-Forwarded-For’ headers
    
    When using the hs2-http protocol, Impala clients communicate with Impala
    by sending and receiving http messages. In a modern deployment
    environment like Kubernetes these http messages may travel through a
    series of http proxies. These proxies may record information about the
    path through the system in one or more ‘X-Forwarded-For’ http headers.
    At present Impala uses this header in various ways, (1) to skip
    Authentication for connection from a trusted domain, and (2) to record
    information about the origin of a query in the runtime profile.
    
    If there are multiple copies of the ‘X-Forwarded-For’ header then
    Impala should only use the first of these headers that it sees.
    For reference see details in
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
    
    We have to be careful to reset the 'origin_' field in THttpServer after
    headers have been processed. This allows the field to reused when a new
    HTTP message is processed. As part of this a variable in
    headersDone() is renamed to avoid multiple variables called 'origin'.
    
    TESTING
    
    Add a new test which uses a new Impyla mechanism from release 0.21a1 to
    add multiple headers to its http messages.
    
    Change-Id: Iee7b452842aa391d285bd445d6a9e6cbbedd7fbb
    Reviewed-on: http://gerrit.cloudera.org:8080/22186
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/transport/THttpServer.cpp | 25 +++++++++++++++----------
 tests/hs2/test_hs2.py            | 27 +++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/be/src/transport/THttpServer.cpp b/be/src/transport/THttpServer.cpp
index a6697fb77..43ea10d70 100644
--- a/be/src/transport/THttpServer.cpp
+++ b/be/src/transport/THttpServer.cpp
@@ -174,7 +174,10 @@ void THttpServer::parseHeader(char* header) {
     chunked_ = false;
     contentLength_ = atoi(value);
   } else if (MatchesHeader(header, HEADER_X_FORWARDED_FOR, sz)) {
-    origin_ = value;
+    // Only set the origin field the first time that we see the 
'X-Forwarded-For' header.
+    if (origin_.empty()) {
+      origin_ = value;
+    }
   } else if ((has_ldap_ || has_kerberos_ || has_saml_ || has_jwt_ || 
has_oauth_)
       && MatchesHeader(header, HEADER_AUTHORIZATION, sz)) {
     auth_value_ = string(value);
@@ -279,6 +282,8 @@ void THttpServer::headersDone() {
 
   // Trim and truncate the value of the 'X-Forwarded-For' header.
   string origin = origin_;
+  // After copying origin, reset the value so that it can be reused for the 
next message.
+  origin_ = "";
   StripWhiteSpace(&origin);
   if (origin.length() > MAX_X_FORWARDED_HEADER_LENGTH) {
     origin = origin.substr(0, MAX_X_FORWARDED_HEADER_LENGTH);
@@ -394,21 +399,21 @@ void THttpServer::headersDone() {
   // the client started the SAML workflow then it doesn't expect Impala to 
succeed with
   // another mechanism.
   if (!authorized && check_trusted_domain_) {
-    string origin;
+    string transport_origin;
     if (FLAGS_trusted_domain_use_xff_header) {
-      impala::Status status = impala::GetXFFOriginClientAddress(origin_, 
origin);
+      impala::Status status = impala::GetXFFOriginClientAddress(origin, 
transport_origin);
       if (!status.ok()) LOG(WARNING) << status.GetDetail();
     } else {
-      origin = transport_->getOrigin();
+      transport_origin = transport_->getOrigin();
     }
-    StripWhiteSpace(&origin);
-    if (origin.empty() && FLAGS_trusted_domain_use_xff_header &&
+    StripWhiteSpace(&transport_origin);
+    if (transport_origin.empty() && FLAGS_trusted_domain_use_xff_header &&
         FLAGS_trusted_domain_empty_xff_header_use_origin) {
-      origin = transport_->getOrigin();
-      StripWhiteSpace(&origin);
+      transport_origin = transport_->getOrigin();
+      StripWhiteSpace(&transport_origin);
     }
-    if (!origin.empty()) {
-      if (callbacks_.trusted_domain_check_fn(origin, auth_value_)) {
+    if (!transport_origin.empty()) {
+      if (callbacks_.trusted_domain_check_fn(transport_origin, auth_value_)) {
         authorized = true;
         if (metrics_enabled_) {
           http_metrics_->total_trusted_domain_check_success_->Increment(1);
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index 6c47287a5..672cb4f52 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -28,6 +28,9 @@ import random
 import threading
 import time
 import uuid
+import impala.dbapi as impyla
+
+from tests.common.impala_test_suite import IMPALAD_HOSTNAME, 
IMPALAD_HS2_HTTP_PORT
 
 try:
   from urllib.request import urlopen
@@ -1189,3 +1192,27 @@ class TestHS2(HS2TestSuite):
     # Run another query, which should fail since the session is closed.
     self.execute_statement("select 3", expected_error_prefix="Invalid session 
id",
         expected_status_code=TCLIService.TStatusCode.ERROR_STATUS)
+
+  def test_duplicate_headers(self):
+    """Test that if duplicate X-Forwarded-For headers are sent to Impala,
+     only the first is used"""
+    impyla_conn = impyla.connect(host=IMPALAD_HOSTNAME, 
port=IMPALAD_HS2_HTTP_PORT,
+                                 use_http_transport=True,
+                                 http_path="cliservice",
+                                 
get_user_custom_headers_func=get_user_custom_headers)
+    cursor = impyla_conn.cursor(convert_types=False)
+    cursor.execute('select 1')
+    rows = cursor.fetchall()
+    assert rows == [(1,)]
+    profile = cursor.get_profile()
+    assert profile is not None
+    assert "Http Origin: value1" in profile
+    assert "Http Origin: value2" not in profile
+
+
+def get_user_custom_headers():
+  """Add duplicate X-Forwarded-For headers."""
+  return [
+    ("X-Forwarded-For", 'value1'),
+    ("X-Forwarded-For", 'value2')
+  ]

Reply via email to