This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new eb8c8bd4c IMPALA-14674: Implement connect_timeout_ms for HS2-HTTP
eb8c8bd4c is described below

commit eb8c8bd4ceb233b7cc17222ff431fa8aab7694b3
Author: Michael Smith <[email protected]>
AuthorDate: Mon Oct 6 15:52:27 2025 -0700

    IMPALA-14674: Implement connect_timeout_ms for HS2-HTTP
    
    Implements connect_timeout_ms support for HS2-HTTP. This only applies
    the timeout while establishing the connection, so later requests do not
    timeout. This is preferrable to configuring http_socket_timeout_s, which
    can result in timing out operations that just take awhile to execute.
    
    Removed ImpalaHttpClient.setTimeout as it's unused and not part of the
    specification for TTransportBase.
    
    Testing:
    - updates tests now that HS2-HTTP supports connect_timeout_ms
    - test_impala_shell_timeout for HS2-HTTP without http_socket_timeout_s
    - marks test_http_socket_timeout to run serially because it relies on
      short timeouts; inconsistent behavior can leave a dangling session
    
    Change-Id: I9012066fb0d16497f309532021d7b323404b9fb2
    Reviewed-on: http://gerrit.cloudera.org:8080/23499
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 shell/impala_shell/ImpalaHttpClient.py | 20 ++++++++--------
 shell/impala_shell/impala_client.py    | 23 ++++++++-----------
 tests/shell/test_shell_commandline.py  | 42 ++++++++++++----------------------
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/shell/impala_shell/ImpalaHttpClient.py 
b/shell/impala_shell/ImpalaHttpClient.py
index ae9eda609..5a6390705 100644
--- a/shell/impala_shell/ImpalaHttpClient.py
+++ b/shell/impala_shell/ImpalaHttpClient.py
@@ -56,7 +56,8 @@ class ImpalaHttpClient(TTransportBase):
   MIN_REQUEST_SIZE_FOR_EXPECT = 1024
 
   def __init__(self, uri_or_host, ssl_context=None, http_cookie_names=None,
-               socket_timeout_s=None, verbose=False, reuse_connection=True):
+               socket_timeout_s=None, verbose=False, reuse_connection=True,
+               connect_timeout_ms=None):
     """To properly authenticate against an HTTPS server, provide an 
ssl_context created
     with ssl.create_default_context() to validate the server certificate.
 
@@ -121,7 +122,8 @@ class ImpalaHttpClient(TTransportBase):
     self.__wbuf = BytesIO()
     self.__http = None
     self.__http_response = None
-    self.__timeout = socket_timeout_s
+    self.__connect_timeout_s = connect_timeout_ms / 1000.0 if 
connect_timeout_ms else None
+    self.__socket_timeout_s = socket_timeout_s
     # __custom_headers is used to store HTTP headers which are generated in 
runtime for
     # new request.
     self.__custom_headers = None
@@ -147,14 +149,18 @@ class ImpalaHttpClient(TTransportBase):
   def open(self):
     if self.scheme == 'http':
       self.__http = http_client.HTTPConnection(self.host, self.port,
-                                               timeout=self.__timeout)
+                                               
timeout=self.__connect_timeout_s)
     elif self.scheme == 'https':
       self.__http = http_client.HTTPSConnection(self.host, self.port,
-                                                timeout=self.__timeout,
+                                                
timeout=self.__connect_timeout_s,
                                                 context=self.context)
     if self.using_proxy():
       self.__http.set_tunnel(self.realhost, self.realport,
                              {"Proxy-Authorization": self.proxy_auth})
+    # Initiate connection with configured timeout.
+    self.__http.connect()
+    # Set socket_timeout_s for the remainder of the session.
+    self.__http.sock.settimeout(self.__socket_timeout_s)
 
   def close(self):
     self.__http.close()
@@ -164,12 +170,6 @@ class ImpalaHttpClient(TTransportBase):
   def isOpen(self):
     return self.__http is not None
 
-  def setTimeout(self, ms):
-    if ms is None:
-      self.__timeout = None
-    else:
-      self.__timeout = ms / 1000.0
-
   def getCustomHeadersWithBasicAuth(self, cookie_header, has_auth_cookie):
     custom_headers = {}
     if cookie_header:
diff --git a/shell/impala_shell/impala_client.py 
b/shell/impala_shell/impala_client.py
index ea6de5045..61a3f227c 100644
--- a/shell/impala_shell/impala_client.py
+++ b/shell/impala_shell/impala_client.py
@@ -415,21 +415,14 @@ class ImpalaClient(object):
     if not hasattr(ssl, "create_default_context"):
       print("Python version too old. SSLContext not supported.", 
file=sys.stderr)
       raise NotImplementedError()
-    # Current implementation of ImpalaHttpClient does a close() and open() of 
the
-    # underlying http connection on every flush() (THRIFT-4600). Due to this, 
setting a
-    # connect timeout does not achieve the desirable result as the subsequent 
open() could
-    # block similary in case of problematic remote end points.
-    # TODO: Investigate connection reuse in ImpalaHttpClient and revisit this.
-    if connect_timeout_ms > 0 and self.verbose:
-      print("Warning: --connect_timeout_ms is currently ignored with HTTP 
transport.",
-            file=sys.stderr)
-
-    # Notes on http socket timeout:
+
+    # Notes on http socket timeout
     # https://docs.python.org/3/library/socket.html#socket-timeouts
-    # Having a default timeout of 'None' (blocking mode) could result in hang 
like
-    # symptoms in case of a problematic remote endpoint. It's better to have a 
finite
-    # timeout so that in case of any connection errors, the client retries 
have a better
-    # chance of succeeding.
+    # We set connect_timeout_ms while establishing a connection to quickly 
detect problem
+    # servers, similar to other transports. After the connection is 
established, we set
+    # the http_socket_timeout_s (if provided) for the remainder of the 
session. Setting
+    # this can be more problematic because Impala HS2-HTTP can have 
long-running requests
+    # that may exceed a timeout, particularly during slow planning or large 
result sets.
     host_and_port = self._to_host_port(self.impalad_host, self.impalad_port)
     assert self.http_path
     # ImpalaHttpClient relies on the URI scheme (http vs https) to open an 
appropriate
@@ -444,12 +437,14 @@ class ImpalaClient(object):
       url = "https://{0}/{1}".format(host_and_port, self.http_path)
       transport = ImpalaHttpClient(url, ssl_context=ssl_ctx,
                                    http_cookie_names=self.http_cookie_names,
+                                   connect_timeout_ms=connect_timeout_ms,
                                    socket_timeout_s=self.http_socket_timeout_s,
                                    verbose=self.verbose,
                                    reuse_connection=self.reuse_http_connection)
     else:
       url = "http://{0}/{1}".format(host_and_port, self.http_path)
       transport = ImpalaHttpClient(url, 
http_cookie_names=self.http_cookie_names,
+                                   connect_timeout_ms=connect_timeout_ms,
                                    socket_timeout_s=self.http_socket_timeout_s,
                                    verbose=self.verbose,
                                    reuse_connection=self.reuse_http_connection)
diff --git a/tests/shell/test_shell_commandline.py 
b/tests/shell/test_shell_commandline.py
index 095ec4ed5..f67239c46 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1175,20 +1175,13 @@ class TestImpalaShell(ImpalaTestSuite):
        indefinitely while connecting
     """
 
-    # --connect_timeout_ms not supported with HTTP transport. Refer to the 
comment
-    # in ImpalaClient::_get_http_transport() for details.
-    # --http_socket_timeout_s not supported for strict_hs2_protocol.
-    if (vector.get_value('protocol') == 'hs2-http'
-        and vector.get_value('strict_hs2_protocol')):
-        pytest.skip("THRIFT-4600")
-
     with closing(socket.socket()) as s:
       s.bind(("", 0))
       # maximum number of queued connections on this socket is 1.
       s.listen(1)
       test_port = s.getsockname()[1]
-      args = ['-q', 'select foo; select bar;', '--ssl', '-t', '2000',
-              '--http_socket_timeout_s', '2', '-i', 'localhost:%d' % 
(test_port)]
+      args = ['-q', 'select 1', '--ssl', '-t', '1000',
+              '-i', 'localhost:%d' % (test_port)]
       run_impala_shell_cmd(vector, args, expect_success=False)
 
   def test_client_identifier(self, vector):
@@ -1409,34 +1402,27 @@ class TestImpalaShell(ImpalaTestSuite):
       rows_from_file = [line.rstrip() for line in f]
       assert rows_from_stdout == rows_from_file
 
+  @pytest.mark.execute_serially
   def test_http_socket_timeout(self, vector):
     """Test setting different http_socket_timeout_s values."""
     if (vector.get_value('strict_hs2_protocol')
         or vector.get_value('protocol') != 'hs2-http'):
         pytest.skip("http socket timeout not supported in strict hs2 mode."
                     " Only supported with hs2-http protocol.")
-    # Test http_socket_timeout_s=0, expect errors
-    args = ['--quiet', '-B', '--query', 'select 0;']
-    result = run_impala_shell_cmd(vector, args + ['--http_socket_timeout_s=0'],
+    # Test very short http_socket_timeout_s, expect errors. After the 
connection is
+    # established - which now uses connect_timeout_ms - RPCs in the 
minicluster appear to
+    # respond immediately, so non-blocking mode (timeout=0) does not result in 
an error.
+    # Instead, use a very small timeout that the RPC cannot meet.
+    args = ['--quiet', '-B', '--query', 'select 0']
+    result = run_impala_shell_cmd(vector, args + 
['--http_socket_timeout_s=0.001'],
                                   expect_success=False)
 
-    # Outside the docker-based tests, Python 2 and Python 3 produce "Operating
-    # now in progress" with slightly different error classes. When running with
-    # docker-based tests, it results in a different error code and "Cannot
-    # assign requested address" for both Python 2 and Python 3.
-    # Tolerate all three of these variants.
-    error_template = (
-      "[Exception] type=<class '{0}'> in OpenSession. Num remaining tries: 3 "
-      "[Errno {1}] {2}")
-    expected_err_py2 = \
-        error_template.format("socket.error", 115, "Operation now in progress")
-    expected_err_py3 = \
-        error_template.format("BlockingIOError", 115, "Operation now in 
progress")
-    expected_err_docker = \
-        error_template.format("OSError", 99, "Cannot assign requested address")
+    # Python 2 throws socket.timeout and Python 3 throws TimeoutError.
+    error_template = "[Exception] type=<class '{0}'> in ExecuteStatement.  
timed out"
+    pyver_exceptions = ["socket.timeout", "TimeoutError"]
     actual_err = result.stderr.splitlines()[0]
-    assert actual_err[TS_LEN:] in [expected_err_py2, expected_err_py3,
-                                   expected_err_docker]
+    assert actual_err[TS_LEN:] in [error_template.format(pever_exception)
+                                   for pever_exception in pyver_exceptions]
 
     # Test http_socket_timeout_s=-1, expect errors
     result = run_impala_shell_cmd(vector, args + 
['--http_socket_timeout_s=-1'],

Reply via email to