IMPALA-7678: Reapply "IMPALA-7660: Support ECDH ciphers for debug webserver"
This patch reverses the revert of IMPALA-7660. The problem with IMPALA-7660 was that urllib.urlopen added the 'context' parameter in 2.7.9, so it isn't present on rhel7, which uses 2.7.5 The fix is to switch to using the 'requests' library, which supports ssl connections on all the platforms Impala is supported on. This patch also adds more info to the error message printed by start-impala-cluster.py when the debug webserver cannot be reached yet to help with debugging these issues in the future. Testing: - Ran full builds on rhel7, rhel6, and ubuntu16. Change-Id: I679469ed7f27944f75004ec4b16d513e6ea6b544 Reviewed-on: http://gerrit.cloudera.org:8080/11625 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5e92d139 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5e92d139 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5e92d139 Branch: refs/heads/master Commit: 5e92d139b951e77d3f9f355c1e46736454a654b0 Parents: 0cd9151 Author: Thomas Tauber-Marshall <tmarsh...@cloudera.com> Authored: Mon Oct 8 15:43:53 2018 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Committed: Wed Oct 17 05:39:32 2018 +0000 ---------------------------------------------------------------------- be/src/thirdparty/squeasel/squeasel.c | 36 ++++++++++++++++++++++++---- tests/common/impala_cluster.py | 27 +++++++++++++-------- tests/common/impala_service.py | 36 +++++++++++++++++----------- tests/custom_cluster/test_client_ssl.py | 35 ++++++++++++++++++++++----- tests/custom_cluster/test_redaction.py | 22 +++++++---------- tests/run-tests.py | 6 ++--- 6 files changed, 110 insertions(+), 52 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/be/src/thirdparty/squeasel/squeasel.c ---------------------------------------------------------------------- diff --git a/be/src/thirdparty/squeasel/squeasel.c b/be/src/thirdparty/squeasel/squeasel.c index 2149497..045740d 100644 --- a/be/src/thirdparty/squeasel/squeasel.c +++ b/be/src/thirdparty/squeasel/squeasel.c @@ -4298,11 +4298,37 @@ static int set_ssl_option(struct sq_context *ctx) { (void) SSL_CTX_use_certificate_chain_file(ctx->ssl_ctx, pem); } - if (ctx->config[SSL_CIPHERS] != NULL && - (SSL_CTX_set_cipher_list(ctx->ssl_ctx, ctx->config[SSL_CIPHERS]) == 0)) { - cry(fc(ctx), "SSL_CTX_set_cipher_list: error setting ciphers (%s): %s", - ctx->config[SSL_CIPHERS], ssl_error()); - return 0; + if (ctx->config[SSL_CIPHERS] != NULL) { + if (SSL_CTX_set_cipher_list(ctx->ssl_ctx, ctx->config[SSL_CIPHERS]) == 0) { + cry(fc(ctx), "SSL_CTX_set_cipher_list: error setting ciphers (%s): %s", + ctx->config[SSL_CIPHERS], ssl_error()); + return 0; + } +#ifndef OPENSSL_NO_ECDH +#if OPENSSL_VERSION_NUMBER < 0x10002000L + // OpenSSL 1.0.1 and below only support setting a single ECDH curve at once. + // We choose prime256v1 because it's the first curve listed in the "modern + // compatibility" section of the Mozilla Server Side TLS recommendations, + // accessed Feb. 2017. + EC_KEY* ecdh = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); + if (ecdh == NULL) { + cry(fc(ctx), "EC_KEY_new_by_curve_name: %s", ssl_error()); + } + + int rc = SSL_CTX_set_tmp_ecdh(ctx->ssl_ctx, ecdh); + if (rc <= 0) { + cry(fc(ctx), "SSL_CTX_set_tmp_ecdh: %s", ssl_error()); + } +#elif OPENSSL_VERSION_NUMBER < 0x10100000L + // OpenSSL 1.0.2 provides the set_ecdh_auto API which internally figures out + // the best curve to use. + int rc = SSL_CTX_set_ecdh_auto(ctx->ssl_ctx, 1); + if (rc <= 0) { + cry(fc(ctx), "SSL_CTX_set_ecdh_auto: %s", ssl_error()); + } +#endif +#endif + } return 1; http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/common/impala_cluster.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py index f25c8ed..f0c1f8f 100644 --- a/tests/common/impala_cluster.py +++ b/tests/common/impala_cluster.py @@ -215,13 +215,19 @@ class Process(object): # Base class for all Impala processes class BaseImpalaProcess(Process): - def __init__(self, cmd, hostname): + def __init__(self, cmd): super(BaseImpalaProcess, self).__init__(cmd) - self.hostname = hostname + self.hostname = self._get_hostname() def _get_webserver_port(self, default=None): return int(self._get_arg_value('webserver_port', default)) + def _get_webserver_certificate_file(self): + return self._get_arg_value("webserver_certificate_file", "") + + def _get_hostname(self): + return self._get_arg_value("hostname", socket.gethostname()) + def _get_arg_value(self, arg_name, default=None): """Gets the argument value for given argument name""" for arg in self.cmd: @@ -235,11 +241,12 @@ class BaseImpalaProcess(Process): # Represents an impalad process class ImpaladProcess(BaseImpalaProcess): def __init__(self, cmd): - super(ImpaladProcess, self).__init__(cmd, socket.gethostname()) + super(ImpaladProcess, self).__init__(cmd) self.service = ImpaladService(self.hostname, self._get_webserver_port(default=25000), self.__get_beeswax_port(default=21000), self.__get_be_port(default=22000), - self.__get_hs2_port(default=21050)) + self.__get_hs2_port(default=21050), + self._get_webserver_certificate_file()) def __get_beeswax_port(self, default=None): return int(self._get_arg_value('beeswax_port', default)) @@ -263,17 +270,17 @@ class ImpaladProcess(BaseImpalaProcess): # Represents a statestored process class StateStoreProcess(BaseImpalaProcess): def __init__(self, cmd): - super(StateStoreProcess, self).__init__(cmd, socket.gethostname()) - self.service =\ - StateStoredService(self.hostname, self._get_webserver_port(default=25010)) + super(StateStoreProcess, self).__init__(cmd) + self.service = StateStoredService(self.hostname, + self._get_webserver_port(default=25010), self._get_webserver_certificate_file()) # Represents a catalogd process class CatalogdProcess(BaseImpalaProcess): def __init__(self, cmd): - super(CatalogdProcess, self).__init__(cmd, socket.gethostname()) - self.service = CatalogdService(self.hostname, - self._get_webserver_port(default=25020), self.__get_port(default=26000)) + super(CatalogdProcess, self).__init__(cmd) + self.service = CatalogdService(self.hostname, self._get_webserver_port(default=25020), + self._get_webserver_certificate_file(), self.__get_port(default=26000)) def __get_port(self, default=None): return int(self._get_arg_value('catalog_service_port', default)) http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/common/impala_service.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py index 16419ae..cfd4b55 100644 --- a/tests/common/impala_service.py +++ b/tests/common/impala_service.py @@ -22,7 +22,7 @@ import json import logging import re -import urllib +import requests from time import sleep, time from tests.common.impala_connection import create_connection, create_ldap_connection @@ -41,31 +41,36 @@ LOG.setLevel(level=logging.DEBUG) # Base class for all Impala services # TODO: Refactor the retry/timeout logic into a common place. class BaseImpalaService(object): - def __init__(self, hostname, webserver_port): + def __init__(self, hostname, webserver_port, webserver_certificate_file): self.hostname = hostname self.webserver_port = webserver_port + self.webserver_certificate_file = webserver_certificate_file def open_debug_webpage(self, page_name, timeout=10, interval=1): start_time = time() while (time() - start_time < timeout): try: - return urllib.urlopen("http://%s:%d/%s" % - (self.hostname, int(self.webserver_port), page_name)) - except Exception: - LOG.info("Debug webpage not yet available.") + protocol = "http" + if self.webserver_certificate_file != "": + protocol = "https" + url = "%s://%s:%d/%s" % \ + (protocol, self.hostname, int(self.webserver_port), page_name) + return requests.get(url, verify=self.webserver_certificate_file) + except Exception as e: + LOG.info("Debug webpage not yet available: %s", str(e)) sleep(interval) assert 0, 'Debug webpage did not become available in expected time.' def read_debug_webpage(self, page_name, timeout=10, interval=1): - return self.open_debug_webpage(page_name, timeout=timeout, interval=interval).read() + return self.open_debug_webpage(page_name, timeout=timeout, interval=interval).text def get_thrift_profile(self, query_id, timeout=10, interval=1): """Returns thrift profile of the specified query ID, if available""" page_name = "query_profile_encoded?query_id=%s" % (query_id) try: response = self.open_debug_webpage(page_name, timeout=timeout, interval=interval) - tbuf = response.read() + tbuf = response.text except Exception as e: LOG.info("Thrift profile for query %s not yet available: %s", query_id, str(e)) return None @@ -166,8 +171,9 @@ class BaseImpalaService(object): # new connections or accessing the debug webpage. class ImpaladService(BaseImpalaService): def __init__(self, hostname, webserver_port=25000, beeswax_port=21000, be_port=22000, - hs2_port=21050): - super(ImpaladService, self).__init__(hostname, webserver_port) + hs2_port=21050, webserver_certificate_file=""): + super(ImpaladService, self).__init__( + hostname, webserver_port, webserver_certificate_file) self.beeswax_port = beeswax_port self.be_port = be_port self.hs2_port = hs2_port @@ -327,8 +333,9 @@ class ImpaladService(BaseImpalaService): # Allows for interacting with the StateStore service to perform operations such as # accessing the debug webpage. class StateStoredService(BaseImpalaService): - def __init__(self, hostname, webserver_port): - super(StateStoredService, self).__init__(hostname, webserver_port) + def __init__(self, hostname, webserver_port, webserver_certificate_file): + super(StateStoredService, self).__init__( + hostname, webserver_port, webserver_certificate_file) def wait_for_live_subscribers(self, num_subscribers, timeout=15, interval=1): self.wait_for_metric_value('statestore.live-backends', num_subscribers, @@ -338,8 +345,9 @@ class StateStoredService(BaseImpalaService): # Allows for interacting with the Catalog service to perform operations such as # accessing the debug webpage. class CatalogdService(BaseImpalaService): - def __init__(self, hostname, webserver_port, service_port): - super(CatalogdService, self).__init__(hostname, webserver_port) + def __init__(self, hostname, webserver_port, webserver_certificate_file, service_port): + super(CatalogdService, self).__init__( + hostname, webserver_port, webserver_certificate_file) self.service_port = service_port def get_catalog_version(self, timeout=10, interval=1): http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/custom_cluster/test_client_ssl.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_client_ssl.py b/tests/custom_cluster/test_client_ssl.py index b6f7e04..edea640 100644 --- a/tests/custom_cluster/test_client_ssl.py +++ b/tests/custom_cluster/test_client_ssl.py @@ -19,6 +19,7 @@ import logging import os import pytest +import requests import signal import ssl import socket @@ -110,13 +111,28 @@ class TestClientSsl(CustomClusterTestSuite): assert "Query Status: Cancelled" in result.stdout assert impalad.wait_for_num_in_flight_queries(0) + WEBSERVER_SSL_ARGS = ("--webserver_certificate_file=%(cert_dir)s/server-cert.pem " + "--webserver_private_key_file=%(cert_dir)s/server-key.pem " + "--hostname=localhost" # Must match hostname in certificate + % {'cert_dir': CERT_DIR}) + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args(impalad_args=WEBSERVER_SSL_ARGS, + statestored_args=WEBSERVER_SSL_ARGS, + catalogd_args=WEBSERVER_SSL_ARGS) + def test_webserver_ssl(self): + "Tests that the debug web pages are reachable when run with ssl." + self._verify_ssl_webserver() + # Test that the shell can connect to a ECDH only cluster. - TLS_ECDH_ARGS = ("--ssl_client_ca_certificate=%s/server-cert.pem " - "--ssl_server_certificate=%s/server-cert.pem " - "--ssl_private_key=%s/server-key.pem " - "--hostname=localhost " # Required to match hostname in certificate" - "--ssl_cipher_list=ECDHE-RSA-AES128-GCM-SHA256 " - % (CERT_DIR, CERT_DIR, CERT_DIR)) + TLS_ECDH_ARGS = ("--ssl_client_ca_certificate=%(cert_dir)s/server-cert.pem " + "--ssl_server_certificate=%(cert_dir)s/server-cert.pem " + "--ssl_private_key=%(cert_dir)s/server-key.pem " + "--hostname=localhost " # Must match hostname in certificate + "--ssl_cipher_list=ECDHE-RSA-AES128-GCM-SHA256 " + "--webserver_certificate_file=%(cert_dir)s/server-cert.pem " + "--webserver_private_key_file=%(cert_dir)s/server-key.pem " + % {'cert_dir': CERT_DIR}) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args(impalad_args=TLS_ECDH_ARGS, @@ -128,6 +144,7 @@ class TestClientSsl(CustomClusterTestSuite): def test_tls_ecdh(self, vector): self._verify_negative_cases() self._validate_positive_cases("%s/server-cert.pem" % self.CERT_DIR) + self._verify_ssl_webserver() # Test that the shell can connect to a TLS1.2 only cluster, and for good measure # restrict the cipher suite to just one choice. @@ -209,3 +226,9 @@ class TestClientSsl(CustomClusterTestSuite): result = run_impala_shell_cmd(shell_options) for msg in [self.SSL_ENABLED, self.CONNECTED, self.FETCHED]: assert msg in result.stderr + + def _verify_ssl_webserver(self): + for port in ["25000", "25010", "25020"]: + url = "https://localhost:%s" % port + response = requests.get(url, verify="%s/server-cert.pem" % self.CERT_DIR) + assert response.status_code == requests.codes.ok, url http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/custom_cluster/test_redaction.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_redaction.py b/tests/custom_cluster/test_redaction.py index 7789e06..3bf831e 100644 --- a/tests/custom_cluster/test_redaction.py +++ b/tests/custom_cluster/test_redaction.py @@ -109,10 +109,9 @@ class TestRedaction(CustomClusterTestSuite, unittest.TestCase): # TODO: The HS2 interface may be better about exposing the query handle even if a # query fails. Maybe investigate that after the switch to HS2. regex = re.compile(r'query_id=(\w+:\w+)') - for line in self.create_impala_service().open_debug_webpage('queries'): - match = regex.search(line) - if match: - return match.group(1) + match = regex.search(self.create_impala_service().read_debug_webpage('queries')) + if match: + return match.group(1) raise Exception('Unable to find any query id') def assert_server_fails_to_start(self, rules, start_options, expected_error_message): @@ -147,9 +146,8 @@ class TestRedaction(CustomClusterTestSuite, unittest.TestCase): for response_format in ('html', 'json'): # The 'html' param is actually ignored by the server. url = page + '?query_id=' + query_id + "&" + response_format - results = grep_file(impala_service.open_debug_webpage(url), unredacted_value) - assert not results, "Web page %s should not contain '%s' but does" \ - % (url, unredacted_value) + assert unredacted_value not in impala_service.read_debug_webpage(url), \ + "Web page %s should not contain '%s' but does" % (url, unredacted_value) # But the redacted value should be shown. self.assert_web_ui_contains(query_id, redacted_value) @@ -159,17 +157,15 @@ class TestRedaction(CustomClusterTestSuite, unittest.TestCase): impala_service = self.create_impala_service() for page in ('queries', 'query_stmt', 'query_plan_text', 'query_profile'): url = '%s?query_id=%s' % (page, query_id) - results = grep_file(impala_service.open_debug_webpage(url), search) - assert results, "Web page %s should contain '%s' but does not" \ - % (url, search) + assert search in impala_service.read_debug_webpage(url), \ + "Web page %s should contain '%s' but does not" % (url, search) def assert_query_profile_contains(self, query_id, search): ''' Asserts that the query profile for 'query_id' contains 'search' string''' impala_service = self.create_impala_service() url = 'query_profile?query_id=%s' % query_id - results = grep_file(impala_service.open_debug_webpage(url), search) - assert results, "Query profile %s should contain '%s' but does not" \ - % (url, search) + assert search in impala_service.read_debug_webpage(url), \ + "Query profile %s should contain '%s' but does not" % (url, search) @pytest.mark.execute_serially def test_bad_rules(self): http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/run-tests.py ---------------------------------------------------------------------- diff --git a/tests/run-tests.py b/tests/run-tests.py index c2fd420..9aaeaa5 100755 --- a/tests/run-tests.py +++ b/tests/run-tests.py @@ -214,10 +214,8 @@ def print_metrics(substring): print ">" * 80 port = impalad._get_webserver_port() print "connections metrics for impalad at port {0}:".format(port) - debug_info = json.loads(ImpaladService( - impalad.hostname, - webserver_port=port) - .open_debug_webpage('metrics?json').read()) + debug_info = json.loads(ImpaladService(impalad.hostname, webserver_port=port) + .read_debug_webpage('metrics?json')) for metric in debug_info['metric_group']['metrics']: if substring in metric['name']: print json.dumps(metric, indent=1)