IMPALA-5263: test infra: support CA bundles with secure clusters

This patch adds the command line option --ca_cert to the common test infra CLI options for use alongside --use-ssl. This is useful when testing against a secured Impala cluster in which the SSL certs are self-signed: pointing --ca_cert at the CA certificate allows the SSL requests to be verified. Using this option will also suppress noisy console warnings like:
InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.org/en/latest/security.html We also go further in this patch and use the warnings module to print these SSL-related warnings once and only once, instead of all over the place. In the case of the stress test, this greatly reduces the noise in the console log. Testing: - quick concurrent_select.py calls with and without --ca_cert to observe that connections still get made and the test runs smoothly. Some of this testing occurred without warning suppression, so that I could be sure the InsecureRequestWarnings were not occurring when using --ca_cert anymore. - ensured warnings are printed once, not multiple times Change-Id: Ifb9e466e4b7cde704cdc4cf98159c068c0a400a9 Reviewed-on: http://gerrit.cloudera.org:8080/7152 Reviewed-by: David Knupp <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/428b5a1b Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/428b5a1b Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/428b5a1b Branch: refs/heads/master Commit: 428b5a1bfe5e8a533db95c98f8ffbc1f825cdcef Parents: 467ccd1 Author: Michael Brown <[email protected]> Authored: Sat Jun 10 16:35:00 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue Jun 13 19:25:57 2017 +0000 ---------------------------------------------------------------------- tests/comparison/cli_options.py | 30 ++++++++++++++++++++++++++++++ tests/comparison/cluster.py | 31 ++++++++++++++++++++++--------- tests/comparison/db_connection.py | 6 ++++-- 3 files changed, 56 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/428b5a1b/tests/comparison/cli_options.py 
---------------------------------------------------------------------- diff --git a/tests/comparison/cli_options.py b/tests/comparison/cli_options.py index 91dd0ce..885ef84 100644 --- a/tests/comparison/cli_options.py +++ b/tests/comparison/cli_options.py @@ -21,7 +21,12 @@ import logging import os import sys from getpass import getuser +from requests.packages.urllib3.exceptions import ( + InsecurePlatformWarning, + InsecureRequestWarning, + SecurityWarning) from tempfile import gettempdir +from warnings import filterwarnings from tests.comparison import db_connection from tests.comparison.cluster import ( @@ -172,6 +177,10 @@ def add_ssl_options(parser): group.add_argument( '--use-ssl', action='store_true', default=False, help='Use SSL to connect') + group.add_argument( + '--ca_cert', default=None, metavar='CA cert path', + help='Path to optional CA certificate. This is needed to verify SSL requests if ' + 'the Impala certificate is self-signed in a test environment.') def create_cluster(args): @@ -187,6 +196,27 @@ def create_cluster(args): cluster.hadoop_user_name = args.hadoop_user_name cluster.use_kerberos = getattr(args, 'use_kerberos', False) cluster.use_ssl = getattr(args, 'use_ssl', False) + if cluster.use_ssl: + # Prevent excessive warning spam on the console. + # + # The first warning is related to certificates that do not comply with RFC 2818. + # https://github.com/shazow/urllib3/issues/497 . Permit one warning. + filterwarnings( + 'once', + 'Certificate has no `subjectAltName`', + SecurityWarning) + # Permit one warning with unverified HTTPS requests + filterwarnings( + 'once', + 'Unverified HTTPS request is being made', + InsecureRequestWarning) + # TODO: IMPALA-5264 to fix python environment to prevent InsecurePlatformWarning . + # Once we fix that we should remove this suppression. 
+ filterwarnings( + 'once', + 'A true SSLContext object is not available', + InsecurePlatformWarning) + cluster.ca_cert = getattr(args, 'ca_cert', None) return cluster http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/428b5a1b/tests/comparison/cluster.py ---------------------------------------------------------------------- diff --git a/tests/comparison/cluster.py b/tests/comparison/cluster.py index e0328b0..761bc57 100644 --- a/tests/comparison/cluster.py +++ b/tests/comparison/cluster.py @@ -78,6 +78,7 @@ class Cluster(object): self.hadoop_user_name = getuser() self.use_kerberos = False self.use_ssl = False + self.ca_cert = None self._hdfs = None self._yarn = None @@ -491,9 +492,15 @@ class Hive(Service): return self._warehouse_dir def connect(self, db_name=None): - conn = HiveConnection(host_name=self.hs2_host_name, port=self.hs2_port, - user_name=self.cluster.hadoop_user_name, db_name=db_name, - use_kerberos=self.cluster.use_kerberos, use_ssl=self.cluster.use_ssl) + conn = HiveConnection( + host_name=self.hs2_host_name, + port=self.hs2_port, + user_name=self.cluster.hadoop_user_name, + db_name=db_name, + use_kerberos=self.cluster.use_kerberos, + use_ssl=self.cluster.use_ssl, + ca_cert=self.cluster.ca_cert, + ) conn.cluster = self.cluster return conn @@ -525,9 +532,15 @@ class Impala(Service): def connect(self, db_name=None, impalad=None): if not impalad: impalad = choice(self.impalads) - conn = ImpalaConnection(host_name=impalad.host_name, port=impalad.hs2_port, - user_name=self.cluster.hadoop_user_name, db_name=db_name, - use_kerberos=self.cluster.use_kerberos, use_ssl=self.cluster.use_ssl) + conn = ImpalaConnection( + host_name=impalad.host_name, + port=impalad.hs2_port, + user_name=self.cluster.hadoop_user_name, + db_name=db_name, + use_kerberos=self.cluster.use_kerberos, + use_ssl=self.cluster.use_ssl, + ca_cert=self.cluster.ca_cert, + ) conn.cluster = self.cluster return conn @@ -774,9 +787,9 @@ class Impalad(object): port=self.web_ui_port, 
url=relative_url) try: - # verify=False is needed because of self-signed certifiates - # TODO: support a CA bundle that users could point to instead - resp = requests.get(url, params=params, timeout=timeout_secs, verify=False) + verify_ca = self.cluster.ca_cert if self.cluster.ca_cert is not None else False + resp = requests.get(url, params=params, timeout=timeout_secs, + verify=verify_ca) except requests.exceptions.Timeout as e: raise Timeout(underlying_exception=e) resp.raise_for_status() http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/428b5a1b/tests/comparison/db_connection.py ---------------------------------------------------------------------- diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py index 6f9b7ce..71786b8 100644 --- a/tests/comparison/db_connection.py +++ b/tests/comparison/db_connection.py @@ -853,10 +853,11 @@ class ImpalaConnection(DbConnection): _KERBEROS_SERVICE_NAME = 'impala' _NON_KERBEROS_AUTH_MECH = 'NOSASL' - def __init__(self, use_kerberos=False, use_ssl=False, **kwargs): + def __init__(self, use_kerberos=False, use_ssl=False, ca_cert=None, **kwargs): self._use_kerberos = use_kerberos self.cluster = None self._use_ssl = use_ssl + self._ca_cert = ca_cert DbConnection.__init__(self, **kwargs) def clone(self, db_name): @@ -887,7 +888,8 @@ class ImpalaConnection(DbConnection): timeout=(60 * 60), auth_mechanism=('GSSAPI' if self._use_kerberos else self._NON_KERBEROS_AUTH_MECH), kerberos_service_name=self._KERBEROS_SERVICE_NAME, - use_ssl=self._use_ssl) + use_ssl=self._use_ssl, + ca_cert=self._ca_cert) class HiveCursor(ImpalaCursor):
