IMPALA-5263: test infra: support CA bundles with secure clusters

This patch adds the command line option --ca_cert to the common test
infra CLI options for use alongside --use-ssl. This is useful when
testing against a secured Impala cluster in which the SSL certs are
self-signed. This will allow the SSL request to be validated. Using this
option will also suppress noisy console warnings like:

  InsecureRequestWarning: Unverified HTTPS request is being made. Adding
  certificate verification is strongly advised. See:
  https://urllib3.readthedocs.org/en/latest/security.html

We also go further in this patch and use the warnings module to print
these SSL-related warnings once and only once, instead of all over the
place. In the case of the stress test, this greatly reduces the noise in
the console log.

Testing:
- quick concurrent_select.py calls with and without --ca_cert to observe
  that connections still get made and the test runs smoothly. Some of
  this testing occurred without warning suppression, so that I could be
  sure the InsecureRequestWarnings were not occurring when using
  --ca_cert anymore.
- ensured warnings are printed once, not multiple times

Change-Id: Ifb9e466e4b7cde704cdc4cf98159c068c0a400a9
Reviewed-on: http://gerrit.cloudera.org:8080/7152
Reviewed-by: David Knupp <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/428b5a1b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/428b5a1b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/428b5a1b

Branch: refs/heads/master
Commit: 428b5a1bfe5e8a533db95c98f8ffbc1f825cdcef
Parents: 467ccd1
Author: Michael Brown <[email protected]>
Authored: Sat Jun 10 16:35:00 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Tue Jun 13 19:25:57 2017 +0000

----------------------------------------------------------------------
 tests/comparison/cli_options.py   | 30 ++++++++++++++++++++++++++++++
 tests/comparison/cluster.py       | 31 ++++++++++++++++++++++---------
 tests/comparison/db_connection.py |  6 ++++--
 3 files changed, 56 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/428b5a1b/tests/comparison/cli_options.py
----------------------------------------------------------------------
diff --git a/tests/comparison/cli_options.py b/tests/comparison/cli_options.py
index 91dd0ce..885ef84 100644
--- a/tests/comparison/cli_options.py
+++ b/tests/comparison/cli_options.py
@@ -21,7 +21,12 @@ import logging
 import os
 import sys
 from getpass import getuser
+from requests.packages.urllib3.exceptions import (
+    InsecurePlatformWarning,
+    InsecureRequestWarning,
+    SecurityWarning)
 from tempfile import gettempdir
+from warnings import filterwarnings
 
 from tests.comparison import db_connection
 from tests.comparison.cluster import (
@@ -172,6 +177,10 @@ def add_ssl_options(parser):
   group.add_argument(
       '--use-ssl', action='store_true', default=False,
       help='Use SSL to connect')
+  group.add_argument(
+      '--ca_cert', default=None, metavar='CA cert path',
+      help='Path to optional CA certificate. This is needed to verify SSL requests if '
+           'the Impala certificate is self-signed in a test environment.')
 
 
 def create_cluster(args):
@@ -187,6 +196,27 @@ def create_cluster(args):
   cluster.hadoop_user_name = args.hadoop_user_name
   cluster.use_kerberos = getattr(args, 'use_kerberos', False)
   cluster.use_ssl = getattr(args, 'use_ssl', False)
+  if cluster.use_ssl:
+    # Prevent excessive warning spam on the console.
+    #
+    # The first warning is related to certificates that do not comply with RFC 2818.
+    # https://github.com/shazow/urllib3/issues/497 . Permit one warning.
+    filterwarnings(
+        'once',
+        'Certificate has no `subjectAltName`',
+        SecurityWarning)
+    # Permit one warning with unverified HTTPS requests
+    filterwarnings(
+        'once',
+        'Unverified HTTPS request is being made',
+        InsecureRequestWarning)
+    # TODO: IMPALA-5264 to fix python environment to prevent InsecurePlatformWarning .
+    # Once we fix that we should remove this suppression.
+    filterwarnings(
+        'once',
+        'A true SSLContext object is not available',
+        InsecurePlatformWarning)
+  cluster.ca_cert = getattr(args, 'ca_cert', None)
   return cluster
 
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/428b5a1b/tests/comparison/cluster.py
----------------------------------------------------------------------
diff --git a/tests/comparison/cluster.py b/tests/comparison/cluster.py
index e0328b0..761bc57 100644
--- a/tests/comparison/cluster.py
+++ b/tests/comparison/cluster.py
@@ -78,6 +78,7 @@ class Cluster(object):
     self.hadoop_user_name = getuser()
     self.use_kerberos = False
     self.use_ssl = False
+    self.ca_cert = None
 
     self._hdfs = None
     self._yarn = None
@@ -491,9 +492,15 @@ class Hive(Service):
     return self._warehouse_dir
 
   def connect(self, db_name=None):
-    conn = HiveConnection(host_name=self.hs2_host_name, port=self.hs2_port,
-        user_name=self.cluster.hadoop_user_name, db_name=db_name,
-        use_kerberos=self.cluster.use_kerberos, use_ssl=self.cluster.use_ssl)
+    conn = HiveConnection(
+        host_name=self.hs2_host_name,
+        port=self.hs2_port,
+        user_name=self.cluster.hadoop_user_name,
+        db_name=db_name,
+        use_kerberos=self.cluster.use_kerberos,
+        use_ssl=self.cluster.use_ssl,
+        ca_cert=self.cluster.ca_cert,
+    )
     conn.cluster = self.cluster
     return conn
 
@@ -525,9 +532,15 @@ class Impala(Service):
   def connect(self, db_name=None, impalad=None):
     if not impalad:
       impalad = choice(self.impalads)
-    conn = ImpalaConnection(host_name=impalad.host_name, port=impalad.hs2_port,
-        user_name=self.cluster.hadoop_user_name, db_name=db_name,
-        use_kerberos=self.cluster.use_kerberos, use_ssl=self.cluster.use_ssl)
+    conn = ImpalaConnection(
+        host_name=impalad.host_name,
+        port=impalad.hs2_port,
+        user_name=self.cluster.hadoop_user_name,
+        db_name=db_name,
+        use_kerberos=self.cluster.use_kerberos,
+        use_ssl=self.cluster.use_ssl,
+        ca_cert=self.cluster.ca_cert,
+    )
     conn.cluster = self.cluster
     return conn
 
@@ -774,9 +787,9 @@ class Impalad(object):
         port=self.web_ui_port,
         url=relative_url)
     try:
-      # verify=False is needed because of self-signed certifiates
-      # TODO: support a CA bundle that users could point to instead
-      resp = requests.get(url, params=params, timeout=timeout_secs, verify=False)
+      verify_ca = self.cluster.ca_cert if self.cluster.ca_cert is not None else False
+      resp = requests.get(url, params=params, timeout=timeout_secs,
+                          verify=verify_ca)
     except requests.exceptions.Timeout as e:
       raise Timeout(underlying_exception=e)
     resp.raise_for_status()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/428b5a1b/tests/comparison/db_connection.py
----------------------------------------------------------------------
diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py
index 6f9b7ce..71786b8 100644
--- a/tests/comparison/db_connection.py
+++ b/tests/comparison/db_connection.py
@@ -853,10 +853,11 @@ class ImpalaConnection(DbConnection):
   _KERBEROS_SERVICE_NAME = 'impala'
   _NON_KERBEROS_AUTH_MECH = 'NOSASL'
 
-  def __init__(self, use_kerberos=False, use_ssl=False, **kwargs):
+  def __init__(self, use_kerberos=False, use_ssl=False, ca_cert=None, **kwargs):
     self._use_kerberos = use_kerberos
     self.cluster = None
     self._use_ssl = use_ssl
+    self._ca_cert = ca_cert
     DbConnection.__init__(self, **kwargs)
 
   def clone(self, db_name):
@@ -887,7 +888,8 @@ class ImpalaConnection(DbConnection):
         timeout=(60 * 60),
         auth_mechanism=('GSSAPI' if self._use_kerberos else self._NON_KERBEROS_AUTH_MECH),
         kerberos_service_name=self._KERBEROS_SERVICE_NAME,
-        use_ssl=self._use_ssl)
+        use_ssl=self._use_ssl,
+        ca_cert=self._ca_cert)
 
 
 class HiveCursor(ImpalaCursor):

Reply via email to