This is an automated email from the ASF dual-hosted git repository. dbecker pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 04bdb4d32c598bd66c4774eac59947e4d95e0173 Author: Gergely Farkas <[email protected]> AuthorDate: Thu Nov 9 09:54:24 2023 +0100 IMPALA-12552: Fix Kerberos authentication issue that occurs in python 3 environment when kerberos_host_fqdn option is used In Pyhton 2, the sasl layer does not accept unicode strings, so we have to explicitly encode the kerberos_host_fqdn string to ascii. However, this is not the case in python 3, where we have to omit the encode, because if we don't do this, impala-shell wants to use the following service principal during Kerberos auth: my_service_name/b'my.kerberos.host.fqdn'@MY.REALM instead of the correct one, which is: my_service_name/[email protected] (This is because the output of the encode function is a byte array in python 3.) Tested with new unit tests and with a snapshot build manually in CDP PVC DS. Change-Id: I8b157d76824ad67faf531a529256a8afe2ab9d49 Reviewed-on: http://gerrit.cloudera.org:8080/20691 Reviewed-by: Michael Smith <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Wenzhe Zhou <[email protected]> --- .../customcluster/KerberosKdcEnvironment.java | 35 +++++++++ .../customcluster/LdapKerberosImpalaShellTest.java | 68 ++++++++++++++-- .../impala/customcluster/RunShellCommand.java | 1 - infra/python/deps/py2-requirements.txt | 1 + infra/python/deps/py3-requirements.txt | 1 + shell/impala_client.py | 6 +- shell/kerberos_util.py | 28 +++++++ shell/make_shell_tarball.sh | 1 + shell/packaging/make_python_package.sh | 1 + .../shell/test_kerberos_util.py | 27 +++---- .../shell/test_shell_commandline_kerberos_auth.py | 91 ++++++++++++++++++++++ 11 files changed, 237 insertions(+), 23 deletions(-) diff --git a/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java b/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java index 5f393c3c2..e43d6ed69 100644 --- a/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java +++ b/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java @@ -84,11 +84,29 @@ class KerberosKdcEnvironment extends ExternalResource { return servicePrincipal; } + public String getServicePrincipal(String service, String hostname) { + return String.format("%s/%s@%s", service, hostname, realm); + } + public String getServiceKeytabFilePath() throws IOException { return new File(kerbyServer.getWorkDir().getCanonicalPath() + "/impala.keytab") .getCanonicalPath(); } + public String getKeytabFilePathWithServicePrincipal(String service, String hostname) + throws IOException, KrbException { + String servicePrincipal = getServicePrincipal(service, hostname); + + String keytabFileName = String.format("%s.%s.keytab", service, hostname); + File keytabFile = new File(kerbyServer.getWorkDir().getCanonicalPath() + + "/" + keytabFileName); + + kerbyServer.createPrincipal(servicePrincipal, "password"); + kerbyServer.exportPrincipal(servicePrincipal, keytabFile); + + return keytabFile.getCanonicalPath(); + } + public String getKrb5ConfigPath() throws IOException { return kerbyServer.getWorkDir().getCanonicalPath() + "/krb5.conf"; } @@ -128,6 +146,23 @@ class KerberosKdcEnvironment extends ExternalResource { ); } + public Map<String, String> getKerberosAuthFlagsWithCustomServicePrincipal( + String service, String hostname) throws IOException, KrbException { + return ImmutableMap.of( + "principal", getServicePrincipal(service, hostname), // enables Kerberos auth + "keytab_file", getKeytabFilePathWithServicePrincipal(service, hostname), + + // To use the internal Kerberos authentication, the impala daemons must be + // started with a service principal that has a valid hostname, + // which in this test environment can be practically only "localhost", + // but here we want to specify a unique hostname, + // so we need to disable internal Kerberos authentication. + // Another, more complicated option would be to put more service principals + // in the keytab file above and specify the "be_principal" flag accordingly. + "skip_internal_kerberos_auth", "true" + ); + } + private Map<String, String> getClusterEnv() throws IOException { String krb5Config = getKrb5ConfigPath(); return ImmutableMap.of( diff --git a/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java b/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java index 4be56f987..3dd27e1c3 100644 --- a/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java +++ b/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java @@ -586,12 +586,47 @@ public class LdapKerberosImpalaShellTest extends LdapKerberosImpalaShellTestBase "", ""); } + /** + * Tests Kerberos authentication with the Kerberos hostname override option. + */ + @Test + public void testShellKerberosAuthWithKerberosHostnameOverride() + throws Exception { + String kerberosHostFqdn = "any.host"; + + Map<String, String> flags = mergeFlags( + + // enable Kerberos authentication + kerberosKdcEnvironment.getKerberosAuthFlagsWithCustomServicePrincipal( + "impala", kerberosHostFqdn) + + ); + int ret = startImpalaCluster(flagsToArgs(flags)); + assertEquals(ret, 0); + + // Cluster should pass impala-shell Kerberos auth tests: + // In this test, the impala daemon is running on localhost, and we connect to it + // with impala-shell, but the service principal used in the impala daemon does + // not have a "localhost" hostname, so we need to use the kerberos_host_fqdn option. + testShellKerberosAuthWithUser( kerberosKdcEnvironment, "user", + /* shouldSucceed */ true, kerberosHostFqdn); + } + /** * Tests Kerberos authentication using impala-shell. */ protected void testShellKerberosAuthWithUser( KerberosKdcEnvironment kerberosKdcEnvironment, String username, boolean shouldSucceed) throws Exception { + testShellKerberosAuthWithUser(kerberosKdcEnvironment, username, shouldSucceed, null); + } + + /** + * Tests Kerberos authentication using impala-shell. + */ + protected void testShellKerberosAuthWithUser( + KerberosKdcEnvironment kerberosKdcEnvironment, String username, + boolean shouldSucceed, String kerberosHostFqdn) throws Exception { List<String> protocolsToTest = Arrays.asList("beeswax", "hs2"); if (pythonSupportsSSLContext()) { @@ -605,12 +640,11 @@ public class LdapKerberosImpalaShellTest extends LdapKerberosImpalaShellTestBase kerberosKdcEnvironment.createUserPrincipalAndCredentialsCache(username); for (String protocol : protocolsToTest) { - String[] command = { - "impala-shell.sh", - String.format("--protocol=%s", protocol), - "--kerberos", - "--query=select logged_in_user()" - }; + String[] command = + kerberosHostFqdn == null ? + createCommandForProtocol(protocol) : + createCommandForProtocolAndKerberosHostFqdn(protocol, + kerberosHostFqdn); String expectedOut = shouldSucceed ? kerberosKdcEnvironment.getUserPrincipal(username) : ""; String expectedErr = @@ -622,6 +656,28 @@ public class LdapKerberosImpalaShellTest extends LdapKerberosImpalaShellTestBase } } + private String[] createCommandForProtocol(String protocol) { + String[] command = { + "impala-shell.sh", + String.format("--protocol=%s", protocol), + "--kerberos", + "--query=select logged_in_user()" + }; + return command; + } + + private String[] createCommandForProtocolAndKerberosHostFqdn(String protocol, + String kerberosHostFqdn) { + String[] command = { + "impala-shell.sh", + String.format("--protocol=%s", protocol), + "--kerberos", + String.format("--kerberos_host_fqdn=%s", kerberosHostFqdn), + "--query=select logged_in_user()" + }; + return command; + } + private Map<String, String> getLdapSimpleBindFlags() { String ldapUri = String.format("ldap://localhost:%s", serverRule.getLdapServer().getPort()); diff --git a/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java b/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java index 9dee84373..c2ba1abe4 100644 --- a/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java +++ b/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java @@ -22,7 +22,6 @@ import static org.junit.Assert.assertTrue; import java.io.BufferedReader; import java.io.InputStreamReader; -import java.io.IOException; /** * Helper class to run a shell command. diff --git a/infra/python/deps/py2-requirements.txt b/infra/python/deps/py2-requirements.txt index 122b5ab68..919f7d41c 100644 --- a/infra/python/deps/py2-requirements.txt +++ b/infra/python/deps/py2-requirements.txt @@ -32,3 +32,4 @@ flake8 == 3.9.2 contextlib2 == 0.6.0 pathlib2 == 2.3.7.post1 zipp == 1.2.0 +k5test==0.9.2 diff --git a/infra/python/deps/py3-requirements.txt b/infra/python/deps/py3-requirements.txt index d6195a1e8..b61bc461c 100644 --- a/infra/python/deps/py3-requirements.txt +++ b/infra/python/deps/py3-requirements.txt @@ -29,3 +29,4 @@ pylint == 2.10.2 toml == 0.10.2 platformdirs == 2.4.1 typing-extensions == 3.10.0.2 +k5test==0.10.3 diff --git a/shell/impala_client.py b/shell/impala_client.py index a8cd36f0c..ff6af015c 100755 --- a/shell/impala_client.py +++ b/shell/impala_client.py @@ -46,6 +46,7 @@ from TCLIService.TCLIService import (TExecuteStatementReq, TOpenSessionReq, TOperationState, TFetchResultsReq, TFetchOrientation, TGetLogReq, TGetResultSetMetadataReq, TTypeId, TCancelOperationReq, TCloseOperationReq) from ImpalaHttpClient import ImpalaHttpClient +from kerberos_util import get_kerb_host_from_kerberos_host_fqdn from thrift.protocol import TBinaryProtocol from thrift_sasl import TSaslClientTransport from thrift.transport.TSocket import TSocket @@ -432,7 +433,7 @@ class ImpalaClient(object): elif self.use_kerberos or self.kerberos_host_fqdn: # Set the Kerberos service if self.kerberos_host_fqdn is not None: - kerb_host = self.kerberos_host_fqdn.split(':')[0].encode('ascii', 'ignore') + kerb_host = get_kerb_host_from_kerberos_host_fqdn(self.kerberos_host_fqdn) else: kerb_host = self.impalad_host kerb_service = "{0}@{1}".format(self.kerberos_service_name, kerb_host) @@ -463,14 +464,13 @@ class ImpalaClient(object): # Systems. Only attempt to import TSSLSocket if the user wants an SSL connection. from TSSLSocketWithWildcardSAN import TSSLSocketWithWildcardSAN - # sasl does not accept unicode strings, explicitly encode the string into ascii. # The kerberos_host_fqdn option exposes the SASL client's hostname attribute to # the user. impala-shell checks to ensure this host matches the host in the kerberos # principal. So when a load balancer is configured to be used, its hostname is # expected by impala-shell. Setting this option to the load balancer hostname allows # impala-shell to connect directly to an impalad. if self.kerberos_host_fqdn is not None: - sasl_host = self.kerberos_host_fqdn.split(':')[0].encode('ascii', 'ignore') + sasl_host = get_kerb_host_from_kerberos_host_fqdn(self.kerberos_host_fqdn) else: sasl_host = self.impalad_host diff --git a/shell/kerberos_util.py b/shell/kerberos_util.py new file mode 100644 index 000000000..2d7033116 --- /dev/null +++ b/shell/kerberos_util.py @@ -0,0 +1,28 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import sys + + +def get_kerb_host_from_kerberos_host_fqdn(kerberos_host_fqdn): + kerb_host = kerberos_host_fqdn.split(':')[0] + if sys.version_info.major < 3: + # sasl does not accept unicode strings, explicitly encode the string into ascii. + kerb_host = kerb_host.encode('ascii', 'ignore') + return kerb_host diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh index 15b44ffc4..0fcc08b7a 100755 --- a/shell/make_shell_tarball.sh +++ b/shell/make_shell_tarball.sh @@ -167,6 +167,7 @@ cp ${SHELL_HOME}/ImpalaHttpClient.py ${TARBALL_ROOT}/lib cp ${SHELL_HOME}/shell_exceptions.py ${TARBALL_ROOT}/lib cp ${SHELL_HOME}/shell_output.py ${TARBALL_ROOT}/lib cp ${SHELL_HOME}/cookie_util.py ${TARBALL_ROOT}/lib +cp ${SHELL_HOME}/kerberos_util.py ${TARBALL_ROOT}/lib cp ${SHELL_HOME}/value_converter.py ${TARBALL_ROOT}/lib cp ${SHELL_HOME}/impala-shell ${TARBALL_ROOT} cp ${SHELL_HOME}/impala_shell.py ${TARBALL_ROOT} diff --git a/shell/packaging/make_python_package.sh b/shell/packaging/make_python_package.sh index 7bdd9ae74..a5af9d1b3 100755 --- a/shell/packaging/make_python_package.sh +++ b/shell/packaging/make_python_package.sh @@ -60,6 +60,7 @@ assemble_package_files() { cp "${SHELL_HOME}/ImpalaHttpClient.py" "${MODULE_LIB_DIR}" cp "${SHELL_HOME}/shell_exceptions.py" "${MODULE_LIB_DIR}" cp "${SHELL_HOME}/cookie_util.py" "${MODULE_LIB_DIR}" + cp "${SHELL_HOME}/kerberos_util.py" "${MODULE_LIB_DIR}" cp "${SHELL_HOME}/value_converter.py" "${MODULE_LIB_DIR}" cp "${SHELL_HOME}/thrift_printer.py" "${MODULE_LIB_DIR}" diff --git a/infra/python/deps/py3-requirements.txt b/tests/shell/test_kerberos_util.py similarity index 59% copy from infra/python/deps/py3-requirements.txt copy to tests/shell/test_kerberos_util.py index d6195a1e8..5cfad0e1c 100644 --- a/infra/python/deps/py3-requirements.txt +++ b/tests/shell/test_kerberos_util.py @@ -1,3 +1,6 @@ +#!/usr/bin/env impala-python +# -*- coding: utf-8 -*- +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,17 +18,15 @@ # specific language governing permissions and limitations # under the License. -# Python3-only requirements +from __future__ import absolute_import, division, print_function + +import unittest + +from shell.kerberos_util import get_kerb_host_from_kerberos_host_fqdn + -pylint == 2.10.2 - astroid == 2.7.3 - lazy-object-proxy == 1.6.0 - wrapt == 1.12.1 - typed-ast == 1.4.3 - configparser == 4.0.2 - isort == 4.3.21 - futures == 3.3.0; python_version == "2.7" - singledispatch == 3.6.1 - toml == 0.10.2 - platformdirs == 2.4.1 - typing-extensions == 3.10.0.2 +class TestKerberosUtil(unittest.TestCase): + def test_get_kerb_host_from_kerberos_host_fqdn(self): + assert isinstance(get_kerb_host_from_kerberos_host_fqdn("any.host:1234"), str) + assert get_kerb_host_from_kerberos_host_fqdn("any.host:1234") == "any.host" + assert get_kerb_host_from_kerberos_host_fqdn("any.host") == "any.host" diff --git a/tests/shell/test_shell_commandline_kerberos_auth.py b/tests/shell/test_shell_commandline_kerberos_auth.py new file mode 100644 index 000000000..3c302cfa5 --- /dev/null +++ b/tests/shell/test_shell_commandline_kerberos_auth.py @@ -0,0 +1,91 @@ +#!/usr/bin/env impala-python +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import absolute_import, division, print_function +import pytest + +from tests.common.impala_test_suite import ImpalaTestSuite +from tests.common.test_dimensions import create_client_protocol_http_transport +from tests.shell.util import create_impala_shell_executable_dimension +from tests.shell.util import run_impala_shell_cmd +from k5test import K5Realm + + +class TestImpalaShellKerberosAuth(ImpalaTestSuite): + + @classmethod + def get_workload(self): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + """Overrides all other add_dimension methods in super classes up the entire class + hierarchy ensuring that each test in this class only get run once + on different python versions.""" + cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport()) + cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension()) + + @pytest.mark.execute_serially + def test_kerberos_host_fqdn_option(self, vector): + """ + This test checks whether impala-shell uses the hostname specified in + the kerberos_host_fqdn option when looking for a service principal + for Kerberos authentication. + + Note: Since the Kerberos authentication is not enabled in the python + test environment, the connection will fail for sure, but the Kerberos + log can be used to check if the correct service principal is used. + """ + realm = None + try: + realm = self._create_kerberos_realm_and_user("testuser", "password") + env = { + "KRB5CCNAME": "FILE:" + realm.ccache, # Ticket cache created by kinit + "KRB5_TRACE": "/dev/stderr", # Krb log to validate the principals + } + result = run_impala_shell_cmd(vector, ['--kerberos', + '--connect_max_tries=1', + '--protocol=hs2-http', + '--kerberos_host_fqdn=any.host', + '--quiet'], env=env) + + assert "[email protected]" in result.stderr, \ + "Principal '[email protected]' should be in the Kerberos log" + assert "impala/[email protected]" in result.stderr, \ + "Principal 'impala/[email protected]' should be in the Kerberos log" + finally: + realm.stop_kdc() + + def _create_kerberos_realm_and_user(self, principal, password): + """ + Initializes a test Kerberos realm, creates a new user principal, + and runs kinit to get a Kerberos ticket. + + Args: + principal (str): Name of the new Kerberos user principal. + password (str): Password of the new Kerberos user principal. + + Returns: + realm (K5Realm): The Kerberos realm. + """ + realm = K5Realm(create_host=False, get_creds=False) + realm.addprinc(principal, password) + realm.kinit(principal, password) + return realm
