This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 04bdb4d32c598bd66c4774eac59947e4d95e0173
Author: Gergely Farkas <[email protected]>
AuthorDate: Thu Nov 9 09:54:24 2023 +0100

    IMPALA-12552: Fix Kerberos authentication issue that occurs
    in python 3 environment when kerberos_host_fqdn option is used
    
    In Pyhton 2, the sasl layer does not accept unicode strings,
    so we have to explicitly encode the kerberos_host_fqdn string
    to ascii. However, this is not the case in python 3, where
    we have to omit the encode, because if we don't do this,
    impala-shell wants to use the following service principal
    during Kerberos auth:
    my_service_name/b'my.kerberos.host.fqdn'@MY.REALM
    instead of the correct one, which is:
    my_service_name/[email protected]
    (This is because the output of the encode function
    is a byte array in python 3.)
    
    Tested with new unit tests and with a snapshot build
    manually in CDP PVC DS.
    
    Change-Id: I8b157d76824ad67faf531a529256a8afe2ab9d49
    Reviewed-on: http://gerrit.cloudera.org:8080/20691
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Wenzhe Zhou <[email protected]>
---
 .../customcluster/KerberosKdcEnvironment.java      | 35 +++++++++
 .../customcluster/LdapKerberosImpalaShellTest.java | 68 ++++++++++++++--
 .../impala/customcluster/RunShellCommand.java      |  1 -
 infra/python/deps/py2-requirements.txt             |  1 +
 infra/python/deps/py3-requirements.txt             |  1 +
 shell/impala_client.py                             |  6 +-
 shell/kerberos_util.py                             | 28 +++++++
 shell/make_shell_tarball.sh                        |  1 +
 shell/packaging/make_python_package.sh             |  1 +
 .../shell/test_kerberos_util.py                    | 27 +++----
 .../shell/test_shell_commandline_kerberos_auth.py  | 91 ++++++++++++++++++++++
 11 files changed, 237 insertions(+), 23 deletions(-)

diff --git 
a/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java 
b/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java
index 5f393c3c2..e43d6ed69 100644
--- 
a/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java
+++ 
b/fe/src/test/java/org/apache/impala/customcluster/KerberosKdcEnvironment.java
@@ -84,11 +84,29 @@ class KerberosKdcEnvironment extends ExternalResource {
     return servicePrincipal;
   }
 
+  public String getServicePrincipal(String service, String hostname) {
+    return String.format("%s/%s@%s", service, hostname, realm);
+  }
+
   public String getServiceKeytabFilePath() throws IOException {
     return new File(kerbyServer.getWorkDir().getCanonicalPath() + 
"/impala.keytab")
             .getCanonicalPath();
   }
 
+  public String getKeytabFilePathWithServicePrincipal(String service, String 
hostname)
+          throws IOException, KrbException {
+    String servicePrincipal = getServicePrincipal(service, hostname);
+
+    String keytabFileName = String.format("%s.%s.keytab", service, hostname);
+    File keytabFile = new File(kerbyServer.getWorkDir().getCanonicalPath() +
+            "/" + keytabFileName);
+
+    kerbyServer.createPrincipal(servicePrincipal, "password");
+    kerbyServer.exportPrincipal(servicePrincipal, keytabFile);
+
+    return keytabFile.getCanonicalPath();
+  }
+
   public String getKrb5ConfigPath() throws IOException {
     return kerbyServer.getWorkDir().getCanonicalPath() + "/krb5.conf";
   }
@@ -128,6 +146,23 @@ class KerberosKdcEnvironment extends ExternalResource {
     );
   }
 
+  public Map<String, String> getKerberosAuthFlagsWithCustomServicePrincipal(
+          String service, String hostname) throws IOException, KrbException {
+    return ImmutableMap.of(
+            "principal", getServicePrincipal(service, hostname), // enables 
Kerberos auth
+            "keytab_file", getKeytabFilePathWithServicePrincipal(service, 
hostname),
+
+            // To use the internal Kerberos authentication, the impala daemons 
must be
+            // started with a service principal that has a valid hostname,
+            // which in this test environment can be practically only 
"localhost",
+            // but here we want to specify a unique hostname,
+            // so we need to disable internal Kerberos authentication.
+            // Another, more complicated option would be to put more service 
principals
+            // in the keytab file above and specify the "be_principal" flag 
accordingly.
+            "skip_internal_kerberos_auth", "true"
+    );
+  }
+
   private Map<String, String> getClusterEnv() throws IOException {
     String krb5Config = getKrb5ConfigPath();
     return ImmutableMap.of(
diff --git 
a/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java
 
b/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java
index 4be56f987..3dd27e1c3 100644
--- 
a/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java
+++ 
b/fe/src/test/java/org/apache/impala/customcluster/LdapKerberosImpalaShellTest.java
@@ -586,12 +586,47 @@ public class LdapKerberosImpalaShellTest extends 
LdapKerberosImpalaShellTestBase
             "", "");
   }
 
+  /**
+   * Tests Kerberos authentication with the Kerberos hostname override option.
+   */
+  @Test
+  public void testShellKerberosAuthWithKerberosHostnameOverride()
+          throws Exception {
+    String kerberosHostFqdn = "any.host";
+
+    Map<String, String> flags = mergeFlags(
+
+            // enable Kerberos authentication
+            
kerberosKdcEnvironment.getKerberosAuthFlagsWithCustomServicePrincipal(
+                    "impala", kerberosHostFqdn)
+
+    );
+    int ret = startImpalaCluster(flagsToArgs(flags));
+    assertEquals(ret, 0);
+
+    // Cluster should pass impala-shell Kerberos auth tests:
+    // In this test, the impala daemon is running on localhost, and we connect 
to it
+    // with impala-shell, but the service principal used in the impala daemon 
does
+    // not have a "localhost" hostname, so we need to use the 
kerberos_host_fqdn option.
+    testShellKerberosAuthWithUser( kerberosKdcEnvironment, "user",
+            /* shouldSucceed */ true, kerberosHostFqdn);
+  }
+
   /**
    * Tests Kerberos authentication using impala-shell.
    */
   protected void testShellKerberosAuthWithUser(
           KerberosKdcEnvironment kerberosKdcEnvironment, String username,
           boolean shouldSucceed) throws Exception {
+    testShellKerberosAuthWithUser(kerberosKdcEnvironment, username, 
shouldSucceed, null);
+  }
+
+  /**
+   * Tests Kerberos authentication using impala-shell.
+   */
+  protected void testShellKerberosAuthWithUser(
+          KerberosKdcEnvironment kerberosKdcEnvironment, String username,
+          boolean shouldSucceed, String kerberosHostFqdn) throws Exception {
 
     List<String> protocolsToTest = Arrays.asList("beeswax", "hs2");
     if (pythonSupportsSSLContext()) {
@@ -605,12 +640,11 @@ public class LdapKerberosImpalaShellTest extends 
LdapKerberosImpalaShellTestBase
             
kerberosKdcEnvironment.createUserPrincipalAndCredentialsCache(username);
 
     for (String protocol : protocolsToTest) {
-      String[] command = {
-              "impala-shell.sh",
-              String.format("--protocol=%s", protocol),
-              "--kerberos",
-              "--query=select logged_in_user()"
-      };
+      String[] command =
+              kerberosHostFqdn == null ?
+                      createCommandForProtocol(protocol) :
+                      createCommandForProtocolAndKerberosHostFqdn(protocol,
+                              kerberosHostFqdn);
       String expectedOut =
               shouldSucceed ? 
kerberosKdcEnvironment.getUserPrincipal(username) : "";
       String expectedErr =
@@ -622,6 +656,28 @@ public class LdapKerberosImpalaShellTest extends 
LdapKerberosImpalaShellTestBase
     }
   }
 
+  private String[] createCommandForProtocol(String protocol) {
+    String[] command = {
+            "impala-shell.sh",
+            String.format("--protocol=%s", protocol),
+            "--kerberos",
+            "--query=select logged_in_user()"
+    };
+    return command;
+  }
+
+  private String[] createCommandForProtocolAndKerberosHostFqdn(String protocol,
+                                                               String 
kerberosHostFqdn) {
+    String[] command = {
+            "impala-shell.sh",
+            String.format("--protocol=%s", protocol),
+            "--kerberos",
+            String.format("--kerberos_host_fqdn=%s", kerberosHostFqdn),
+            "--query=select logged_in_user()"
+    };
+    return command;
+  }
+
   private Map<String, String> getLdapSimpleBindFlags() {
     String ldapUri = String.format("ldap://localhost:%s";,
             serverRule.getLdapServer().getPort());
diff --git 
a/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java 
b/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java
index 9dee84373..c2ba1abe4 100644
--- a/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java
+++ b/fe/src/test/java/org/apache/impala/customcluster/RunShellCommand.java
@@ -22,7 +22,6 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
-import java.io.IOException;
 
 /**
  * Helper class to run a shell command.
diff --git a/infra/python/deps/py2-requirements.txt 
b/infra/python/deps/py2-requirements.txt
index 122b5ab68..919f7d41c 100644
--- a/infra/python/deps/py2-requirements.txt
+++ b/infra/python/deps/py2-requirements.txt
@@ -32,3 +32,4 @@ flake8 == 3.9.2
     contextlib2 == 0.6.0
     pathlib2 == 2.3.7.post1
     zipp == 1.2.0
+k5test==0.9.2
diff --git a/infra/python/deps/py3-requirements.txt 
b/infra/python/deps/py3-requirements.txt
index d6195a1e8..b61bc461c 100644
--- a/infra/python/deps/py3-requirements.txt
+++ b/infra/python/deps/py3-requirements.txt
@@ -29,3 +29,4 @@ pylint == 2.10.2
   toml == 0.10.2
   platformdirs == 2.4.1
   typing-extensions == 3.10.0.2
+k5test==0.10.3
diff --git a/shell/impala_client.py b/shell/impala_client.py
index a8cd36f0c..ff6af015c 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -46,6 +46,7 @@ from TCLIService.TCLIService import (TExecuteStatementReq, 
TOpenSessionReq,
     TOperationState, TFetchResultsReq, TFetchOrientation, TGetLogReq,
     TGetResultSetMetadataReq, TTypeId, TCancelOperationReq, TCloseOperationReq)
 from ImpalaHttpClient import ImpalaHttpClient
+from kerberos_util import get_kerb_host_from_kerberos_host_fqdn
 from thrift.protocol import TBinaryProtocol
 from thrift_sasl import TSaslClientTransport
 from thrift.transport.TSocket import TSocket
@@ -432,7 +433,7 @@ class ImpalaClient(object):
     elif self.use_kerberos or self.kerberos_host_fqdn:
       # Set the Kerberos service
       if self.kerberos_host_fqdn is not None:
-        kerb_host = self.kerberos_host_fqdn.split(':')[0].encode('ascii', 
'ignore')
+        kerb_host = 
get_kerb_host_from_kerberos_host_fqdn(self.kerberos_host_fqdn)
       else:
         kerb_host = self.impalad_host
       kerb_service = "{0}@{1}".format(self.kerberos_service_name, kerb_host)
@@ -463,14 +464,13 @@ class ImpalaClient(object):
       # Systems. Only attempt to import TSSLSocket if the user wants an SSL 
connection.
       from TSSLSocketWithWildcardSAN import TSSLSocketWithWildcardSAN
 
-    # sasl does not accept unicode strings, explicitly encode the string into 
ascii.
     # The kerberos_host_fqdn option exposes the SASL client's hostname 
attribute to
     # the user. impala-shell checks to ensure this host matches the host in 
the kerberos
     # principal. So when a load balancer is configured to be used, its 
hostname is
     # expected by impala-shell. Setting this option to the load balancer 
hostname allows
     # impala-shell to connect directly to an impalad.
     if self.kerberos_host_fqdn is not None:
-      sasl_host = self.kerberos_host_fqdn.split(':')[0].encode('ascii', 
'ignore')
+      sasl_host = 
get_kerb_host_from_kerberos_host_fqdn(self.kerberos_host_fqdn)
     else:
       sasl_host = self.impalad_host
 
diff --git a/shell/kerberos_util.py b/shell/kerberos_util.py
new file mode 100644
index 000000000..2d7033116
--- /dev/null
+++ b/shell/kerberos_util.py
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import sys
+
+
+def get_kerb_host_from_kerberos_host_fqdn(kerberos_host_fqdn):
+  kerb_host = kerberos_host_fqdn.split(':')[0]
+  if sys.version_info.major < 3:
+    # sasl does not accept unicode strings, explicitly encode the string into 
ascii.
+    kerb_host = kerb_host.encode('ascii', 'ignore')
+  return kerb_host
diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh
index 15b44ffc4..0fcc08b7a 100755
--- a/shell/make_shell_tarball.sh
+++ b/shell/make_shell_tarball.sh
@@ -167,6 +167,7 @@ cp ${SHELL_HOME}/ImpalaHttpClient.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/shell_exceptions.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/shell_output.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/cookie_util.py ${TARBALL_ROOT}/lib
+cp ${SHELL_HOME}/kerberos_util.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/value_converter.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/impala-shell ${TARBALL_ROOT}
 cp ${SHELL_HOME}/impala_shell.py ${TARBALL_ROOT}
diff --git a/shell/packaging/make_python_package.sh 
b/shell/packaging/make_python_package.sh
index 7bdd9ae74..a5af9d1b3 100755
--- a/shell/packaging/make_python_package.sh
+++ b/shell/packaging/make_python_package.sh
@@ -60,6 +60,7 @@ assemble_package_files() {
   cp "${SHELL_HOME}/ImpalaHttpClient.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/shell_exceptions.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/cookie_util.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/kerberos_util.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/value_converter.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/thrift_printer.py" "${MODULE_LIB_DIR}"
 
diff --git a/infra/python/deps/py3-requirements.txt 
b/tests/shell/test_kerberos_util.py
similarity index 59%
copy from infra/python/deps/py3-requirements.txt
copy to tests/shell/test_kerberos_util.py
index d6195a1e8..5cfad0e1c 100644
--- a/infra/python/deps/py3-requirements.txt
+++ b/tests/shell/test_kerberos_util.py
@@ -1,3 +1,6 @@
+#!/usr/bin/env impala-python
+# -*- coding: utf-8 -*-
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -15,17 +18,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Python3-only requirements
+from __future__ import absolute_import, division, print_function
+
+import unittest
+
+from shell.kerberos_util import get_kerb_host_from_kerberos_host_fqdn
+
 
-pylint == 2.10.2
-  astroid == 2.7.3
-    lazy-object-proxy == 1.6.0
-    wrapt == 1.12.1
-    typed-ast == 1.4.3
-  configparser == 4.0.2
-  isort == 4.3.21
-    futures == 3.3.0; python_version == "2.7"
-  singledispatch == 3.6.1
-  toml == 0.10.2
-  platformdirs == 2.4.1
-  typing-extensions == 3.10.0.2
+class TestKerberosUtil(unittest.TestCase):
+  def test_get_kerb_host_from_kerberos_host_fqdn(self):
+    assert isinstance(get_kerb_host_from_kerberos_host_fqdn("any.host:1234"), 
str)
+    assert get_kerb_host_from_kerberos_host_fqdn("any.host:1234") == "any.host"
+    assert get_kerb_host_from_kerberos_host_fqdn("any.host") == "any.host"
diff --git a/tests/shell/test_shell_commandline_kerberos_auth.py 
b/tests/shell/test_shell_commandline_kerberos_auth.py
new file mode 100644
index 000000000..3c302cfa5
--- /dev/null
+++ b/tests/shell/test_shell_commandline_kerberos_auth.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env impala-python
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import absolute_import, division, print_function
+import pytest
+
+from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.test_dimensions import create_client_protocol_http_transport
+from tests.shell.util import create_impala_shell_executable_dimension
+from tests.shell.util import run_impala_shell_cmd
+from k5test import K5Realm
+
+
+class TestImpalaShellKerberosAuth(ImpalaTestSuite):
+
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    """Overrides all other add_dimension methods in super classes up the 
entire class
+    hierarchy ensuring that each test in this class only get run once
+    on different python versions."""
+    cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport())
+    
cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
+
+  @pytest.mark.execute_serially
+  def test_kerberos_host_fqdn_option(self, vector):
+    """
+    This test checks whether impala-shell uses the hostname specified in
+    the kerberos_host_fqdn option when looking for a service principal
+    for Kerberos authentication.
+
+    Note: Since the Kerberos authentication is not enabled in the python
+    test environment, the connection will fail for sure, but the Kerberos
+    log can be used to check if the correct service principal is used.
+    """
+    realm = None
+    try:
+      realm = self._create_kerberos_realm_and_user("testuser", "password")
+      env = {
+        "KRB5CCNAME": "FILE:" + realm.ccache,  # Ticket cache created by kinit
+        "KRB5_TRACE": "/dev/stderr",           # Krb log to validate the 
principals
+      }
+      result = run_impala_shell_cmd(vector, ['--kerberos',
+                                    '--connect_max_tries=1',
+                                    '--protocol=hs2-http',
+                                    '--kerberos_host_fqdn=any.host',
+                                    '--quiet'], env=env)
+
+      assert "[email protected]" in result.stderr, \
+        "Principal '[email protected]' should be in the Kerberos log"
+      assert "impala/[email protected]" in result.stderr, \
+        "Principal 'impala/[email protected]' should be in the Kerberos log"
+    finally:
+      realm.stop_kdc()
+
+  def _create_kerberos_realm_and_user(self, principal, password):
+    """
+    Initializes a test Kerberos realm, creates a new user principal,
+    and runs kinit to get a Kerberos ticket.
+
+    Args:
+      principal (str): Name of the new Kerberos user principal.
+      password (str): Password of the new Kerberos user principal.
+
+    Returns:
+      realm (K5Realm): The Kerberos realm.
+    """
+    realm = K5Realm(create_host=False, get_creds=False)
+    realm.addprinc(principal, password)
+    realm.kinit(principal, password)
+    return realm

Reply via email to