IMPALA-3820: Handle linkage errors while loading Java UDFs in Catalog Currently Catalog aborts at startup if the class loader loading Java UDFs throws a LinkageError (e.g., NoClassDefError). The code has been designed to catch a generic "Exception" and ignore any incompatible or erroneous UDFs but it can't handle any "Error"s. Java generally doesn't encourage to handle all types of "Error"s, however given the Catalog tries to load thirdparty udfs, it is possible that there might be missing dependencies that the Catalog is expected to handle.
Testing: Added an e-e test that loads a corrupt Java UDF from Hive and restarts Catalog to make sure its up and the function count tallies. Change-Id: I1109614f1617caa9bb27e8c151e902aae71a6b41 Reviewed-on: http://gerrit.cloudera.org:8080/4092 Reviewed-by: Bharath Vissapragada <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/deaccbb9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/deaccbb9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/deaccbb9 Branch: refs/heads/master Commit: deaccbb967138afacbe261e6e4fb81312688b09e Parents: 8fb1598 Author: Bharath Vissapragada <[email protected]> Authored: Tue Jul 5 02:46:26 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Tue Aug 23 12:20:44 2016 +0000 ---------------------------------------------------------------------- .../impala/catalog/CatalogServiceCatalog.java | 8 +++- .../impala/hive/executor/UdfExecutor.java | 2 +- tests/custom_cluster/test_permanent_udfs.py | 20 ++++++++++ tests/test-hive-udfs/pom.xml | 10 +++++ .../java/com/cloudera/impala/UnresolvedUdf.java | 39 ++++++++++++++++++++ 5 files changed, 77 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java index ae92996..f0993bb 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java @@ -60,6 +60,7 @@ import com.cloudera.impala.common.ImpalaException; import com.cloudera.impala.common.ImpalaRuntimeException; import com.cloudera.impala.common.JniUtil; import com.cloudera.impala.common.Pair; +import com.cloudera.impala.hive.executor.UdfExecutor; import com.cloudera.impala.thrift.TCatalog; import com.cloudera.impala.thrift.TCatalogObject; import com.cloudera.impala.thrift.TCatalogObjectType; @@ -458,7 +459,7 @@ public class CatalogServiceCatalog extends Catalog { // Load each method in the UDF class and create the corresponding Impala Function // object. for (Method m: udfClass.getMethods()) { - if (!m.getName().equals("evaluate")) continue; + if (!m.getName().equals(UdfExecutor.UDF_FUNCTION_NAME)) continue; Function fn = ScalarFunction.fromHiveFunction(db, function.getFunctionName(), function.getClassName(), m.getParameterTypes(), m.getReturnType(), jarUri); @@ -481,6 +482,11 @@ public class CatalogServiceCatalog extends Catalog { } catch (Exception e) { LOG.error("Skipping function load: " + function.getFunctionName(), e); throw new ImpalaRuntimeException("Error extracting functions", e); + } catch (LinkageError e) { + String errorMsg = "Error resolving dependencies for Java function: " + db + "." + + function.getFunctionName(); + LOG.error(errorMsg); + throw new ImpalaRuntimeException(errorMsg, e); } return result; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java b/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java index bdfe4cd..242c704 100644 --- a/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java +++ b/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java @@ -61,7 +61,7 @@ import com.google.common.collect.Lists; public class UdfExecutor { private static final Logger LOG = Logger.getLogger(UdfExecutor.class); // By convention, the function in the class must be called evaluate() - private static final String UDF_FUNCTION_NAME = "evaluate"; + public static final String UDF_FUNCTION_NAME = "evaluate"; // Object to deserialize ctor params from BE. private final static TBinaryProtocol.Factory PROTOCOL_FACTORY = http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/tests/custom_cluster/test_permanent_udfs.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_permanent_udfs.py b/tests/custom_cluster/test_permanent_udfs.py index 28b4251..2f2457a 100644 --- a/tests/custom_cluster/test_permanent_udfs.py +++ b/tests/custom_cluster/test_permanent_udfs.py @@ -156,6 +156,26 @@ class TestUdfPersistence(CustomClusterTestSuite): @SkipIfS3.hive @SkipIfLocal.hive @pytest.mark.execute_serially + def test_corrupt_java_udf(self): + """ IMPALA-3820: This tests if the Catalog server can gracefully handle + Java UDFs with unresolved dependencies.""" + if self.exploration_strategy() != 'exhaustive': pytest.skip() + # Create a Java UDF with unresolved dependencies from Hive and + # restart the Catalog server. Catalog should ignore the + # function load. + self.run_stmt_in_hive("create function %s.corrupt_udf as \ + 'com.cloudera.impala.UnresolvedUdf' using jar '%s'" + % (self.JAVA_FN_TEST_DB, self.JAVA_UDF_JAR)) + self.__restart_cluster() + # Make sure the function count is 0 + self.verify_function_count( + "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0) + + + @SkipIfIsilon.hive + @SkipIfS3.hive + @SkipIfLocal.hive + @pytest.mark.execute_serially def test_java_udfs_hive_integration(self): ''' This test checks the integration between Hive and Impala on CREATE FUNCTION and DROP FUNCTION statements for persistent Java UDFs. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/tests/test-hive-udfs/pom.xml ---------------------------------------------------------------------- diff --git a/tests/test-hive-udfs/pom.xml b/tests/test-hive-udfs/pom.xml index 11c3096..002c52a 100644 --- a/tests/test-hive-udfs/pom.xml +++ b/tests/test-hive-udfs/pom.xml @@ -26,6 +26,11 @@ <artifactId>hive-exec</artifactId> <version>${hive.version}</version> </dependency> + <dependency> + <groupId>com.googlecode.libphonenumber</groupId> + <artifactId>libphonenumber</artifactId> + <version>4.0</version> + </dependency> </dependencies> <build> @@ -75,5 +80,10 @@ <enabled>false</enabled> </snapshots> </repository> + <repository> + <id>com.google.libphonenumber</id> + <url>http://repo1.maven.org/maven2/com/googlecode/libphonenumber</url> + <name>Google phone number library</name> + </repository> </repositories> </project> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java ---------------------------------------------------------------------- diff --git a/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java b/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java new file mode 100644 index 0000000..6487bab --- /dev/null +++ b/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java @@ -0,0 +1,39 @@ +// Copyright 2016 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.cloudera.impala; + +// Import a class that is not shaded in the UDF jar. +import com.google.i18n.phonenumbers.NumberParseException; + +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.IntWritable; + +/** + * This class intends to test a UDF that has some classes that the Catalog's + * class loader can't resolve. We import the class NumberParseException from + * Google's phone number library that won't be shaded with the UDF jar. The + * Catalog should gracefully handle this situation by ignoring the load of + * Java UDF that references this jar. + * + * The jar for this file can be built by running "mvn clean package" in + * tests/test-hive-udfs. This is run in testdata/bin/create-load-data.sh, and + * copied to HDFS in testdata/bin/copy-udfs-uda.sh. + */ +public class UnresolvedUdf extends UDF { + + public IntWritable evaluate(IntWritable a) throws NumberParseException { + return a; + } +}
