This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit bda8d95f2a9b566be9eb3d56b453ff545b7a5b6a
Author: Todd Lipcon <t...@apache.org>
AuthorDate: Fri May 3 15:52:59 2019 -0700

    IMPALA-8369 (part 3): Hive 3: fix test_permanent_udfs.py for Hive 3 support
    
    This fixes two issues in test_permanent_udfs.py:
    
    - Two of Hive's built-in UDFs were ported to the new GenericUDF
      interface, which Impala can't execute. These UDFs are now excluded
      from the test when running with Hive 3 (see the version-gating
      sketch after the diff).
    
    - The 'hive' command line is deprecated, so the test now uses the
      standard HS2 approach to run Hive queries. Hive 2+ caches UDFs, so
      now that we are connecting to an already-running HS2 rather than
      starting a new standalone 'hive' command, we need to explicitly
      invalidate that cache with 'RELOAD FUNCTION' after making changes
      to UDFs in Impala (as shown in the sketch below).
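
    A minimal sketch of that cache-invalidation pattern, assuming a helper
    that shells out to beeline against the already-running HS2 (the helper
    name, JDBC URL, and port are illustrative, not part of this patch):

        # Illustrative only: the beeline URL/port and helper name are assumptions.
        import subprocess
        from getpass import getuser

        def describe_udf_via_hs2(db, udf, jdbc_url='jdbc:hive2://localhost:11050'):
          # RELOAD FUNCTION refreshes Hive's cached function registry so that
          # DESCRIBE FUNCTION sees UDFs created or dropped from the Impala side.
          stmt = 'RELOAD FUNCTION ; DESCRIBE FUNCTION {0}.{1}'.format(db, udf)
          call = subprocess.Popen(
              ['beeline', '-u', jdbc_url, '-n', getuser(), '-e', stmt],
              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
          (stdout, stderr) = call.communicate()
          if call.returncode != 0:
            raise RuntimeError(stderr)
          return stdout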
    
    Change-Id: I7f50845c7d4769d8843cad87988498e165902169
    Reviewed-on: http://gerrit.cloudera.org:8080/13236
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Reviewed-by: Todd Lipcon <t...@apache.org>
---
 tests/common/impala_test_suite.py           |  2 +
 tests/custom_cluster/test_permanent_udfs.py | 59 ++++++++++++-----------------
 2 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 98e7d71..fffad06 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -775,6 +775,8 @@ class ImpalaTestSuite(BaseTestSuite):
     # This should never happen.
     assert 0, 'Unable to get location for table: ' + table_name
 
+  # TODO(todd) make this use Thrift to connect to HS2 instead of shelling
+  # out to beeline for better performance
   def run_stmt_in_hive(self, stmt, username=getuser()):
     """
     Run a statement in Hive, returning stdout if successful and throwing
diff --git a/tests/custom_cluster/test_permanent_udfs.py b/tests/custom_cluster/test_permanent_udfs.py
index 8e8819a..41b4b01 100644
--- a/tests/custom_cluster/test_permanent_udfs.py
+++ b/tests/custom_cluster/test_permanent_udfs.py
@@ -85,19 +85,6 @@ class TestUdfPersistence(CustomClusterTestSuite):
        % self.HIVE_IMPALA_INTEGRATION_DB)
     shutil.rmtree(self.LOCAL_LIBRARY_DIR, ignore_errors=True)
 
-  def run_stmt_in_hive(self, stmt):
-    """
-    Run a statement in Hive, returning stdout if successful and throwing
-    RuntimeError(stderr) if not.
-    """
-    call = subprocess.Popen(
-        ['hive', '-e', stmt], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (stdout, stderr) = call.communicate()
-    call.wait()
-    if call.returncode != 0:
-      raise RuntimeError(stderr)
-    return stdout
-
   def __load_drop_functions(self, template, database, location):
     queries = template.format(database=database, location=location)
     # Split queries and remove empty lines
@@ -160,6 +147,13 @@ class TestUdfPersistence(CustomClusterTestSuite):
         db=self.HIVE_IMPALA_INTEGRATION_DB))
     assert stdout is not None and result in str(stdout.data)
 
+  def __describe_udf_in_hive(self, udf, db=HIVE_IMPALA_INTEGRATION_DB):
+    """ Describe the specified function, returning stdout. """
+    # Hive 2+ caches UDFs, so we have to explicitly invalidate the UDF if
+    # we've made changes on the Impala side.
+    stmt = "RELOAD FUNCTION ; DESCRIBE FUNCTION {0}.{1}".format(db, udf)
+    return self.run_stmt_in_hive(stmt)
+
   @SkipIfIsilon.hive
   @SkipIfS3.hive
   @SkipIfABFS.hive
@@ -203,21 +197,22 @@ class TestUdfPersistence(CustomClusterTestSuite):
     # Hive has bug that doesn't display the permanent function in show functions
     # statement. So this test relies on describe function statement which prints
     # a message if the function is not present.
-    for (fn, fn_symbol) in self.SAMPLE_JAVA_UDFS:
+    udfs_to_test = list(self.SAMPLE_JAVA_UDFS)
+    if int(os.environ['IMPALA_HIVE_MAJOR_VERSION']) == 2:
+      udfs_to_test += self.SAMPLE_JAVA_UDFS_HIVE2_ONLY
+    for (fn, fn_symbol) in udfs_to_test:
       self.client.execute(self.DROP_JAVA_UDF_TEMPLATE.format(
           db=self.HIVE_IMPALA_INTEGRATION_DB, function=fn))
       self.client.execute(self.CREATE_JAVA_UDF_TEMPLATE.format(
           db=self.HIVE_IMPALA_INTEGRATION_DB, function=fn,
           location=self.HIVE_UDF_JAR, symbol=fn_symbol))
-      hive_stdout = self.run_stmt_in_hive("DESCRIBE FUNCTION %s.%s"
-        % (self.HIVE_IMPALA_INTEGRATION_DB, fn))
+      hive_stdout = self.__describe_udf_in_hive(fn)
       assert "does not exist" not in hive_stdout
       self.__verify_udf_in_hive(fn)
       # Drop the function from Impala and check if it reflects in Hive.
       self.client.execute(self.DROP_JAVA_UDF_TEMPLATE.format(
           db=self.HIVE_IMPALA_INTEGRATION_DB, function=fn))
-      hive_stdout = self.run_stmt_in_hive("DESCRIBE FUNCTION %s.%s"
-        % (self.HIVE_IMPALA_INTEGRATION_DB, fn))
+      hive_stdout = self.__describe_udf_in_hive(fn)
       assert "does not exist" in hive_stdout
 
     # Create the same set of functions from Hive and make sure they are visible
@@ -226,12 +221,12 @@ class TestUdfPersistence(CustomClusterTestSuite):
     REFRESH_COMMANDS = ["INVALIDATE METADATA",
         "REFRESH FUNCTIONS {0}".format(self.HIVE_IMPALA_INTEGRATION_DB)]
     for refresh_command in REFRESH_COMMANDS:
-      for (fn, fn_symbol) in self.SAMPLE_JAVA_UDFS:
+      for (fn, fn_symbol) in udfs_to_test:
         self.run_stmt_in_hive(self.CREATE_HIVE_UDF_TEMPLATE.format(
             db=self.HIVE_IMPALA_INTEGRATION_DB, function=fn,
             location=self.HIVE_UDF_JAR, symbol=fn_symbol))
       self.client.execute(refresh_command)
-      for (fn, fn_symbol) in self.SAMPLE_JAVA_UDFS:
+      for (fn, fn_symbol) in udfs_to_test:
         result = self.client.execute("SHOW FUNCTIONS IN {0}".format(
             self.HIVE_IMPALA_INTEGRATION_DB))
         assert result is not None and len(result.data) > 0 and\
@@ -456,15 +451,13 @@ class TestUdfPersistence(CustomClusterTestSuite):
     assert "No compatible function signatures" in str(result)
     self.verify_function_count(
         "SHOW FUNCTIONS IN %s like 'badudf*'" % self.JAVA_FN_TEST_DB, 0)
-    result = self.run_stmt_in_hive("DESCRIBE FUNCTION %s.%s"
-        % (self.JAVA_FN_TEST_DB, "badudf"))
+    result = self.__describe_udf_in_hive('badudf', db=self.JAVA_FN_TEST_DB)
     assert "does not exist" in str(result)
     # Create the same function from hive and make sure Impala doesn't load any signatures.
     self.run_stmt_in_hive(self.CREATE_HIVE_UDF_TEMPLATE.format(
         db=self.JAVA_FN_TEST_DB, function="badudf",
         location=self.JAVA_UDF_JAR, symbol="org.apache.impala.IncompatibleUdfTest"))
-    result = self.run_stmt_in_hive("DESCRIBE FUNCTION %s.%s"
-        % (self.JAVA_FN_TEST_DB, "badudf"))
+    result = self.__describe_udf_in_hive('badudf', db=self.JAVA_FN_TEST_DB)
     assert "does not exist" not in str(result)
     self.client.execute("INVALIDATE METADATA")
     self.verify_function_count(
@@ -477,8 +470,7 @@ class TestUdfPersistence(CustomClusterTestSuite):
     # Drop the function and make sure the function if dropped from hive
     self.client.execute(self.DROP_JAVA_UDF_TEMPLATE.format(
         db=self.JAVA_FN_TEST_DB, function="badudf"))
-    result = self.run_stmt_in_hive("DESCRIBE FUNCTION %s.%s"
-        % (self.JAVA_FN_TEST_DB, "badudf"))
+    result = self.__describe_udf_in_hive('badudf', db=self.JAVA_FN_TEST_DB)
     assert "does not exist" in str(result)
 
   # Create sample UDA functions in {database} from library {location}
@@ -505,19 +497,18 @@ class TestUdfPersistence(CustomClusterTestSuite):
       ('udfbin', 'org.apache.hadoop.hive.ql.udf.UDFBin'),
       ('udfhex', 'org.apache.hadoop.hive.ql.udf.UDFHex'),
       ('udfconv', 'org.apache.hadoop.hive.ql.udf.UDFConv'),
-      # TODO UDFHour was moved from UDF to GenericUDF in Hive 3
-      # This test will fail when running against HMS-3 unless we add
-      # support for GenericUDFs to handle such cases
-      ('udfhour', 'org.apache.hadoop.hive.ql.udf.UDFHour'),
       ('udflike', 'org.apache.hadoop.hive.ql.udf.UDFLike'),
       ('udfsign', 'org.apache.hadoop.hive.ql.udf.UDFSign'),
-      # TODO UDFYear moved to GenericUDF in Hive 3
-      # This test will fail when running against HMS-3 unless we add
-      # support for GenericUDFs
-      ('udfyear', 'org.apache.hadoop.hive.ql.udf.UDFYear'),
       ('udfascii','org.apache.hadoop.hive.ql.udf.UDFAscii')
   ]
 
+  # These UDFs are available in Hive 2 but in Hive 3 are now implemented
+  # using a new GenericUDF interface that we don't support.
+  SAMPLE_JAVA_UDFS_HIVE2_ONLY = [
+      ('udfhour', 'org.apache.hadoop.hive.ql.udf.UDFHour'),
+      ('udfyear', 'org.apache.hadoop.hive.ql.udf.UDFYear'),
+  ]
+
   # Simple tests to verify java udfs in SAMPLE_JAVA_UDFS
   SAMPLE_JAVA_UDFS_TEST = {
     'udfpi' : ('{db}.udfpi()', '3.141592653589793'),

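The Hive-version gating in the test above keys off the IMPALA_HIVE_MAJOR_VERSION
environment variable. A minimal sketch of that pattern, using the list contents
from the patch (abbreviated; the standalone helper function is illustrative and
not part of the patch):

    # Illustrative sketch of the gating used to pick which Java UDFs to test.
    import os

    SAMPLE_JAVA_UDFS = [
        ('udfbin', 'org.apache.hadoop.hive.ql.udf.UDFBin'),
        ('udfhex', 'org.apache.hadoop.hive.ql.udf.UDFHex'),
    ]
    # Built-ins that moved to the GenericUDF interface in Hive 3, which Impala
    # can't execute, so they are only exercised against Hive 2.
    SAMPLE_JAVA_UDFS_HIVE2_ONLY = [
        ('udfhour', 'org.apache.hadoop.hive.ql.udf.UDFHour'),
        ('udfyear', 'org.apache.hadoop.hive.ql.udf.UDFYear'),
    ]

    def udfs_to_test():
      # Hive 2 can still run the old-style UDFs; Hive 3 replaced them with
      # GenericUDF implementations, so they are skipped there.
      udfs = list(SAMPLE_JAVA_UDFS)
      if int(os.environ['IMPALA_HIVE_MAJOR_VERSION']) == 2:
        udfs += SAMPLE_JAVA_UDFS_HIVE2_ONLY
      return udfs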