This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ad438e7e3cc89a7e8511fcfe67c13411de987007
Author: Daniel Becker <[email protected]>
AuthorDate: Wed Oct 12 17:59:42 2022 +0200

    IMPALA-11581: ALTER TABLE RENAME TO doesn't update transient_lastDdlTime
    
    The following statements behave differently when executed via Hive or
    Impala:
    
    CREATE TABLE rename_from (i int);
    ALTER TABLE rename_from RENAME TO rename_to;
    
    Hive updates transient_lastDdlTime while Impala leaves it unchanged.
    
    This patch fixes the behaviour of Impala so that it also updates
    transient_lastDdlTime.
    
    Testing:
     - Added a test in test_last_ddl_time_update.py that checks that
       transient_lastDdlTime is updated on rename. Refactored the class a
       bit so that the new test fits in easier.
    
    Change-Id: Ib550feaebbad9cf6c9b34ab046293968b157a50c
    Reviewed-on: http://gerrit.cloudera.org:8080/19137
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/service/CatalogOpExecutor.java   |   1 +
 tests/metadata/test_last_ddl_time_update.py        | 117 +++++++++++++--------
 2 files changed, 77 insertions(+), 41 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java 
b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 04e525c6d..23712a844 100755
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -5036,6 +5036,7 @@ public class CatalogOpExecutor {
     boolean needsHmsAlterTable = !isSynchronizedTable || !integratedHmsTable;
     if (needsHmsAlterTable) {
       try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+        Table.updateTimestampProperty(msTbl, Table.TBL_PROP_LAST_DDL_TIME);
         msClient.getHiveClient().alter_table(
             tableName.getDb(), tableName.getTbl(), msTbl);
       } catch (TException e) {
diff --git a/tests/metadata/test_last_ddl_time_update.py 
b/tests/metadata/test_last_ddl_time_update.py
index e9b48fc88..36a4db358 100644
--- a/tests/metadata/test_last_ddl_time_update.py
+++ b/tests/metadata/test_last_ddl_time_update.py
@@ -49,7 +49,7 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
       self.test_suite = test_suite
       self.db_name = db_name
       self.tbl_name = tbl_name
-      self.fq_tbl_name = "%s.%s" % (self.db_name, self.tbl_name)
+      self.fq_tbl_name = self._generate_fq_tbl_name(self.db_name, 
self.tbl_name)
 
     def expect_no_time_change(self, query):
      """Neither transient_lastDdlTime nor impala.lastComputeStatsTime should be
@@ -72,34 +72,40 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
       """
       self.run_test(query, False, True)
 
-    def run_test(self, query, expect_changed_ddl_time, 
expect_changed_stats_time):
+    def expect_ddl_time_change_on_rename(self, new_tbl_name):
+      """
+      Checks that after an ALTER TABLE ... RENAME query transient_lastDdlTime 
is higher on
+      the new table than it was on the old table.
+      """
+      query = "alter table %(TBL)s rename to {}".format(self.db_name + "." + 
new_tbl_name)
+      self.run_test(query, True, False, new_tbl_name)
+
+    def run_test(self, query, expect_changed_ddl_time, 
expect_changed_stats_time,
+        new_tbl_name=None):
       """
       Runs the query and compares the last ddl/compute stats time before and 
after
      executing the query. If expect_changed_ddl_time or 
expect_changed_stats_time
       is true, then we expect the given table property to be increased, 
otherwise we
       expect it to be unchanged.
+      If the query renames the table, the new table name has to be provided in
+      'new_tbl_name' so that the new table is checked after the query.
       The following strings are substituted in the query: "%(TBL)s" and 
"%(WAREHOUSE)s"
       """
-
-      HIVE_LAST_DDL_TIME_PARAM_KEY = "transient_lastDdlTime"
-      LAST_COMPUTE_STATS_TIME_KEY = "impala.lastComputeStatsTime"
-
       # Get timestamps before executing query.
-      table = self.test_suite.hive_client.get_table(self.db_name, 
self.tbl_name)
-      assert table is not None
-      beforeDdlTime = table.parameters[HIVE_LAST_DDL_TIME_PARAM_KEY]
-      beforeStatsTime = table.parameters[LAST_COMPUTE_STATS_TIME_KEY]
+      (beforeDdlTime, beforeStatsTime) = self._get_ddl_and_stats_time(
+          self.db_name, self.tbl_name)
       # HMS uses a seconds granularity on the last ddl time - sleeping 1100 ms 
should be
       # enough to ensure that the new timestamps are strictly greater than the 
old ones.
       time.sleep (1.1)
 
       self.test_suite.execute_query(query %
         {'TBL': self.fq_tbl_name, 'WAREHOUSE': WAREHOUSE})
+      if new_tbl_name is not None:
+        self._update_name(new_tbl_name)
 
       # Get timestamps after executing query.
-      table = self.test_suite.hive_client.get_table(self.db_name, 
self.tbl_name)
-      afterDdlTime = table.parameters[HIVE_LAST_DDL_TIME_PARAM_KEY]
-      afterStatsTime = table.parameters[LAST_COMPUTE_STATS_TIME_KEY]
+      (afterDdlTime, afterStatsTime) = self._get_ddl_and_stats_time(
+          self.db_name, self.tbl_name)
 
       if expect_changed_ddl_time:
         # check that the new ddlTime is strictly greater than the old one.
@@ -113,17 +119,46 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
       else:
         assert long(afterStatsTime) == long(beforeStatsTime)
 
-  def test_alter(self, vector, unique_database):
-    TBL_NAME = "alter_test_tbl"
-    FQ_TBL_NAME = unique_database + "." + TBL_NAME
+    def _update_name(self, new_tbl_name):
+      """
+      Update the name, for example because of a table rename ddl. The database 
name does
+      not change.
+      """
+      self.tbl_name = new_tbl_name
+      self.fq_tbl_name = self._generate_fq_tbl_name(self.db_name, 
self.tbl_name)
 
-    self.execute_query("create external table %s (i int) "
-                       "partitioned by (j int, s string)" % FQ_TBL_NAME)
+    def _generate_fq_tbl_name(self, db_name, tbl_name):
+      return "%s.%s" % (db_name, tbl_name)
+
+    def _get_ddl_and_stats_time(self, db_name, tbl_name):
+      HIVE_LAST_DDL_TIME_PARAM_KEY = "transient_lastDdlTime"
+      LAST_COMPUTE_STATS_TIME_KEY = "impala.lastComputeStatsTime"
+
+      table = self.test_suite.hive_client.get_table(db_name, tbl_name)
+      assert table is not None
+      ddlTime = table.parameters[HIVE_LAST_DDL_TIME_PARAM_KEY]
+      statsTime = table.parameters[LAST_COMPUTE_STATS_TIME_KEY]
+      return (ddlTime, statsTime)
+
+  def _create_table(self, fq_tbl_name, is_kudu):
+    if is_kudu:
+      self.execute_query("create table %s (i int primary key) "
+                         "partition by hash(i) partitions 3 stored as kudu" % 
fq_tbl_name)
+    else:
+      self.execute_query("create external table %s (i int) "
+                         "partitioned by (j int, s string)" % fq_tbl_name)
+
+  def _create_and_init_test_helper(self, unique_database, tbl_name, is_kudu):
+    helper = TestLastDdlTimeUpdate.TestHelper(self, unique_database, tbl_name)
+    self._create_table(helper.fq_tbl_name, is_kudu)
 
     # compute statistics to fill table property impala.lastComputeStatsTime
-    self.execute_query("compute stats %s" % FQ_TBL_NAME)
+    self.execute_query("compute stats %s" % helper.fq_tbl_name)
+    return helper
 
-    h = TestLastDdlTimeUpdate.TestHelper(self, unique_database, TBL_NAME)
+  def test_alter(self, vector, unique_database):
+    TBL_NAME = "alter_test_tbl"
+    h = self._create_and_init_test_helper(unique_database, TBL_NAME, False)
 
     # add/drop partitions
     h.expect_no_time_change("alter table %(TBL)s add partition (j=1, 
s='2012')")
@@ -140,8 +175,8 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
     self.run_common_test_cases(h)
 
     # prepare for incremental statistics tests
-    self.execute_query("drop stats %s" % FQ_TBL_NAME)
-    self.execute_query("alter table %s add partition (j=1, s='2012')" % 
FQ_TBL_NAME)
+    self.execute_query("drop stats %s" % h.fq_tbl_name)
+    self.execute_query("alter table %s add partition (j=1, s='2012')" % 
h.fq_tbl_name)
 
     # compute incremental statistics
     h.expect_stat_time_change("compute incremental stats %(TBL)s")
@@ -154,7 +189,7 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
     # prepare for table sample statistics tests
     self.execute_query(
         "alter table %s set tblproperties 
('impala.enable.stats.extrapolation'='true')"
-        % FQ_TBL_NAME)
+        % h.fq_tbl_name)
 
     # compute sampled statistics
     h.expect_stat_time_change("compute stats %(TBL)s tablesample system(20)")
@@ -162,14 +197,7 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
 
   def test_insert(self, vector, unique_database):
     TBL_NAME = "insert_test_tbl"
-    FQ_TBL_NAME = unique_database + "." + TBL_NAME
-    self.execute_query("create external table %s (i int) "
-                       "partitioned by (j int, s string)" % FQ_TBL_NAME)
-
-    # initialize compute stats time
-    self.execute_query("compute stats %s" % FQ_TBL_NAME)
-
-    h = TestLastDdlTimeUpdate.TestHelper(self, unique_database, TBL_NAME)
+    h = self._create_and_init_test_helper(unique_database, TBL_NAME, False)
 
     # static partition insert
     h.expect_no_time_change("insert into %(TBL)s partition(j=1, s='2012') 
select 10")
@@ -183,22 +211,29 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
     # dynamic partition insert modifying an existing partition
     h.expect_no_time_change("insert into %(TBL)s partition(j, s) select 20, 1, 
'2012'")
 
-  def test_kudu(self, vector, unique_database):
+  def test_kudu_alter_and_insert(self, vector, unique_database):
     TBL_NAME = "kudu_test_tbl"
-    FQ_TBL_NAME = unique_database + "." + TBL_NAME
-    self.execute_query("create table %s (i int primary key) "
-                       "partition by hash(i) partitions 3 stored as kudu" % 
FQ_TBL_NAME)
-
-    # initialize last compute stats time
-    self.execute_query("compute stats %s" % FQ_TBL_NAME)
-
-    h = TestLastDdlTimeUpdate.TestHelper(self, unique_database, TBL_NAME)
+    h = self._create_and_init_test_helper(unique_database, TBL_NAME, True)
 
     # insert
-    h.expect_no_time_change("insert into %s values (1)" % FQ_TBL_NAME)
+    h.expect_no_time_change("insert into %s values (1)" % h.fq_tbl_name)
 
     self.run_common_test_cases(h)
 
+  def test_rename(self, vector, unique_database):
+    # Test non-Kudu table
+    OLD_TBL_NAME = "rename_from_test_tbl"
+    NEW_TBL_NAME = "rename_to_test_tbl"
+
+    h = self._create_and_init_test_helper(unique_database, OLD_TBL_NAME, False)
+    h.expect_ddl_time_change_on_rename(NEW_TBL_NAME)
+
+    # Test Kudu table
+    OLD_KUDU_TBL_NAME = "kudu_rename_from_test_tbl"
+    NEW_KUDU_TBL_NAME = "kudu_rename_to_test_tbl"
+    h = self._create_and_init_test_helper(unique_database, OLD_KUDU_TBL_NAME, 
True)
+    h.expect_ddl_time_change_on_rename(NEW_KUDU_TBL_NAME)
+
   # Tests that should behave the same with HDFS and Kudu tables.
   def run_common_test_cases(self, test_helper):
     h = test_helper

Reply via email to