This is an automated email from the ASF dual-hosted git repository.

dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 72fd26d207a HIVE-27994: Optimize renaming the partitioned table (#4995) (Zhihua Deng, reviewed by Butao Zhang, Sai Hemanth Gantasala)
72fd26d207a is described below

commit 72fd26d207a2943f0535fa96330bedc244fbe10a
Author: dengzh <[email protected]>
AuthorDate: Sat Jan 20 10:02:24 2024 +0800

    HIVE-27994: Optimize renaming the partitioned table (#4995) (Zhihua Deng, reviewed by Butao Zhang, Sai Hemanth Gantasala)
---
 ql/src/test/queries/clientpositive/rename_table.q  |  40 +++
 .../results/clientpositive/llap/rename_table.q.out | 378 +++++++++++++++++++++
 .../hadoop/hive/metastore/DirectSqlUpdatePart.java |  75 ++--
 .../hadoop/hive/metastore/HiveAlterHandler.java    |  76 +++--
 .../hadoop/hive/metastore/StatObjectConverter.java |   9 +-
 5 files changed, 519 insertions(+), 59 deletions(-)
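
At a glance, the core of this patch is to stop issuing one literal UPDATE statement per partition column statistic during a table rename and to batch the updates instead, keyed by PART_ID, COLUMN_NAME and ENGINE. The following is a minimal, self-contained sketch of that JDBC batching pattern; the ColStatRow holder, the single NUM_NULLS column and the helper names are simplified illustrative stand-ins, not the metastore's actual schema access, which is shown in full in the diff below.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

public class BatchedColStatsUpdateSketch {

  // Hypothetical value holder for illustration; the real patch works with MPartitionColumnStatistics.
  static class ColStatRow {
    final long partId;
    final String colName;
    final String engine;
    final long numNulls;

    ColStatRow(long partId, String colName, String engine, long numNulls) {
      this.partId = partId;
      this.colName = colName;
      this.engine = engine;
      this.numNulls = numNulls;
    }
  }

  // Binds every row to one shared parameterized UPDATE and flushes it in batches of maxBatchSize.
  static void updateInBatches(Connection conn, List<ColStatRow> rows, int maxBatchSize) throws SQLException {
    String sql = "UPDATE \"PART_COL_STATS\" SET \"NUM_NULLS\" = ? "
        + "WHERE \"PART_ID\" = ? AND \"COLUMN_NAME\" = ? AND \"ENGINE\" = ?";
    try (PreparedStatement pst = conn.prepareStatement(sql)) {
      int pending = 0;
      for (ColStatRow row : rows) {
        pst.setLong(1, row.numNulls);
        pst.setLong(2, row.partId);
        pst.setString(3, row.colName);
        pst.setString(4, row.engine);
        pst.addBatch();
        if (++pending == maxBatchSize) {
          executeAndVerify(pst);
          pending = 0;
        }
      }
      if (pending > 0) {
        executeAndVerify(pst);
      }
    }
  }

  // Each statement in the batch is expected to update exactly one row, mirroring verifyUpdates() in the patch.
  private static void executeAndVerify(PreparedStatement pst) throws SQLException {
    for (int updated : pst.executeBatch()) {
      if (updated != 1) {
        throw new SQLException("Expected exactly one updated PART_COL_STATS row, got " + updated);
      }
    }
  }
}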

diff --git a/ql/src/test/queries/clientpositive/rename_table.q b/ql/src/test/queries/clientpositive/rename_table.q
new file mode 100644
index 00000000000..1e4f2fcdd55
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/rename_table.q
@@ -0,0 +1,40 @@
+--! qt:dataset:src
+--! qt:dataset:part
+-- This test verifies that the table can still fetch the column statistics after a rename
+set hive.stats.kll.enable=true;
+set metastore.stats.fetch.bitvector=true;
+set metastore.stats.fetch.kll=true;
+set hive.stats.autogather=true;
+set hive.stats.column.autogather=true;
+
+CREATE TABLE rename_partition_table0 (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE rename_partition_table0 PARTITION (part = '1') SELECT * FROM src where rand(1) < 0.5;
+ALTER TABLE rename_partition_table0 ADD COLUMNS (new_col INT);
+INSERT OVERWRITE TABLE rename_partition_table0 PARTITION (part = '2') SELECT src.*, 1 FROM src;
+
+ALTER TABLE rename_partition_table0 RENAME TO rename_partition_table1;
+DESCRIBE FORMATTED rename_partition_table1;
+DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='1') key;
+DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='1') value;
+DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') key;
+DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') value;
+DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') new_col;
+
+CREATE EXTERNAL TABLE rename_partition_table_ext0 (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE rename_partition_table_ext0 PARTITION (part = '1') SELECT * FROM src where rand(1) < 0.5;
+ALTER TABLE rename_partition_table_ext0 CHANGE COLUMN value val STRING CASCADE;
+INSERT OVERWRITE TABLE rename_partition_table_ext0 PARTITION (part = '2') SELECT * FROM src;
+
+ALTER TABLE rename_partition_table_ext0 RENAME TO rename_partition_table_ext1;
+DESCRIBE FORMATTED rename_partition_table_ext1;
+DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='1') key;
+DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='1') val;
+DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='2') key;
+DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='2') val;
+
+DROP TABLE rename_partition_table1;
+DROP TABLE rename_partition_table_ext1;
diff --git a/ql/src/test/results/clientpositive/llap/rename_table.q.out b/ql/src/test/results/clientpositive/llap/rename_table.q.out
new file mode 100644
index 00000000000..014d629ea86
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/rename_table.q.out
@@ -0,0 +1,378 @@
+PREHOOK: query: CREATE TABLE rename_partition_table0 (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rename_partition_table0
+POSTHOOK: query: CREATE TABLE rename_partition_table0 (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rename_partition_table0
+PREHOOK: query: INSERT OVERWRITE TABLE rename_partition_table0 PARTITION (part = '1') SELECT * FROM src where rand(1) < 0.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@rename_partition_table0@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE rename_partition_table0 PARTITION (part = '1') SELECT * FROM src where rand(1) < 0.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@rename_partition_table0@part=1
+POSTHOOK: Lineage: rename_partition_table0 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: rename_partition_table0 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE rename_partition_table0 ADD COLUMNS (new_col INT)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@rename_partition_table0
+PREHOOK: Output: default@rename_partition_table0
+POSTHOOK: query: ALTER TABLE rename_partition_table0 ADD COLUMNS (new_col INT)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@rename_partition_table0
+POSTHOOK: Output: default@rename_partition_table0
+PREHOOK: query: INSERT OVERWRITE TABLE rename_partition_table0 PARTITION (part = '2') SELECT src.*, 1 FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@rename_partition_table0@part=2
+POSTHOOK: query: INSERT OVERWRITE TABLE rename_partition_table0 PARTITION (part = '2') SELECT src.*, 1 FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@rename_partition_table0@part=2
+POSTHOOK: Lineage: rename_partition_table0 PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: rename_partition_table0 PARTITION(part=2).new_col SIMPLE []
+POSTHOOK: Lineage: rename_partition_table0 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE rename_partition_table0 RENAME TO rename_partition_table1
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: default@rename_partition_table0
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rename_partition_table0
+PREHOOK: Output: default@rename_partition_table1
+POSTHOOK: query: ALTER TABLE rename_partition_table0 RENAME TO rename_partition_table1
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: default@rename_partition_table0
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rename_partition_table0
+POSTHOOK: Output: default@rename_partition_table1
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table1
+# col_name             data_type               comment             
+key                    string                                      
+value                  string                                      
+new_col                int                                         
+                
+# Partition Information                 
+# col_name             data_type               comment             
+part                   string                                      
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       bucketing_version       2                   
+#### A masked pattern was here ####
+       numFiles                2                   
+       numPartitions           2                   
+       numRows                 746                 
+       rawDataSize             133296              
+       totalSize               4760                
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         org.apache.hadoop.hive.ql.io.orc.OrcSerde        
+InputFormat:           org.apache.hadoop.hive.ql.io.orc.OrcInputFormat  
+OutputFormat:          org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+Storage Desc Params:            
+       serialization.format    1                   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='1') key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='1') key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table1
+col_name               key                 
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         198                 
+avg_col_len            2.8089430894308944  
+max_col_len            3                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='1') value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='1') value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table1
+col_name               value               
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         191                 
+avg_col_len            6.808943089430894   
+max_col_len            7                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table1
+col_name               key                 
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         316                 
+avg_col_len            2.812               
+max_col_len            3                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table1
+col_name               value               
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         307                 
+avg_col_len            6.812               
+max_col_len            7                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') new_col
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table1 PARTITION (part='2') new_col
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table1
+col_name               new_col             
+data_type              int                 
+min                    1                   
+max                    1                   
+num_nulls              0                   
+distinct_count         1                   
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram              Q1: 1, Q2: 1, Q3: 1 
+comment                from deserializer   
+PREHOOK: query: CREATE EXTERNAL TABLE rename_partition_table_ext0 (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rename_partition_table_ext0
+POSTHOOK: query: CREATE EXTERNAL TABLE rename_partition_table_ext0 (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rename_partition_table_ext0
+PREHOOK: query: INSERT OVERWRITE TABLE rename_partition_table_ext0 PARTITION (part = '1') SELECT * FROM src where rand(1) < 0.5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@rename_partition_table_ext0@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE rename_partition_table_ext0 PARTITION (part = '1') SELECT * FROM src where rand(1) < 0.5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@rename_partition_table_ext0@part=1
+POSTHOOK: Lineage: rename_partition_table_ext0 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: rename_partition_table_ext0 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE rename_partition_table_ext0 CHANGE COLUMN value val STRING CASCADE
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@rename_partition_table_ext0
+PREHOOK: Output: default@rename_partition_table_ext0
+PREHOOK: Output: default@rename_partition_table_ext0@part=1
+POSTHOOK: query: ALTER TABLE rename_partition_table_ext0 CHANGE COLUMN value val STRING CASCADE
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@rename_partition_table_ext0
+POSTHOOK: Output: default@rename_partition_table_ext0
+POSTHOOK: Output: default@rename_partition_table_ext0@part=1
+PREHOOK: query: INSERT OVERWRITE TABLE rename_partition_table_ext0 PARTITION (part = '2') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@rename_partition_table_ext0@part=2
+POSTHOOK: query: INSERT OVERWRITE TABLE rename_partition_table_ext0 PARTITION (part = '2') SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@rename_partition_table_ext0@part=2
+POSTHOOK: Lineage: rename_partition_table_ext0 PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: rename_partition_table_ext0 PARTITION(part=2).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE rename_partition_table_ext0 RENAME TO rename_partition_table_ext1
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: default@rename_partition_table_ext0
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rename_partition_table_ext0
+PREHOOK: Output: default@rename_partition_table_ext1
+POSTHOOK: query: ALTER TABLE rename_partition_table_ext0 RENAME TO rename_partition_table_ext1
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: default@rename_partition_table_ext0
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rename_partition_table_ext0
+POSTHOOK: Output: default@rename_partition_table_ext1
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table_ext1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table_ext1
+# col_name             data_type               comment             
+key                    string                                      
+val                    string                                      
+                
+# Partition Information                 
+# col_name             data_type               comment             
+part                   string                                      
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            EXTERNAL_TABLE           
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       EXTERNAL                TRUE                
+       bucketing_version       2                   
+#### A masked pattern was here ####
+       numFiles                2                   
+       numPartitions           2                   
+       numRows                 746                 
+       rawDataSize             131296              
+       totalSize               4669                
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         org.apache.hadoop.hive.ql.io.orc.OrcSerde        
+InputFormat:           org.apache.hadoop.hive.ql.io.orc.OrcInputFormat  
+OutputFormat:          org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+Storage Desc Params:            
+       serialization.format    1                   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='1') key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table_ext1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='1') key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table_ext1
+col_name               key                 
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         198                 
+avg_col_len            2.8089430894308944  
+max_col_len            3                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='1') val
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table_ext1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='1') val
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table_ext1
+col_name               val                 
+data_type              string              
+min                                        
+max                                        
+num_nulls                                  
+distinct_count                             
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector                                 
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='2') key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table_ext1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='2') key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table_ext1
+col_name               key                 
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         316                 
+avg_col_len            2.812               
+max_col_len            3                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='2') val
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@rename_partition_table_ext1
+POSTHOOK: query: DESCRIBE FORMATTED rename_partition_table_ext1 PARTITION (part='2') val
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@rename_partition_table_ext1
+col_name               val                 
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         307                 
+avg_col_len            6.812               
+max_col_len            7                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+PREHOOK: query: DROP TABLE rename_partition_table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@rename_partition_table1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rename_partition_table1
+POSTHOOK: query: DROP TABLE rename_partition_table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@rename_partition_table1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rename_partition_table1
+PREHOOK: query: DROP TABLE rename_partition_table_ext1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@rename_partition_table_ext1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rename_partition_table_ext1
+POSTHOOK: query: DROP TABLE rename_partition_table_ext1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@rename_partition_table_ext1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rename_partition_table_ext1
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java
index f6e41f09094..67c293ee64f 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java
@@ -64,6 +64,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.locks.ReentrantLock;
@@ -181,7 +182,7 @@ class DirectSqlUpdatePart {
             e -> e.partitionId).collect(Collectors.toList()
     );
 
-    prefix.append("select \"PART_ID\", \"COLUMN_NAME\" from \"PART_COL_STATS\" 
WHERE ");
+    prefix.append("select \"PART_ID\", \"COLUMN_NAME\", \"ENGINE\" from 
\"PART_COL_STATS\" WHERE ");
     TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix,
             partIdList, "\"PART_ID\"", true, false);
 
@@ -191,7 +192,7 @@ class DirectSqlUpdatePart {
         LOG.debug("Going to execute query " + query);
         rs = statement.executeQuery(query);
         while (rs.next()) {
-          selectedParts.add(new PartColNameInfo(rs.getLong(1), rs.getString(2)));
+          selectedParts.add(new PartColNameInfo(rs.getLong(1), rs.getString(2), rs.getString(3)));
         }
       } finally {
         close(rs, statement, null);
@@ -207,7 +208,8 @@ class DirectSqlUpdatePart {
         statsDesc.setCatName(tbl.getCatName());
       }
       for (ColumnStatisticsObj statisticsObj : colStats.getStatsObj()) {
-        PartColNameInfo temp = new PartColNameInfo(partId, statisticsObj.getColName());
+        PartColNameInfo temp = new PartColNameInfo(partId, statisticsObj.getColName(),
+            colStats.getEngine());
         if (selectedParts.contains(temp)) {
           updateMap.put(temp, StatObjectConverter.
                   convertToMPartitionColumnStatistics(null, statsDesc, statisticsObj, colStats.getEngine()));
@@ -221,25 +223,46 @@ class DirectSqlUpdatePart {
 
   private void updatePartColStatTable(Map<PartColNameInfo, MPartitionColumnStatistics> updateMap,
                                           Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
-    PreparedStatement pst = null;
-    for (Map.Entry entry : updateMap.entrySet()) {
-      PartColNameInfo partColNameInfo = (PartColNameInfo) entry.getKey();
-      Long partId = partColNameInfo.partitionId;
-      MPartitionColumnStatistics mPartitionColumnStatistics = (MPartitionColumnStatistics) entry.getValue();
-      String update = "UPDATE \"PART_COL_STATS\" SET ";
-      update += StatObjectConverter.getUpdatedColumnSql(mPartitionColumnStatistics);
-      update += " WHERE \"PART_ID\" = " + partId + " AND "
-              + " \"COLUMN_NAME\" = " +  
quoteString(mPartitionColumnStatistics.getColName());
-      try {
-        pst = dbConn.prepareStatement(update);
-        StatObjectConverter.initUpdatedColumnStatement(mPartitionColumnStatistics, pst);
-        LOG.debug("Going to execute update " + update);
-        int numUpdate = pst.executeUpdate();
-        if (numUpdate != 1) {
-          throw new MetaException("Invalid state of  PART_COL_STATS for 
PART_ID " + partId);
+    Map<String, List<Map.Entry<PartColNameInfo, MPartitionColumnStatistics>>> 
updates = new HashMap<>();
+    for (Map.Entry<PartColNameInfo, MPartitionColumnStatistics> entry : 
updateMap.entrySet()) {
+      MPartitionColumnStatistics mPartitionColumnStatistics = entry.getValue();
+      StringBuilder update = new StringBuilder("UPDATE \"PART_COL_STATS\" SET ")
+          .append(StatObjectConverter.getUpdatedColumnSql(mPartitionColumnStatistics))
+          .append(" WHERE \"PART_ID\" = ? AND \"COLUMN_NAME\" = ? AND \"ENGINE\" = ?");
+      updates.computeIfAbsent(update.toString(), k -> new ArrayList<>()).add(entry);
+    }
+
+    for (Map.Entry<String, List<Map.Entry<PartColNameInfo, 
MPartitionColumnStatistics>>> entry : updates.entrySet()) {
+      List<Long> partIds = new ArrayList<>();
+      try (PreparedStatement pst = dbConn.prepareStatement(entry.getKey())) {
+        List<Map.Entry<PartColNameInfo, MPartitionColumnStatistics>> entries = 
entry.getValue();
+        for (Map.Entry<PartColNameInfo, MPartitionColumnStatistics> partStats 
: entries) {
+          PartColNameInfo partColNameInfo = partStats.getKey();
+          MPartitionColumnStatistics mPartitionColumnStatistics = 
partStats.getValue();
+          int colIdx = 
StatObjectConverter.initUpdatedColumnStatement(mPartitionColumnStatistics, pst);
+          pst.setLong(colIdx++, partColNameInfo.partitionId);
+          pst.setString(colIdx++, mPartitionColumnStatistics.getColName());
+          pst.setString(colIdx++, mPartitionColumnStatistics.getEngine());
+          partIds.add(partColNameInfo.partitionId);
+          pst.addBatch();
+          if (partIds.size() == maxBatchSize) {
+            LOG.debug("Going to execute updates on part: {}", partIds);
+            verifyUpdates(pst.executeBatch(), partIds);
+            partIds = new ArrayList<>();
+          }
+        }
+        if (!partIds.isEmpty()) {
+          LOG.debug("Going to execute updates on part: {}", partIds);
+          verifyUpdates(pst.executeBatch(), partIds);
         }
-      } finally {
-        closeStmt(pst);
+      }
+    }
+  }
+
+  private void verifyUpdates(int[] numUpdates, List<Long> partIds) throws MetaException {
+    for (int i = 0; i < numUpdates.length; i++) {
+      if (numUpdates[i] != 1) {
+        throw new MetaException("Invalid state of PART_COL_STATS for PART_ID " + partIds.get(i));
       }
     }
   }
@@ -1501,9 +1524,11 @@ class DirectSqlUpdatePart {
   private static final class PartColNameInfo {
     long partitionId;
     String colName;
-    public PartColNameInfo(long partitionId, String colName) {
+    String engine;
+    public PartColNameInfo(long partitionId, String colName, String engine) {
       this.partitionId = partitionId;
       this.colName = colName;
+      this.engine = engine;
     }
 
     @Override
@@ -1527,10 +1552,10 @@ class DirectSqlUpdatePart {
       if (this.partitionId != other.partitionId) {
         return false;
       }
-      if (this.colName.equalsIgnoreCase(other.colName)) {
-        return true;
+      if (!this.colName.equalsIgnoreCase(other.colName)) {
+        return false;
       }
-      return false;
+      return Objects.equals(this.engine, other.engine);
     }
   }
 }
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 1c18631e1cc..a2807961b75 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -18,10 +18,8 @@
 package org.apache.hadoop.hive.metastore;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.Lists;
 
-import com.google.common.collect.Multimap;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.common.AcidMetaDataFile.DataFormat;
@@ -63,6 +61,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -352,7 +351,7 @@ public class HiveAlterHandler implements AlterHandler {
 
           // also the location field in partition
           parts = msdb.getPartitions(catName, dbname, name, -1);
-          Multimap<Partition, ColumnStatistics> columnStatsNeedUpdated = 
ArrayListMultimap.create();
+          Map<List<FieldSchema>, List<Partition>> partsByCols = new 
HashMap<>();
           for (Partition part : parts) {
             String oldPartLoc = part.getSd().getLocation();
             if (dataWasMoved && oldPartLoc.contains(oldTblLocPath)) {
@@ -363,44 +362,57 @@ public class HiveAlterHandler implements AlterHandler {
             }
             part.setDbName(newDbName);
             part.setTableName(newTblName);
-            List<ColumnStatistics> multiColStats = 
updateOrGetPartitionColumnStats(msdb, catName, dbname, name,
-                part.getValues(), part.getSd().getCols(), oldt, part, null, 
null);
-            for (ColumnStatistics colStats : multiColStats) {
-              columnStatsNeedUpdated.put(part, colStats);
+            partsByCols.computeIfAbsent(part.getSd().getCols(), k -> new ArrayList<>()).add(part);
+          }
+          Map<String, Map<String, ColumnStatistics>> engineToColStats = new 
HashMap<>();
+          if (rename) {
+            // If this is the table rename, get the partition column statistics first
+            for (Map.Entry<List<FieldSchema>, List<Partition>> entry : partsByCols.entrySet()) {
+              List<String> colNames = entry.getKey().stream().map(fs -> fs.getName()).collect(Collectors.toList());
+              List<String> partNames = new ArrayList<>();
+              for (Partition part : entry.getValue()) {
+                partNames.add(Warehouse.makePartName(oldt.getPartitionKeys(), part.getValues()));
+              }
+              List<List<ColumnStatistics>> colStats =
+                  msdb.getPartitionColumnStatistics(catName, dbname, name, partNames, colNames);
+              for (List<ColumnStatistics> cs : colStats) {
+                if (cs != null && !cs.isEmpty()) {
+                  String engine = cs.get(0).getEngine();
+                  cs.stream().forEach(stats -> {
+                    stats.getStatsDesc().setDbName(newDbName);
+                    stats.getStatsDesc().setTableName(newTblName);
+                    String partName = stats.getStatsDesc().getPartName();
+                    engineToColStats.computeIfAbsent(engine, key -> new HashMap<>()).put(partName, stats);
+                  });
+                }
+              }
             }
           }
           // Do not verify stats parameters on a partitioned table.
           msdb.alterTable(catName, dbname, name, newt, null);
+          int partitionBatchSize = MetastoreConf.getIntVar(handler.getConf(),
+              MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
+          String catalogName = catName;
           // alterPartition is only for changing the partition location in the table rename
           if (dataWasMoved) {
-
-            int partsToProcess = parts.size();
-            int partitionBatchSize = MetastoreConf.getIntVar(handler.getConf(),
-                MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
-            int batchStart = 0;
-            while (partsToProcess > 0) {
-              int batchEnd = Math.min(batchStart + partitionBatchSize, parts.size());
-              List<Partition> partBatch = parts.subList(batchStart, batchEnd);
-              int partBatchSize = partBatch.size();
-              partsToProcess -= partBatchSize;
-              batchStart += partBatchSize;
-              List<List<String>> partValues = new ArrayList<>(partBatchSize);
-              for (Partition part : partBatch) {
-                partValues.add(part.getValues());
+            Batchable.runBatched(partitionBatchSize, parts, new Batchable<Partition, Void>() {
+              @Override
+              public List<Void> run(List<Partition> input) throws Exception {
+                msdb.alterPartitions(catalogName, newDbName, newTblName,
+                    input.stream().map(Partition::getValues).collect(Collectors.toList()),
+                    input, newt.getWriteId(), writeIdList);
+                return Collections.emptyList();
               }
-              msdb.alterPartitions(catName, newDbName, newTblName, partValues,
-                  partBatch, newt.getWriteId(), writeIdList);
-            }
+            });
           }
           Deadline.checkTimeout();
-          Table table = msdb.getTable(catName, newDbName, newTblName);
-          MTable mTable = msdb.ensureGetMTable(catName, newDbName, newTblName);
-          for (Entry<Partition, ColumnStatistics> partColStats : 
columnStatsNeedUpdated.entries()) {
-            ColumnStatistics newPartColStats = partColStats.getValue();
-            newPartColStats.getStatsDesc().setDbName(newDbName);
-            newPartColStats.getStatsDesc().setTableName(newTblName);
-            msdb.updatePartitionColumnStatistics(table, mTable, newPartColStats, 
-                partColStats.getKey().getValues(), writeIdList, newt.getWriteId());
+          if (rename) {
+            for (Entry<String, Map<String, ColumnStatistics>> entry : 
engineToColStats.entrySet()) {
+              // We will send ALTER_TABLE event after the db change, set 
listeners to null so that no extra
+              // event that could pollute the replication will be sent.
+              msdb.updatePartitionColumnStatisticsInBatch(entry.getValue(), oldt,
+                  null, writeIdList, newt.getWriteId());
+            }
           }
         } else {
           msdb.alterTable(catName, dbname, name, newt, writeIdList);
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
index 5848abd2064..163c855833e 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
@@ -288,11 +288,13 @@ public class StatObjectConverter {
     if (mStatsObj.getNumNulls() != null) {
       setStmt.append("\"NUM_NULLS\" = ? ,");
     }
-    setStmt.append("\"ENGINE\" = ? ");
+    setStmt.append("\"ENGINE\" = ? ,");
+    setStmt.append("\"DB_NAME\" = ? ,");
+    setStmt.append("\"TABLE_NAME\" = ? ");
     return setStmt.toString();
   }
 
-  public static void initUpdatedColumnStatement(MPartitionColumnStatistics mStatsObj,
+  public static int initUpdatedColumnStatement(MPartitionColumnStatistics mStatsObj,
                                                       PreparedStatement pst) throws SQLException {
     int colIdx = 1;
     if (mStatsObj.getAvgColLen() != null) {
@@ -339,6 +341,9 @@ public class StatObjectConverter {
       pst.setObject(colIdx++, mStatsObj.getNumNulls());
     }
     pst.setString(colIdx++, mStatsObj.getEngine());
+    pst.setString(colIdx++, mStatsObj.getDbName());
+    pst.setString(colIdx++, mStatsObj.getTableName());
+    return colIdx;
   }
 
   public static ColumnStatisticsObj getTableColumnStatisticsObj(
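
A closing note on the HiveAlterHandler side of the change: during a rename, the existing partition column statistics are fetched once, repointed at the new database and table name, and grouped per engine and partition name so that each engine's statistics can be pushed back through a single batched metastore call. The sketch below only illustrates the shape of that grouping with a hypothetical PartStats holder and plain collections; it is not the thrift ColumnStatistics API used in the actual patch.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class RenameStatsGroupingSketch {

  // Hypothetical stand-in for one partition's column statistics under one engine;
  // the real code works with org.apache.hadoop.hive.metastore.api.ColumnStatistics.
  static class PartStats {
    String engine;
    String partName;
    String dbName;
    String tableName;

    PartStats(String engine, String partName, String dbName, String tableName) {
      this.engine = engine;
      this.partName = partName;
      this.dbName = dbName;
      this.tableName = tableName;
    }
  }

  // Rewrites each fetched statistic to the new table identity and buckets it as
  // engine -> (partition name -> stats), so every engine bucket can be written
  // back with one batched metastore update.
  static Map<String, Map<String, PartStats>> groupForRename(List<PartStats> fetched,
                                                            String newDbName, String newTblName) {
    Map<String, Map<String, PartStats>> engineToColStats = new HashMap<>();
    for (PartStats stats : fetched) {
      stats.dbName = newDbName;
      stats.tableName = newTblName;
      engineToColStats.computeIfAbsent(stats.engine, k -> new HashMap<>()).put(stats.partName, stats);
    }
    return engineToColStats;
  }
}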

