zhangbutao commented on code in PR #6126:
URL: https://github.com/apache/hive/pull/6126#discussion_r2443943777


##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -461,9 +475,6 @@ public List<Void> run(List<Partition> input) throws 
Exception {
         }
       }
 
-      //HIVE-26504: Table columns stats may exist even for partitioned tables, 
so it must be updated in all cases
-      updateTableColumnStats(msdb, newt, writeIdList, columnStatistics);

Review Comment:
   Good change. `Table rename` can skip to update stats. 



##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1031,61 +1028,23 @@ private Path constructRenamedPath(Path defaultNewPath, 
Path currentPath) {
         defaultNewPath.toUri().getPath());
   }
 
-  public static List<ColumnStatistics> getColumnStats(RawStore msdb, Table 
oldTable)
-      throws NoSuchObjectException, MetaException {
-    String catName = normalizeIdentifier(oldTable.isSetCatName()
-        ? oldTable.getCatName()
-        : getDefaultCatalog(msdb.getConf()));
-    String dbName = oldTable.getDbName().toLowerCase();
-    String tableName = normalizeIdentifier(oldTable.getTableName());
-    List<String> columnNames = 
oldTable.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
-    return msdb.getTableColumnStatistics(catName, dbName, tableName, 
columnNames);
-  }
-
   @VisibleForTesting
-  public static List<ColumnStatistics> deleteTableColumnStats(RawStore msdb, 
Table oldTable, Table newTable, List<ColumnStatistics> multiColStats)
+  public void deleteTableColumnStats(RawStore msdb, Table oldTable, Table 
newTable)
       throws InvalidObjectException, MetaException {
-    List<ColumnStatistics> newMultiColStats = new ArrayList<>();
     try {
       String catName = normalizeIdentifier(oldTable.isSetCatName()
           ? oldTable.getCatName()
           : getDefaultCatalog(msdb.getConf()));
       String dbName = oldTable.getDbName().toLowerCase();
       String tableName = normalizeIdentifier(oldTable.getTableName());
-      String newDbName = newTable.getDbName().toLowerCase();
-      String newTableName = normalizeIdentifier(newTable.getTableName());
-      List<FieldSchema> oldTableCols = oldTable.getSd().getCols();
-      List<FieldSchema> newTableCols = newTable.getSd().getCols();
-
-      boolean nameChanged = !newDbName.equals(dbName) || 
!newTableName.equals(tableName);
-
-      if ((nameChanged || 
!MetaStoreServerUtils.columnsIncludedByNameType(oldTableCols, newTableCols)) &&
+      List<String> staleColumns = findStaleColumns(oldTable.getSd().getCols(), 
newTable.getSd().getCols());
+      if (!staleColumns.isEmpty() &&
           // Don't bother in the case of ACID conversion.
           TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable)) {
-        for (ColumnStatistics colStats : multiColStats) {
-          List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
-          List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
-
-          if (statsObjs != null) {
-            for (ColumnStatisticsObj statsObj : statsObjs) {
-              boolean found = newTableCols.stream().anyMatch(c -> 
statsObj.getColName().equalsIgnoreCase(c.getName()) &&
-                  statsObj.getColType().equalsIgnoreCase(c.getType()));
-              if (nameChanged || !found) {

Review Comment:
   Good change. `Table rename` can skip to update stats.



##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1094,120 +1053,33 @@ public static List<ColumnStatistics> 
deleteTableColumnStats(RawStore msdb, Table
       //should not happen since the input were verified before passed in
       throw new InvalidObjectException("Invalid inputs to update table column 
stats: " + e);
     }
-    return newMultiColStats;
-  }
-
-  @VisibleForTesting
-  public void updateTableColumnStats(RawStore msdb, Table newTable, String 
validWriteIds, List<ColumnStatistics> columnStatistics)
-      throws MetaException, InvalidObjectException {
-    Deadline.checkTimeout();
-    // Change to new table and append stats for the new table
-    for (ColumnStatistics colStats : columnStatistics) {
-      try {
-        msdb.updateTableColumnStatistics(colStats, validWriteIds, 
newTable.getWriteId());
-      } catch (NoSuchObjectException nsoe) {
-        LOG.debug("Could not find db entry." + nsoe);
-      } catch (InvalidInputException e) {
-        //should not happen since the input were verified before passed in
-        throw new InvalidObjectException("Invalid inputs to update table 
column stats: " + e);
-      }
-    }
   }
 
-  public static List<ColumnStatisticsObj> 
filterColumnStatsForTableColumns(List<FieldSchema> columns, ColumnStatistics 
colStats) {
-    return colStats.getStatsObj()
-        .stream()
-        .filter(o -> columns
-            .stream()
-            .anyMatch(column -> 
o.getColName().equalsIgnoreCase(column.getName()) && 
o.getColType().equalsIgnoreCase(column.getType())))
-        .collect(Collectors.toList());
-  }
-
-  public static List<ColumnStatistics> updateOrGetPartitionColumnStats(
+  public static void updateOrGetPartitionColumnStats(
       RawStore msdb, String catName, String dbname, String tblname, 
List<String> partVals,
-      List<FieldSchema> oldCols, Table table, Partition part, 
List<FieldSchema> newCols, List<String> deletedCols)
+      List<FieldSchema> oldCols, Table table, Partition part, List<String> 
deletedCols)
           throws MetaException, InvalidObjectException {
-    List<ColumnStatistics> newPartsColStats = new ArrayList<>();
-    boolean updateColumnStats = true;
     try {
-      // if newCols are not specified, use default ones.
-      if (newCols == null) {
-        newCols = part.getSd() == null ? new ArrayList<>() : 
part.getSd().getCols();
-      }
+      List<FieldSchema> newCols  = part.getSd().getCols();
       String oldPartName = Warehouse.makePartName(table.getPartitionKeys(), 
partVals);
-      String newPartName = Warehouse.makePartName(table.getPartitionKeys(), 
part.getValues());
-      boolean rename = !part.getDbName().equals(dbname) || 
!part.getTableName().equals(tblname)
-          || !oldPartName.equals(newPartName);
-
-      // do not need to update column stats if alter partition is not for 
rename or changing existing columns
-      if (!rename && MetaStoreServerUtils.columnsIncludedByNameType(oldCols, 
newCols)) {
-        return newPartsColStats;
+      // do not need to update column stats if existing columns haven't been 
changed
+      List<String> staleColumns = findStaleColumns(oldCols, newCols);
+      if (staleColumns.isEmpty()) {
+        return;
       }
-      List<String> oldColNames = new ArrayList<>(oldCols.size());
-      for (FieldSchema oldCol : oldCols) {
-        oldColNames.add(oldCol.getName());
+      if (deletedCols == null) {
+        msdb.deletePartitionColumnStatistics(catName, dbname, tblname, 
Lists.newArrayList(oldPartName), staleColumns, null);
+      } else {
+        deletedCols.addAll(staleColumns);
       }
-      List<String> oldPartNames = Lists.newArrayList(oldPartName);
-      // TODO: doesn't take txn stats into account. This method can only 
remove stats.
-      List<List<ColumnStatistics>> multiPartsColStats = 
msdb.getPartitionColumnStatistics(catName, dbname, tblname,
-          oldPartNames, oldColNames);
-      for (List<ColumnStatistics> partsColStats : multiPartsColStats) {
-        assert (partsColStats.size() <= 1);
-
-        // for out para, this value is initialized by caller.
-        if (deletedCols == null) {
-          deletedCols = new ArrayList<>();
-        } else {
-          // in case deletedCols is provided by caller, stats will be updated  
by caller.
-          updateColumnStats = false;
-        }
-        for (ColumnStatistics partColStats : partsColStats) { //actually only 
at most one loop
-          List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
-          List<ColumnStatisticsObj> statsObjs = partColStats.getStatsObj();
-          for (ColumnStatisticsObj statsObj : statsObjs) {
-            boolean found = false;
-            for (FieldSchema newCol : newCols) {
-              if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
-                  && statsObj.getColType().equalsIgnoreCase(newCol.getType())) 
{
-                found = true;
-                break;
-              }
-            }
-            Deadline.checkTimeout();
-            if (found) {
-              if (rename) {

Review Comment:
   Good change. `Table rename` can skip to update partition stats.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to