zhangbutao commented on code in PR #6126:
URL: https://github.com/apache/hive/pull/6126#discussion_r2443943777
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -461,9 +475,6 @@ public List<Void> run(List<Partition> input) throws
Exception {
}
}
- //HIVE-26504: Table columns stats may exist even for partitioned tables,
so it must be updated in all cases
- updateTableColumnStats(msdb, newt, writeIdList, columnStatistics);
Review Comment:
Good change. `Table rename` can skip to update stats.
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1031,61 +1028,23 @@ private Path constructRenamedPath(Path defaultNewPath,
Path currentPath) {
defaultNewPath.toUri().getPath());
}
- public static List<ColumnStatistics> getColumnStats(RawStore msdb, Table
oldTable)
- throws NoSuchObjectException, MetaException {
- String catName = normalizeIdentifier(oldTable.isSetCatName()
- ? oldTable.getCatName()
- : getDefaultCatalog(msdb.getConf()));
- String dbName = oldTable.getDbName().toLowerCase();
- String tableName = normalizeIdentifier(oldTable.getTableName());
- List<String> columnNames =
oldTable.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
- return msdb.getTableColumnStatistics(catName, dbName, tableName,
columnNames);
- }
-
@VisibleForTesting
- public static List<ColumnStatistics> deleteTableColumnStats(RawStore msdb,
Table oldTable, Table newTable, List<ColumnStatistics> multiColStats)
+ public void deleteTableColumnStats(RawStore msdb, Table oldTable, Table
newTable)
throws InvalidObjectException, MetaException {
- List<ColumnStatistics> newMultiColStats = new ArrayList<>();
try {
String catName = normalizeIdentifier(oldTable.isSetCatName()
? oldTable.getCatName()
: getDefaultCatalog(msdb.getConf()));
String dbName = oldTable.getDbName().toLowerCase();
String tableName = normalizeIdentifier(oldTable.getTableName());
- String newDbName = newTable.getDbName().toLowerCase();
- String newTableName = normalizeIdentifier(newTable.getTableName());
- List<FieldSchema> oldTableCols = oldTable.getSd().getCols();
- List<FieldSchema> newTableCols = newTable.getSd().getCols();
-
- boolean nameChanged = !newDbName.equals(dbName) ||
!newTableName.equals(tableName);
-
- if ((nameChanged ||
!MetaStoreServerUtils.columnsIncludedByNameType(oldTableCols, newTableCols)) &&
+ List<String> staleColumns = findStaleColumns(oldTable.getSd().getCols(),
newTable.getSd().getCols());
+ if (!staleColumns.isEmpty() &&
// Don't bother in the case of ACID conversion.
TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable)) {
- for (ColumnStatistics colStats : multiColStats) {
- List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
- List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
-
- if (statsObjs != null) {
- for (ColumnStatisticsObj statsObj : statsObjs) {
- boolean found = newTableCols.stream().anyMatch(c ->
statsObj.getColName().equalsIgnoreCase(c.getName()) &&
- statsObj.getColType().equalsIgnoreCase(c.getType()));
- if (nameChanged || !found) {
Review Comment:
Good change. `Table rename` can skip to update stats.
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1094,120 +1053,33 @@ public static List<ColumnStatistics>
deleteTableColumnStats(RawStore msdb, Table
//should not happen since the input were verified before passed in
throw new InvalidObjectException("Invalid inputs to update table column
stats: " + e);
}
- return newMultiColStats;
- }
-
- @VisibleForTesting
- public void updateTableColumnStats(RawStore msdb, Table newTable, String
validWriteIds, List<ColumnStatistics> columnStatistics)
- throws MetaException, InvalidObjectException {
- Deadline.checkTimeout();
- // Change to new table and append stats for the new table
- for (ColumnStatistics colStats : columnStatistics) {
- try {
- msdb.updateTableColumnStatistics(colStats, validWriteIds,
newTable.getWriteId());
- } catch (NoSuchObjectException nsoe) {
- LOG.debug("Could not find db entry." + nsoe);
- } catch (InvalidInputException e) {
- //should not happen since the input were verified before passed in
- throw new InvalidObjectException("Invalid inputs to update table
column stats: " + e);
- }
- }
}
- public static List<ColumnStatisticsObj>
filterColumnStatsForTableColumns(List<FieldSchema> columns, ColumnStatistics
colStats) {
- return colStats.getStatsObj()
- .stream()
- .filter(o -> columns
- .stream()
- .anyMatch(column ->
o.getColName().equalsIgnoreCase(column.getName()) &&
o.getColType().equalsIgnoreCase(column.getType())))
- .collect(Collectors.toList());
- }
-
- public static List<ColumnStatistics> updateOrGetPartitionColumnStats(
+ public static void updateOrGetPartitionColumnStats(
RawStore msdb, String catName, String dbname, String tblname,
List<String> partVals,
- List<FieldSchema> oldCols, Table table, Partition part,
List<FieldSchema> newCols, List<String> deletedCols)
+ List<FieldSchema> oldCols, Table table, Partition part, List<String>
deletedCols)
throws MetaException, InvalidObjectException {
- List<ColumnStatistics> newPartsColStats = new ArrayList<>();
- boolean updateColumnStats = true;
try {
- // if newCols are not specified, use default ones.
- if (newCols == null) {
- newCols = part.getSd() == null ? new ArrayList<>() :
part.getSd().getCols();
- }
+ List<FieldSchema> newCols = part.getSd().getCols();
String oldPartName = Warehouse.makePartName(table.getPartitionKeys(),
partVals);
- String newPartName = Warehouse.makePartName(table.getPartitionKeys(),
part.getValues());
- boolean rename = !part.getDbName().equals(dbname) ||
!part.getTableName().equals(tblname)
- || !oldPartName.equals(newPartName);
-
- // do not need to update column stats if alter partition is not for
rename or changing existing columns
- if (!rename && MetaStoreServerUtils.columnsIncludedByNameType(oldCols,
newCols)) {
- return newPartsColStats;
+ // do not need to update column stats if existing columns haven't been
changed
+ List<String> staleColumns = findStaleColumns(oldCols, newCols);
+ if (staleColumns.isEmpty()) {
+ return;
}
- List<String> oldColNames = new ArrayList<>(oldCols.size());
- for (FieldSchema oldCol : oldCols) {
- oldColNames.add(oldCol.getName());
+ if (deletedCols == null) {
+ msdb.deletePartitionColumnStatistics(catName, dbname, tblname,
Lists.newArrayList(oldPartName), staleColumns, null);
+ } else {
+ deletedCols.addAll(staleColumns);
}
- List<String> oldPartNames = Lists.newArrayList(oldPartName);
- // TODO: doesn't take txn stats into account. This method can only
remove stats.
- List<List<ColumnStatistics>> multiPartsColStats =
msdb.getPartitionColumnStatistics(catName, dbname, tblname,
- oldPartNames, oldColNames);
- for (List<ColumnStatistics> partsColStats : multiPartsColStats) {
- assert (partsColStats.size() <= 1);
-
- // for out para, this value is initialized by caller.
- if (deletedCols == null) {
- deletedCols = new ArrayList<>();
- } else {
- // in case deletedCols is provided by caller, stats will be updated
by caller.
- updateColumnStats = false;
- }
- for (ColumnStatistics partColStats : partsColStats) { //actually only
at most one loop
- List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
- List<ColumnStatisticsObj> statsObjs = partColStats.getStatsObj();
- for (ColumnStatisticsObj statsObj : statsObjs) {
- boolean found = false;
- for (FieldSchema newCol : newCols) {
- if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
- && statsObj.getColType().equalsIgnoreCase(newCol.getType()))
{
- found = true;
- break;
- }
- }
- Deadline.checkTimeout();
- if (found) {
- if (rename) {
Review Comment:
Good change. `Table rename` can skip to update partition stats.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]