wecharyu commented on code in PR #6126:
URL: https://github.com/apache/hive/pull/6126#discussion_r2436504444
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1031,61 +1029,28 @@ private Path constructRenamedPath(Path defaultNewPath,
Path currentPath) {
defaultNewPath.toUri().getPath());
}
- public static List<ColumnStatistics> getColumnStats(RawStore msdb, Table
oldTable)
- throws NoSuchObjectException, MetaException {
- String catName = normalizeIdentifier(oldTable.isSetCatName()
- ? oldTable.getCatName()
- : getDefaultCatalog(msdb.getConf()));
- String dbName = oldTable.getDbName().toLowerCase();
- String tableName = normalizeIdentifier(oldTable.getTableName());
- List<String> columnNames =
oldTable.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
- return msdb.getTableColumnStatistics(catName, dbName, tableName,
columnNames);
- }
-
@VisibleForTesting
- public static List<ColumnStatistics> deleteTableColumnStats(RawStore msdb,
Table oldTable, Table newTable, List<ColumnStatistics> multiColStats)
+ public void deleteTableColumnStats(RawStore msdb, Table oldTable, Table
newTable)
throws InvalidObjectException, MetaException {
- List<ColumnStatistics> newMultiColStats = new ArrayList<>();
try {
String catName = normalizeIdentifier(oldTable.isSetCatName()
? oldTable.getCatName()
: getDefaultCatalog(msdb.getConf()));
String dbName = oldTable.getDbName().toLowerCase();
String tableName = normalizeIdentifier(oldTable.getTableName());
- String newDbName = newTable.getDbName().toLowerCase();
- String newTableName = normalizeIdentifier(newTable.getTableName());
List<FieldSchema> oldTableCols = oldTable.getSd().getCols();
List<FieldSchema> newTableCols = newTable.getSd().getCols();
-
- boolean nameChanged = !newDbName.equals(dbName) ||
!newTableName.equals(tableName);
-
- if ((nameChanged ||
!MetaStoreServerUtils.columnsIncludedByNameType(oldTableCols, newTableCols)) &&
+ if (!columnsIncludedByNameType(oldTableCols, newTableCols) ||
+ !MetaStoreServerUtils.arePrefixColumns(oldTableCols, newTableCols) &&
Review Comment:
This case seems redundant, we only need delete removed columns' stats, which
is included in `!columnsIncludedByNameType(oldTableCols, newTableCols)`.
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1031,61 +1029,28 @@ private Path constructRenamedPath(Path defaultNewPath,
Path currentPath) {
defaultNewPath.toUri().getPath());
}
- public static List<ColumnStatistics> getColumnStats(RawStore msdb, Table
oldTable)
- throws NoSuchObjectException, MetaException {
- String catName = normalizeIdentifier(oldTable.isSetCatName()
- ? oldTable.getCatName()
- : getDefaultCatalog(msdb.getConf()));
- String dbName = oldTable.getDbName().toLowerCase();
- String tableName = normalizeIdentifier(oldTable.getTableName());
- List<String> columnNames =
oldTable.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
- return msdb.getTableColumnStatistics(catName, dbName, tableName,
columnNames);
- }
-
@VisibleForTesting
- public static List<ColumnStatistics> deleteTableColumnStats(RawStore msdb,
Table oldTable, Table newTable, List<ColumnStatistics> multiColStats)
+ public void deleteTableColumnStats(RawStore msdb, Table oldTable, Table
newTable)
throws InvalidObjectException, MetaException {
- List<ColumnStatistics> newMultiColStats = new ArrayList<>();
try {
String catName = normalizeIdentifier(oldTable.isSetCatName()
? oldTable.getCatName()
: getDefaultCatalog(msdb.getConf()));
String dbName = oldTable.getDbName().toLowerCase();
String tableName = normalizeIdentifier(oldTable.getTableName());
- String newDbName = newTable.getDbName().toLowerCase();
- String newTableName = normalizeIdentifier(newTable.getTableName());
List<FieldSchema> oldTableCols = oldTable.getSd().getCols();
List<FieldSchema> newTableCols = newTable.getSd().getCols();
-
- boolean nameChanged = !newDbName.equals(dbName) ||
!newTableName.equals(tableName);
-
- if ((nameChanged ||
!MetaStoreServerUtils.columnsIncludedByNameType(oldTableCols, newTableCols)) &&
+ if (!columnsIncludedByNameType(oldTableCols, newTableCols) ||
+ !MetaStoreServerUtils.arePrefixColumns(oldTableCols, newTableCols) &&
// Don't bother in the case of ACID conversion.
TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable)) {
- for (ColumnStatistics colStats : multiColStats) {
- List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
- List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
-
- if (statsObjs != null) {
- for (ColumnStatisticsObj statsObj : statsObjs) {
- boolean found = newTableCols.stream().anyMatch(c ->
statsObj.getColName().equalsIgnoreCase(c.getName()) &&
- statsObj.getColType().equalsIgnoreCase(c.getType()));
- if (nameChanged || !found) {
- msdb.deleteTableColumnStatistics(catName,
oldTable.getDbName().toLowerCase(),
- normalizeIdentifier(oldTable.getTableName()),
statsObj.getColName(), colStats.getEngine());
- }
- if (found) {
- newStatsObjs.add(statsObj);
- }
- }
- StatsSetupConst.removeColumnStatsState(newTable.getParameters(),
-
statsObjs.stream().map(ColumnStatisticsObj::getColName).collect(Collectors.toList()));
- }
- ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
- statsDesc.setDbName(newDbName);
- statsDesc.setTableName(newTableName);
- colStats.setStatsObj(newStatsObjs);
- newMultiColStats.add(colStats);
+ List<String> deletedCols = oldTableCols.stream().filter(
+ c -> newTableCols.stream().noneMatch(n ->
columnsIncludedByNameType(Arrays.asList(c), Arrays.asList(n))))
Review Comment:
Can we use a method to compare two columns excluding comment here instead of
constructing arrays?
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java:
##########
@@ -209,51 +211,45 @@ private void initSharedCache(Configuration conf) {
return sharedCache;
}
- private static ColumnStatistics updateStatsForAlterPart(RawStore rawStore,
Table before, String catalogName,
+ private static void updateStatsForAlterPart(RawStore rawStore, Table before,
String catalogName,
String dbName, String tableName, Partition part) throws Exception {
List<String> deletedCols = new ArrayList<>();
- List<ColumnStatistics> multiColumnStats = HiveAlterHandler
- .updateOrGetPartitionColumnStats(rawStore, catalogName, dbName,
tableName, part.getValues(),
- part.getSd().getCols(), before, part, null, deletedCols);
- if (multiColumnStats.size() > 1) {
- throw new RuntimeException("CachedStore can only be enabled for Hive
engine");
+ // if this is the table rename, change the cache
Review Comment:
Do we support change the table name of a partition?
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -754,24 +766,10 @@ public Partition alterPartition(RawStore msdb, Warehouse
wh, String catName, Str
new_part, tbl, wh, false, true, environmentContext, false);
}
- String newPartName = Warehouse.makePartName(tbl.getPartitionKeys(),
new_part.getValues());
- List<ColumnStatistics> multiColumnStats =
updateOrGetPartitionColumnStats(msdb, catName, dbname, name,
oldPart.getValues(),
- oldPart.getSd().getCols(), tbl, new_part, null, null);
- msdb.alterPartition(catName, dbname, name, part_vals, new_part,
validWriteIds);
- if (!multiColumnStats.isEmpty()) {
- for (ColumnStatistics cs : multiColumnStats) {
- cs.getStatsDesc().setPartName(newPartName);
- try {
- msdb.updatePartitionColumnStatistics(tbl, mTable, cs,
new_part.getValues(),
- validWriteIds, new_part.getWriteId());
- } catch (InvalidInputException iie) {
- throw new InvalidOperationException("Unable to update partition
stats in table rename." + iie);
- } catch (NoSuchObjectException nsoe) {
- // It is ok, ignore
- }
- }
- }
+ updateOrGetPartitionColumnStats(msdb, catName, dbname, name,
oldPart.getValues(),
Review Comment:
It seems only delete all column stats of this partition here, but not update
the latest new partition column stats.
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1094,120 +1059,37 @@ public static List<ColumnStatistics>
deleteTableColumnStats(RawStore msdb, Table
//should not happen since the input were verified before passed in
throw new InvalidObjectException("Invalid inputs to update table column
stats: " + e);
}
- return newMultiColStats;
- }
-
- @VisibleForTesting
- public void updateTableColumnStats(RawStore msdb, Table newTable, String
validWriteIds, List<ColumnStatistics> columnStatistics)
- throws MetaException, InvalidObjectException {
- Deadline.checkTimeout();
- // Change to new table and append stats for the new table
- for (ColumnStatistics colStats : columnStatistics) {
- try {
- msdb.updateTableColumnStatistics(colStats, validWriteIds,
newTable.getWriteId());
- } catch (NoSuchObjectException nsoe) {
- LOG.debug("Could not find db entry." + nsoe);
- } catch (InvalidInputException e) {
- //should not happen since the input were verified before passed in
- throw new InvalidObjectException("Invalid inputs to update table
column stats: " + e);
- }
- }
}
- public static List<ColumnStatisticsObj>
filterColumnStatsForTableColumns(List<FieldSchema> columns, ColumnStatistics
colStats) {
- return colStats.getStatsObj()
- .stream()
- .filter(o -> columns
- .stream()
- .anyMatch(column ->
o.getColName().equalsIgnoreCase(column.getName()) &&
o.getColType().equalsIgnoreCase(column.getType())))
- .collect(Collectors.toList());
- }
-
- public static List<ColumnStatistics> updateOrGetPartitionColumnStats(
+ public static void updateOrGetPartitionColumnStats(
RawStore msdb, String catName, String dbname, String tblname,
List<String> partVals,
- List<FieldSchema> oldCols, Table table, Partition part,
List<FieldSchema> newCols, List<String> deletedCols)
+ List<FieldSchema> oldCols, Table table, Partition part, List<String>
deletedCols)
throws MetaException, InvalidObjectException {
- List<ColumnStatistics> newPartsColStats = new ArrayList<>();
- boolean updateColumnStats = true;
try {
- // if newCols are not specified, use default ones.
- if (newCols == null) {
- newCols = part.getSd() == null ? new ArrayList<>() :
part.getSd().getCols();
- }
+ List<FieldSchema> newCols = part.getSd().getCols();
String oldPartName = Warehouse.makePartName(table.getPartitionKeys(),
partVals);
- String newPartName = Warehouse.makePartName(table.getPartitionKeys(),
part.getValues());
- boolean rename = !part.getDbName().equals(dbname) ||
!part.getTableName().equals(tblname)
- || !oldPartName.equals(newPartName);
-
- // do not need to update column stats if alter partition is not for
rename or changing existing columns
- if (!rename && MetaStoreServerUtils.columnsIncludedByNameType(oldCols,
newCols)) {
- return newPartsColStats;
+ // do not need to update column stats if existing columns haven't been
changed
+ if (columnsIncludedByNameType(oldCols, newCols) ||
+ MetaStoreServerUtils.arePrefixColumns(oldCols, newCols)) {
Review Comment:
ditto, redundant case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]