bowenli86 commented on a change in pull request #8636:
[FLINK-12237][hive]Support Hive table stats related operations in HiveCatalog
URL: https://github.com/apache/flink/pull/8636#discussion_r290872434
##########
File path:
flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/table/catalog/hive/HiveCatalog.java
##########
@@ -1055,46 +1058,186 @@ private static Function
instantiateHiveFunction(ObjectPath functionPath, HiveCat
);
}
+ private boolean isTablePartitioned(Table hiveTable) {
+ return hiveTable.getPartitionKeysSize() != 0;
+ }
+
// ------ stats ------
@Override
public void alterTableStatistics(ObjectPath tablePath,
CatalogTableStatistics tableStatistics, boolean ignoreIfNotExists) throws
TableNotExistException, CatalogException {
-
+ try {
+ Table hiveTable = getHiveTable(tablePath);
+ // Set table stats
+ if (needUpdateStatistics(hiveTable.getParameters(),
tableStatistics)) {
+ updateStatisticsParameters(tableStatistics,
hiveTable.getParameters());
+ client.alter_table(tablePath.getDatabaseName(),
tablePath.getObjectName(), hiveTable);
+ }
+ } catch (TableNotExistException e) {
+ if (!ignoreIfNotExists) {
+ throw e;
+ }
+ } catch (TException e) {
+ throw new CatalogException(String.format("Failed to
alter table stats of table %s", tablePath.getFullName()), e);
+ }
}
@Override
public void alterTableColumnStatistics(ObjectPath tablePath,
CatalogColumnStatistics columnStatistics, boolean ignoreIfNotExists) throws
TableNotExistException, CatalogException {
+ try {
+ Table hiveTable = getHiveTable(tablePath);
+ // Set table column stats. This only works for
non-partitioned tables.
+ if (!isTablePartitioned(hiveTable)) {
+
client.updateTableColumnStatistics(HiveCatalogUtil.createColumnStats(hiveTable,
columnStatistics.getColumnStatisticsData()));
+ }
+ } catch (TableNotExistException e) {
+ if (!ignoreIfNotExists) {
+ throw e;
+ }
+ } catch (TException e) {
+ throw new CatalogException(String.format("Failed to
alter table column stats of table %s", tablePath.getFullName()), e);
+ }
+ }
+ private static boolean needUpdateStatistics(Map<String, String>
oldParameters, CatalogTableStatistics statistics) {
+ String oldRowCount =
oldParameters.getOrDefault(StatsSetupConst.ROW_COUNT, "0");
+ String oldTotalSize =
oldParameters.getOrDefault(StatsSetupConst.TOTAL_SIZE, "0");
+ String oldNumFiles =
oldParameters.getOrDefault(StatsSetupConst.NUM_FILES, "0");
+ String oldRawDataSize =
oldParameters.getOrDefault(StatsSetupConst.RAW_DATA_SIZE, "0");
+ return statistics.getRowCount() != Long.parseLong(oldRowCount)
|| statistics.getTotalSize() != Long.parseLong(oldTotalSize)
+ || statistics.getFileCount() !=
Integer.parseInt(oldNumFiles) || statistics.getRawDataSize() !=
Long.parseLong(oldRawDataSize);
+ }
+
+ private static void updateStatisticsParameters(CatalogTableStatistics
tableStatistics, Map<String, String> parameters) {
+ parameters.put(StatsSetupConst.ROW_COUNT,
String.valueOf(tableStatistics.getRowCount()));
+ parameters.put(StatsSetupConst.TOTAL_SIZE,
String.valueOf(tableStatistics.getTotalSize()));
+ parameters.put(StatsSetupConst.NUM_FILES,
String.valueOf(tableStatistics.getFileCount()));
+ parameters.put(StatsSetupConst.RAW_DATA_SIZE,
String.valueOf(tableStatistics.getRawDataSize()));
+ }
+
+ private static CatalogTableStatistics
createCatalogTableStatistics(Map<String, String> parameters) {
+ long rowRount =
Long.parseLong(parameters.getOrDefault(StatsSetupConst.ROW_COUNT, "0"));
+ long totalSize =
Long.parseLong(parameters.getOrDefault(StatsSetupConst.TOTAL_SIZE, "0"));
+ int numFiles =
Integer.parseInt(parameters.getOrDefault(StatsSetupConst.NUM_FILES, "0"));
+ long rawDataSize =
Long.parseLong(parameters.getOrDefault(StatsSetupConst.RAW_DATA_SIZE, "0"));
+ return new CatalogTableStatistics(rowRount, numFiles,
totalSize, rawDataSize);
}
@Override
public void alterPartitionStatistics(ObjectPath tablePath,
CatalogPartitionSpec partitionSpec, CatalogTableStatistics partitionStatistics,
boolean ignoreIfNotExists) throws PartitionNotExistException, CatalogException {
-
+ try {
+ Partition hivePartition = getHivePartition(tablePath,
partitionSpec);
+ // Set table stats
+ if (needUpdateStatistics(hivePartition.getParameters(),
partitionStatistics)) {
+ updateStatisticsParameters(partitionStatistics,
hivePartition.getParameters());
+
client.alter_partition(tablePath.getDatabaseName(), tablePath.getObjectName(),
hivePartition);
+ }
+ } catch (TableNotExistException | PartitionSpecInvalidException
e) {
+ throw new PartitionNotExistException(getName(),
tablePath, partitionSpec);
Review comment:
Include the root cause exception `e` in the thrown `PartitionNotExistException`.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services