bowenli86 commented on a change in pull request #8636: [FLINK-12237][hive]Support Hive table stats related operations in HiveCatalog
URL: https://github.com/apache/flink/pull/8636#discussion_r290872434
 
 

 ##########
 File path: flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/table/catalog/hive/HiveCatalog.java
 ##########
 @@ -1055,46 +1058,186 @@ private static Function instantiateHiveFunction(ObjectPath functionPath, HiveCat
                );
        }
 
+       private boolean isTablePartitioned(Table hiveTable) {
+               return hiveTable.getPartitionKeysSize() != 0;
+       }
+
        // ------ stats ------
 
        @Override
        public void alterTableStatistics(ObjectPath tablePath, CatalogTableStatistics tableStatistics, boolean ignoreIfNotExists) throws TableNotExistException, CatalogException {
-
+               try {
+                       Table hiveTable = getHiveTable(tablePath);
+                       // Set table stats
+                       if (needUpdateStatistics(hiveTable.getParameters(), tableStatistics)) {
+                               updateStatisticsParameters(tableStatistics, hiveTable.getParameters());
+                               client.alter_table(tablePath.getDatabaseName(), tablePath.getObjectName(), hiveTable);
+                       }
+               } catch (TableNotExistException e) {
+                       if (!ignoreIfNotExists) {
+                               throw e;
+                       }
+               } catch (TException e) {
+                       throw new CatalogException(String.format("Failed to alter table stats of table %s", tablePath.getFullName()), e);
+               }
        }
 
        @Override
        public void alterTableColumnStatistics(ObjectPath tablePath, CatalogColumnStatistics columnStatistics, boolean ignoreIfNotExists) throws TableNotExistException, CatalogException {
+               try {
+                       Table hiveTable = getHiveTable(tablePath);
+                       // Set table column stats. This only works for non-partitioned tables.
+                       if (!isTablePartitioned(hiveTable)) {
+                               client.updateTableColumnStatistics(HiveCatalogUtil.createColumnStats(hiveTable, columnStatistics.getColumnStatisticsData()));
+                       }
+               } catch (TableNotExistException e) {
+                       if (!ignoreIfNotExists) {
+                               throw e;
+                       }
+               } catch (TException e) {
+                       throw new CatalogException(String.format("Failed to alter table column stats of table %s", tablePath.getFullName()), e);
+               }
+       }
 
+       private static boolean needUpdateStatistics(Map<String, String> oldParameters, CatalogTableStatistics statistics) {
+               String oldRowCount = oldParameters.getOrDefault(StatsSetupConst.ROW_COUNT, "0");
+               String oldTotalSize = oldParameters.getOrDefault(StatsSetupConst.TOTAL_SIZE, "0");
+               String oldNumFiles = oldParameters.getOrDefault(StatsSetupConst.NUM_FILES, "0");
+               String oldRawDataSize = oldParameters.getOrDefault(StatsSetupConst.RAW_DATA_SIZE, "0");
+               return statistics.getRowCount() != Long.parseLong(oldRowCount) || statistics.getTotalSize() != Long.parseLong(oldTotalSize)
+                               || statistics.getFileCount() != Integer.parseInt(oldNumFiles) || statistics.getRawDataSize() != Long.parseLong(oldRawDataSize);
+       }
+
+       private static void updateStatisticsParameters(CatalogTableStatistics tableStatistics, Map<String, String> parameters) {
+               parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(tableStatistics.getRowCount()));
+               parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(tableStatistics.getTotalSize()));
+               parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(tableStatistics.getFileCount()));
+               parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(tableStatistics.getRawDataSize()));
+       }
+
+       private static CatalogTableStatistics createCatalogTableStatistics(Map<String, String> parameters) {
+               long rowRount = Long.parseLong(parameters.getOrDefault(StatsSetupConst.ROW_COUNT, "0"));
+               long totalSize = Long.parseLong(parameters.getOrDefault(StatsSetupConst.TOTAL_SIZE, "0"));
+               int numFiles = Integer.parseInt(parameters.getOrDefault(StatsSetupConst.NUM_FILES, "0"));
+               long rawDataSize = Long.parseLong(parameters.getOrDefault(StatsSetupConst.RAW_DATA_SIZE, "0"));
+               return new CatalogTableStatistics(rowRount, numFiles, totalSize, rawDataSize);
        }
 
        @Override
        public void alterPartitionStatistics(ObjectPath tablePath, CatalogPartitionSpec partitionSpec, CatalogTableStatistics partitionStatistics, boolean ignoreIfNotExists) throws PartitionNotExistException, CatalogException {
-
+               try {
+                       Partition hivePartition = getHivePartition(tablePath, partitionSpec);
+                       // Set table stats
+                       if (needUpdateStatistics(hivePartition.getParameters(), partitionStatistics)) {
+                               updateStatisticsParameters(partitionStatistics, hivePartition.getParameters());
+                               client.alter_partition(tablePath.getDatabaseName(), tablePath.getObjectName(), hivePartition);
+                       }
+               } catch (TableNotExistException | PartitionSpecInvalidException e) {
+                       throw new PartitionNotExistException(getName(), tablePath, partitionSpec);
 
 Review comment:
   Include the root cause exception `e` when rethrowing, so the original failure isn't lost.
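   
   A minimal sketch of the suggested fix, assuming `PartitionNotExistException` offers a constructor overload that accepts a `Throwable` cause (an assumption to verify against the actual class in `org.apache.flink.table.catalog.exceptions`):
   
   ```java
   } catch (TableNotExistException | PartitionSpecInvalidException e) {
           // Chain `e` so the root cause (missing table vs. invalid partition spec) stays in the stack trace.
           throw new PartitionNotExistException(getName(), tablePath, partitionSpec, e);
   }
   ```
   
   Chaining the cause preserves the original stack trace, which makes it much easier to tell a missing table apart from an invalid partition spec when debugging.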
