saihemanth-cloudera commented on code in PR #5578:
URL: https://github.com/apache/hive/pull/5578#discussion_r1955461189
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java:
##########
@@ -7379,47 +7381,97 @@ public boolean delete_partition_column_statistics(String dbName, String tableNam
     if (colName != null) {
       colName = colName.toLowerCase();
     }
-    startFunction("delete_column_statistics_by_partition",": table=" +
+    DeleteColumnStatisticsRequest request = new DeleteColumnStatisticsRequest(parsedDbName[DB_NAME], tableName);
+    request.setEngine(engine);
+    request.setCat_name(parsedDbName[CAT_NAME]);
+    request.addToCol_names(colName);
+    request.addToPart_names(partName);
+    return delete_column_statistics_req(request);
+  }
+
+  @Override
+  public boolean delete_column_statistics_req(DeleteColumnStatisticsRequest req) throws TException {
+    String dbName = normalizeIdentifier(req.getDb_name());
+    String tableName = normalizeIdentifier(req.getTbl_name());
+    List<String> colNames = req.getCol_names();
+    String engine = req.getEngine();
+    String[] parsedDbName = parseDbName(dbName, conf);
+    if (req.getCat_name() != null) {
+      parsedDbName[CAT_NAME] = normalizeIdentifier(req.getCat_name());
+    }
+    startFunction("delete_column_statistics_req", ": table=" +
         TableName.getQualified(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName) +
-        " partition=" + partName + " column=" + colName);
+        " partitions=" + req.getPart_names() + " column=" + colNames + " engine=" + engine);
     boolean ret = false, committed = false;
-
-    getMS().openTransaction();
+    List<ListenerEvent> events = new ArrayList<>();
+    EventType eventType = null;
+    final RawStore rawStore = getMS();
+    rawStore.openTransaction();
     try {
-      List<String> partVals = getPartValsFromName(getMS(), parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, partName);
-      Table table = getMS().getTable(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName);
-      // This API looks unused; if it were used we'd need to update stats state and write ID.
-      // We cannot just randomly nuke some txn stats.
+      Table table = rawStore.getTable(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName);
+      boolean isPartitioned = table.getPartitionKeysSize() > 0;
       if (TxnUtils.isTransactionalTable(table)) {
         throw new MetaException("Cannot delete stats via this API for a transactional table");
       }
-
-      ret = getMS().deletePartitionColumnStatistics(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
-          partName, partVals, colName, engine);
-      if (ret) {
-        if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
-          MetaStoreListenerNotifier.notifyEvent(transactionalListeners,
-              EventType.DELETE_PARTITION_COLUMN_STAT,
-              new DeletePartitionColumnStatEvent(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
-                  partName, partVals, colName, engine, this));
+      if (!isPartitioned || req.isTableLevel()) {
+        ret = rawStore.deleteTableColumnStatistics(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, colNames, engine);
+        if (ret) {
+          eventType = EventType.DELETE_TABLE_COLUMN_STAT;
+          for (String colName :
+              colNames == null ? table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList()) : colNames) {
+            if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
+              MetaStoreListenerNotifier.notifyEvent(transactionalListeners, eventType,
+                  new DeleteTableColumnStatEvent(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, colName, engine, this));
+            }
+            events.add(new DeleteTableColumnStatEvent(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, colName, engine, this));
+          }
         }
-        if (!listeners.isEmpty()) {
-          MetaStoreListenerNotifier.notifyEvent(listeners,
-              EventType.DELETE_PARTITION_COLUMN_STAT,
-              new DeletePartitionColumnStatEvent(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
+      } else {
+        List<String> partNames = new ArrayList<>();
+        if (req.getPart_namesSize() > 0) {
+          partNames.addAll(req.getPart_names());
+        } else {
+          partNames.addAll(rawStore.listPartitionNames(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, (short) -1));
+        }
+        if (partNames.isEmpty()) {
+          // no partition found, bail out early
+          return true;
+        }
+        ret = rawStore.deletePartitionColumnStatistics(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
+            partNames, colNames, engine);
+        if (ret) {
+          eventType = EventType.DELETE_PARTITION_COLUMN_STAT;
+          for (String colName : colNames == null ? table.getSd().getCols().stream().map(FieldSchema::getName)
+              .collect(Collectors.toList()) : colNames) {
+            for (String partName : partNames) {
+              List<String> partVals = getPartValsFromName(getMS(), parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, partName);
+              if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
+                MetaStoreListenerNotifier.notifyEvent(transactionalListeners, eventType,
+                    new DeletePartitionColumnStatEvent(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
+                        partName, partVals, colName, engine, this));
+              }
+              events.add(new DeletePartitionColumnStatEvent(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,

Review Comment:
   This will be a performance killer when dropping partition stats for 10K+ partitions, because you are fetching the table object from the metastore for each partition via getPartValsFromName(RawStore, catalogName, dbName, tableName, partName). Please change this to getPartValsFromName(Table, partName), since you have already fetched the table object earlier in this method.
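   For illustration, a minimal sketch of the change being requested (hypothetical; it assumes the getPartValsFromName(Table, String) overload the comment refers to, and omits the surrounding listener/event code from the patch):

       // Sketch only, not the actual patch: the Table was already fetched once above,
       // so partition values can be parsed from it without another RawStore lookup per partition.
       Table table = rawStore.getTable(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName);
       for (String partName : partNames) {
         // before: getPartValsFromName(getMS(), parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, partName)
         List<String> partVals = getPartValsFromName(table, partName);
         // ... build and fire the DeletePartitionColumnStatEvent with partVals ...
       }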