difin commented on code in PR #5540: URL: https://github.com/apache/hive/pull/5540#discussion_r1955018526
########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionService.java: ########## @@ -53,7 +53,7 @@ public Boolean compact(Table table, CompactionInfo ci) throws Exception { CompactorUtil.checkInterrupt(CLASS_NAME); org.apache.iceberg.Table icebergTable = IcebergTableUtil.getTable(conf, table); - if (!IcebergCompactionEvaluator.isEligibleForCompaction(icebergTable, ci.partName, ci.type, conf)) { + if (!IcebergCompactionEvaluator.isEligibleForCompaction(icebergTable, ci, conf)) { Review Comment: Done ########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionUtil.java: ########## @@ -94,4 +105,48 @@ public static List<DeleteFile> getDeleteFiles(Table table, String partitionPath) return Lists.newArrayList(CloseableIterable.transform(filteredDeletesScanTasks, t -> ((PositionDeletesScanTask) t).file())); } + + /** + * Returns the file size threshold as follows: + * In case of Minor compaction: + * 1. When COMPACTION_FILE_SIZE_THRESHOLD is defined, returns it. + * 2. Otherwise, calculates the file size threshold as: + * HIVE_ICEBERG_COMPACTION_TARGET_FILE_SIZE * TableProperties.SELF_OPTIMIZING_MIN_TARGET_SIZE_RATIO_DEFAULT + * This makes the compaction evaluator consider data files with size less than the file size threshold as undersized + * segment files eligible for minor compaction (as per the Amoro compaction evaluator, which is minor compaction + * in Hive). + * In case of Major compaction returns -1. 
+ * @param ci the compaction info + * @param conf Hive configuration + */ + public static long getFileSizeThreshold(CompactionInfo ci, HiveConf conf) { + switch (ci.type) { + case MINOR: + return Optional.ofNullable(ci.getProperty(CompactorContext.COMPACTION_FILE_SIZE_THRESHOLD)) + .map(HiveConf::toSizeBytes).orElse((long) (HiveConf.toSizeBytes(HiveConf.getVar(conf, + HiveConf.ConfVars.HIVE_ICEBERG_COMPACTION_TARGET_FILE_SIZE)) * + TableProperties.SELF_OPTIMIZING_MIN_TARGET_SIZE_RATIO_DEFAULT)); + case MAJOR: + return -1; + default: + throw new RuntimeException(String.format("Unsupported compaction type %s", ci.type)); + } + } + + /** + * Returns the target file size as follows: + * 1. When COMPACTION_FILE_SIZE_THRESHOLD is defined, calculates the target size as: + * COMPACTION_FILE_SIZE_THRESHOLD / TableProperties.SELF_OPTIMIZING_MIN_TARGET_SIZE_RATIO_DEFAULT + * This makes the compaction evaluator consider data files with size less than COMPACTION_FILE_SIZE_THRESHOLD as + * fragments eligible for major compaction (as per the Amoro definition; minor compaction in Hive). + * 2. Otherwise, returns the default HIVE_ICEBERG_COMPACTION_TARGET_FILE_SIZE from HiveConf. + * @param ci the compaction info + * @param conf Hive configuration + */ + public static long getTargetFileSize(CompactionInfo ci, HiveConf conf) { + return Optional.ofNullable(ci.getProperty(CompactorContext.COMPACTION_FILE_SIZE_THRESHOLD)) Review Comment: Done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org For additional commands, e-mail: gitbox-help@hive.apache.org