difin commented on code in PR #5540:
URL: https://github.com/apache/hive/pull/5540#discussion_r1955018526


##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionService.java:
##########
@@ -53,7 +53,7 @@ public Boolean compact(Table table, CompactionInfo ci) throws 
Exception {
     CompactorUtil.checkInterrupt(CLASS_NAME);
 
     org.apache.iceberg.Table icebergTable = IcebergTableUtil.getTable(conf, 
table);
-    if (!IcebergCompactionEvaluator.isEligibleForCompaction(icebergTable, 
ci.partName, ci.type, conf)) {
+    if (!IcebergCompactionEvaluator.isEligibleForCompaction(icebergTable, ci, 
conf)) {

Review Comment:
   Done



##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionUtil.java:
##########
@@ -94,4 +105,48 @@ public static List<DeleteFile> getDeleteFiles(Table table, 
String partitionPath)
     return 
Lists.newArrayList(CloseableIterable.transform(filteredDeletesScanTasks,
         t -> ((PositionDeletesScanTask) t).file()));
   }
+
+  /**
+   * Returns target file size as following:
+   * In case of Minor compaction:
+   *  1. When COMPACTION_FILE_SIZE_THRESHOLD is defined, returns it.
+   *  2. Otherwise, calculates the file size threshold as:
+   *       COMPACTION_FILE_SIZE_THRESHOLD * 
TableProperties.HIVE_ICEBERG_COMPACTION_TARGET_FILE_SIZE
+   *     This makes Compaction evaluator consider data files with size less 
than file size threshold as undersized
+   *     segment files eligible for minor compaction (as per Amoro compaction 
evaluator, which is minor compaction
+   *     in Hive).
+   * In case of Major compaction returns -1.
+   * @param ci the compaction info
+   * @param conf Hive configuration
+   */
+  public static long getFileSizeThreshold(CompactionInfo ci, HiveConf conf) {
+    switch (ci.type) {
+      case MINOR:
+        return 
Optional.ofNullable(ci.getProperty(CompactorContext.COMPACTION_FILE_SIZE_THRESHOLD))
+            .map(HiveConf::toSizeBytes).orElse((long) 
(HiveConf.toSizeBytes(HiveConf.getVar(conf,
+                HiveConf.ConfVars.HIVE_ICEBERG_COMPACTION_TARGET_FILE_SIZE)) *
+                
TableProperties.SELF_OPTIMIZING_MIN_TARGET_SIZE_RATIO_DEFAULT));
+      case MAJOR:
+        return -1;
+      default:
+        throw new RuntimeException(String.format("Unsupported compaction type 
%s", ci.type));
+    }
+  }
+
+  /**
+   * Returns target file size as following:
+   *  1. When COMPACTION_FILE_SIZE_THRESHOLD is defined, calculates target 
size as:
+   *       COMPACTION_FILE_SIZE_THRESHOLD / 
TableProperties.SELF_OPTIMIZING_MIN_TARGET_SIZE_RATIO_DEFAULT
+   *     This makes Compaction evaluator consider data files with size less 
than COMPACTION_FILE_SIZE_THRESHOLD as
+   *     fragments eligible for major (as per Amoro definition, minor 
compaction in Hive) compaction.
+   *  2. Otherwise, returns the default 
HIVE_ICEBERG_COMPACTION_TARGET_FILE_SIZE from HiveConf.
+   * @param ci the compaction info
+   * @param conf Hive configuration
+   */
+  public static long getTargetFileSize(CompactionInfo ci, HiveConf conf) {
+    return 
Optional.ofNullable(ci.getProperty(CompactorContext.COMPACTION_FILE_SIZE_THRESHOLD))

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to