FANNG1 commented on code in PR #10189:
URL: https://github.com/apache/gravitino/pull/10189#discussion_r2893611465


##########
api/src/main/java/org/apache/gravitino/policy/IcebergCompactionContent.java:
##########
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.policy;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.regex.Pattern;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.gravitino.MetadataObject;
+
+/** Built-in policy content for Iceberg compaction strategy. */
+public class IcebergCompactionContent implements PolicyContent {
+  /** Property key for strategy type. */
+  public static final String STRATEGY_TYPE_KEY = "strategy.type";
+  /** Strategy type value for compaction. */
+  public static final String STRATEGY_TYPE_VALUE = "compaction";
+  /** Property key for job template name. */
+  public static final String JOB_TEMPLATE_NAME_KEY = "job.template-name";
+  /** Built-in job template name for Iceberg rewrite data files. */
+  public static final String JOB_TEMPLATE_NAME_VALUE = 
"builtin-iceberg-rewrite-data-files";
+  /** Prefix for rewrite options propagated to job options. */
+  public static final String JOB_OPTIONS_PREFIX = "job.options.";
+  /** Rule key for trigger expression. */
+  public static final String TRIGGER_EXPR_KEY = "trigger-expr";
+  /** Rule key for score expression. */
+  public static final String SCORE_EXPR_KEY = "score-expr";
+  /** Rule key for minimum data file MSE threshold. */
+  public static final String MIN_DATA_FILE_MSE_KEY = "minDataFileMse";
+  /** Rule key for minimum delete file count threshold. */
+  public static final String MIN_DELETE_FILE_NUMBER_KEY = 
"minDeleteFileNumber";
+  /** Rule key for data file MSE score weight. */
+  public static final String DATA_FILE_MSE_WEIGHT_KEY = "dataFileMseWeight";
+  /** Rule key for delete file number score weight. */
+  public static final String DELETE_FILE_NUMBER_WEIGHT_KEY = 
"deleteFileNumberWeight";
+  /** Rule key for max partition number selected for compaction. */
+  public static final String MAX_PARTITION_NUM_KEY = "max-partition-num";
+  /** Metric name for data file MSE. */
+  public static final String DATA_FILE_MSE_METRIC = "custom-data-file-mse";

Review Comment:
   Thanks for catching this. After rebasing to commit 
f7b9c3905f84e59291c8cb8635821b609da00139, the built-in stats job now emits 
`custom-data-file-mse` and aggregated `custom-delete-file-number` (in addition 
to position/equality delete metrics), so the policy expressions are aligned 
end-to-end.



##########
common/src/main/java/org/apache/gravitino/dto/policy/PolicyContentDTO.java:
##########
@@ -69,4 +78,163 @@ public Map<String, String> properties() {
       return properties;
     }
   }
+
+  /** Represents a typed iceberg compaction policy content DTO. */
+  @EqualsAndHashCode
+  @ToString
+  @Builder(setterPrefix = "with")
+  @AllArgsConstructor(access = lombok.AccessLevel.PRIVATE)
+  class IcebergCompactionContentDTO implements PolicyContentDTO {
+
+    @JsonProperty("minDataFileMse")
+    @JsonAlias("minDatafileMse")
+    private Long minDataFileMse;
+
+    @JsonProperty("minDeleteFileNumber")
+    private Long minDeleteFileNumber;
+
+    @JsonProperty("dataFileMseWeight")
+    @JsonAlias("datafileMseWeight")
+    private Long dataFileMseWeight;
+
+    @JsonProperty("deleteFileNumberWeight")
+    private Long deleteFileNumberWeight;
+
+    @JsonProperty("maxPartitionNum")
+    private Long maxPartitionNum;
+
+    @JsonProperty("rewriteOptions")
+    private Map<String, String> rewriteOptions;
+
+    private static final Pattern OPTION_KEY_PATTERN = 
Pattern.compile("[A-Za-z0-9._-]+");
+
+    // Default constructor for Jackson deserialization only.
+    private IcebergCompactionContentDTO() {}
+
+    /**
+     * Returns the minimum threshold for custom-data-file-mse metric.
+     *
+     * @return minimum data file MSE threshold
+     */
+    public Long minDataFileMse() {
+      return minDataFileMse == null
+          ? IcebergCompactionContent.DEFAULT_MIN_DATA_FILE_MSE
+          : minDataFileMse;
+    }
+
+    /**
+     * Returns the minimum threshold for custom-delete-file-number metric.
+     *
+     * @return minimum delete file number threshold
+     */
+    public Long minDeleteFileNumber() {
+      return minDeleteFileNumber == null
+          ? IcebergCompactionContent.DEFAULT_MIN_DELETE_FILE_NUMBER
+          : minDeleteFileNumber;
+    }
+
+    /**
+     * Returns the weight for custom-data-file-mse metric in score expression.
+     *
+     * @return data file MSE score weight
+     */
+    public Long dataFileMseWeight() {
+      return dataFileMseWeight == null
+          ? IcebergCompactionContent.DEFAULT_DATA_FILE_MSE_WEIGHT
+          : dataFileMseWeight;
+    }
+
+    /**
+     * Returns the weight for custom-delete-file-number metric in score 
expression.
+     *
+     * @return delete file number score weight
+     */
+    public Long deleteFileNumberWeight() {
+      return deleteFileNumberWeight == null
+          ? IcebergCompactionContent.DEFAULT_DELETE_FILE_NUMBER_WEIGHT
+          : deleteFileNumberWeight;
+    }
+
+    /**
+     * Returns max partition number selected for compaction.
+     *
+     * @return max partition number
+     */
+    public Long maxPartitionNum() {
+      return maxPartitionNum == null
+          ? IcebergCompactionContent.DEFAULT_MAX_PARTITION_NUM
+          : maxPartitionNum;
+    }
+
+    /**
+     * Returns rewrite options expanded to job.options.* during rule 
generation.
+     *
+     * @return rewrite options map
+     */
+    public Map<String, String> rewriteOptions() {
+      return rewriteOptions == null
+          ? IcebergCompactionContent.DEFAULT_REWRITE_OPTIONS
+          : Collections.unmodifiableMap(new LinkedHashMap<>(rewriteOptions));
+    }
+
+    @Override
+    public Set<MetadataObject.Type> supportedObjectTypes() {
+      return ImmutableSet.of(
+          MetadataObject.Type.CATALOG, MetadataObject.Type.SCHEMA, 
MetadataObject.Type.TABLE);
+    }
+
+    @Override
+    public Map<String, String> properties() {

Review Comment:
   Agreed, fixed. I introduced a shared `toDomainContent()` in 
`IcebergCompactionContentDTO` and reuse it in both `properties()` and `rules()` 
(commit 335a16c83).



##########
maintenance/optimizer/src/main/java/org/apache/gravitino/maintenance/optimizer/recommender/strategy/GravitinoStrategy.java:
##########
@@ -43,7 +43,7 @@ public class GravitinoStrategy implements PartitionStrategy {
   /** Rule key for the partition table score aggregation mode. */
   public static final String PARTITION_TABLE_SCORE_MODE = 
"partition_table_score_mode";
   /** Rule key for the maximum number of partitions selected for execution. */
-  public static final String MAX_PARTITION_NUM = "max_partition_num";
+  public static final String MAX_PARTITION_NUM = "max-partition-num";

Review Comment:
   Good point. This change is intentional in this PR: we unified rule key style 
to kebab-case and do not keep backward-compatibility mapping for the old 
`max_partition_num` key. This follows the current design decision for this 
branch (no legacy compatibility). We can add a migration note in docs/release 
notes if needed.



##########
common/src/main/java/org/apache/gravitino/dto/policy/PolicyContentDTO.java:
##########
@@ -69,4 +78,163 @@ public Map<String, String> properties() {
       return properties;
     }
   }
+
+  /** Represents a typed iceberg compaction policy content DTO. */
+  @EqualsAndHashCode
+  @ToString
+  @Builder(setterPrefix = "with")
+  @AllArgsConstructor(access = lombok.AccessLevel.PRIVATE)
+  class IcebergCompactionContentDTO implements PolicyContentDTO {
+
+    @JsonProperty("minDataFileMse")
+    @JsonAlias("minDatafileMse")
+    private Long minDataFileMse;
+
+    @JsonProperty("minDeleteFileNumber")
+    private Long minDeleteFileNumber;
+
+    @JsonProperty("dataFileMseWeight")
+    @JsonAlias("datafileMseWeight")
+    private Long dataFileMseWeight;
+
+    @JsonProperty("deleteFileNumberWeight")
+    private Long deleteFileNumberWeight;
+
+    @JsonProperty("maxPartitionNum")
+    private Long maxPartitionNum;
+
+    @JsonProperty("rewriteOptions")
+    private Map<String, String> rewriteOptions;
+
+    private static final Pattern OPTION_KEY_PATTERN = 
Pattern.compile("[A-Za-z0-9._-]+");

Review Comment:
   Agreed, fixed. DTO validation now delegates to domain validation via 
`toDomainContent().validate()` to avoid duplicated option/range checks (commit 
335a16c83).



##########
server/src/main/java/org/apache/gravitino/server/web/rest/PolicyOperations.java:
##########
@@ -334,16 +333,19 @@ public Response listMetadataObjectsForPolicy(
   }
 
   static PolicyDTO toDTO(PolicyEntity policy, Optional<Boolean> inherited) {
+    String policyType =
+        policy.policyType() == Policy.BuiltInType.CUSTOM

Review Comment:
   Thanks, this is intentional for compatibility: `custom` is kept lowercase to 
avoid a breaking REST change for existing clients, while built-ins use enum 
names. I also added an inline comment in `PolicyOperations.toDTO` to make this 
explicit (commit 335a16c83).



##########
api/src/main/java/org/apache/gravitino/policy/IcebergCompactionContent.java:
##########
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.policy;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.regex.Pattern;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.gravitino.MetadataObject;
+
+/** Built-in policy content for Iceberg compaction strategy. */
+public class IcebergCompactionContent implements PolicyContent {
+  /** Property key for strategy type. */
+  public static final String STRATEGY_TYPE_KEY = "strategy.type";
+  /** Strategy type value for compaction. */
+  public static final String STRATEGY_TYPE_VALUE = "compaction";
+  /** Property key for job template name. */
+  public static final String JOB_TEMPLATE_NAME_KEY = "job.template-name";
+  /** Built-in job template name for Iceberg rewrite data files. */
+  public static final String JOB_TEMPLATE_NAME_VALUE = 
"builtin-iceberg-rewrite-data-files";
+  /** Prefix for rewrite options propagated to job options. */
+  public static final String JOB_OPTIONS_PREFIX = "job.options.";
+  /** Rule key for trigger expression. */
+  public static final String TRIGGER_EXPR_KEY = "trigger-expr";
+  /** Rule key for score expression. */
+  public static final String SCORE_EXPR_KEY = "score-expr";
+  /** Rule key for minimum data file MSE threshold. */
+  public static final String MIN_DATA_FILE_MSE_KEY = "minDataFileMse";
+  /** Rule key for minimum delete file count threshold. */
+  public static final String MIN_DELETE_FILE_NUMBER_KEY = 
"minDeleteFileNumber";
+  /** Rule key for data file MSE score weight. */
+  public static final String DATA_FILE_MSE_WEIGHT_KEY = "dataFileMseWeight";
+  /** Rule key for delete file number score weight. */
+  public static final String DELETE_FILE_NUMBER_WEIGHT_KEY = 
"deleteFileNumberWeight";
+  /** Rule key for max partition number selected for compaction. */
+  public static final String MAX_PARTITION_NUM_KEY = "max-partition-num";
+  /** Metric name for data file MSE. */
+  public static final String DATA_FILE_MSE_METRIC = "custom-data-file-mse";
+  /** Metric name for delete file number. */
+  public static final String DELETE_FILE_NUMBER_METRIC = 
"custom-delete-file-number";
+  /** Default minimum threshold for data file MSE metric. */
+  public static final long DEFAULT_MIN_DATA_FILE_MSE = 405323966463344L;

Review Comment:
   Agreed, fixed. Added inline derivation comment for 
`DEFAULT_MIN_DATA_FILE_MSE` (`(128 MiB * 0.15)^2`) in 
`IcebergCompactionContent` (commit 335a16c83).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to