This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 975f644c7 [core] Adjust compaction.max.file-num of bucketed append table to 5 (#3903)
975f644c7 is described below

commit 975f644c7121bb30f79c54dccd6ba4e1d13663c1
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Aug 7 18:05:59 2024 +0800

    [core] Adjust compaction.max.file-num of bucketed append table to 5 (#3903)
---
 docs/content/append-table/streaming.md                 |  2 +-
 .../shortcodes/generated/core_configuration.html       |  4 ++--
 .../src/main/java/org/apache/paimon/CoreOptions.java   | 18 ++++++++++++------
 .../append/AppendOnlyTableCompactionCoordinator.java   |  3 ++-
 .../paimon/operation/AppendOnlyFileStoreWrite.java     |  2 +-
 .../paimon/flink/sink/StoreMultiCommitterTest.java     |  2 ++
 .../apache/paimon/spark/sql/DeletionVectorTest.scala   | 12 +++++++-----
 7 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/docs/content/append-table/streaming.md b/docs/content/append-table/streaming.md
index c3d64a650..3758c7f56 100644
--- a/docs/content/append-table/streaming.md
+++ b/docs/content/append-table/streaming.md
@@ -115,7 +115,7 @@ control the strategy of compaction:
         </tr>
         <tr>
             <td><h5>compaction.max.file-num</h5></td>
-            <td style="word-wrap: break-word;">50</td>
+            <td style="word-wrap: break-word;">5</td>
             <td>Integer</td>
             <td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append table, even if sum(size(f_i)) &lt; targetFileSize. This value avoids pending too much small files, which slows down the performance.</td>
         </tr>
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index 1f5fb0dc1..784719d76 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -130,9 +130,9 @@ under the License.
         </tr>
         <tr>
             <td><h5>compaction.max.file-num</h5></td>
-            <td style="word-wrap: break-word;">50</td>
+            <td style="word-wrap: break-word;">(none)</td>
             <td>Integer</td>
-            <td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) &lt; targetFileSize. This value avoids pending too much small files, which slows down the performance.</td>
+            <td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) &lt; targetFileSize. This value avoids pending too much small files.<ul><li>Default value of Append Table is '50'.</li><li>Default value of Bucketed Append Table is '5'.</li></ul></td>
         </tr>
         <tr>
             <td><h5>compaction.min.file-num</h5></td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index 660580284..bd10ff000 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -522,12 +522,18 @@ public class CoreOptions implements Serializable {
     public static final ConfigOption<Integer> COMPACTION_MAX_FILE_NUM =
             key("compaction.max.file-num")
                     .intType()
-                    .defaultValue(50)
+                    .noDefaultValue()
                     .withFallbackKeys("compaction.early-max.file-num")
                     .withDescription(
-                            "For file set [f_0,...,f_N], the maximum file number to trigger a compaction "
-                                    + "for append-only table, even if sum(size(f_i)) < targetFileSize. This value "
-                                    + "avoids pending too much small files, which slows down the performance.");
+                            Description.builder()
+                                    .text(
+                                            "For file set [f_0,...,f_N], the maximum file number to trigger a compaction "
+                                                    + "for append-only table, even if sum(size(f_i)) < targetFileSize. This value "
+                                                    + "avoids pending too much small files.")
+                                    .list(
+                                            text("Default value of Append Table is '50'."),
+                                            text("Default value of Bucketed Append Table is '5'."))
+                                    .build());
 
     public static final ConfigOption<ChangelogProducer> CHANGELOG_PRODUCER =
             key("changelog-producer")
@@ -1679,8 +1685,8 @@ public class CoreOptions implements Serializable {
         return options.get(COMPACTION_MIN_FILE_NUM);
     }
 
-    public int compactionMaxFileNum() {
-        return options.get(COMPACTION_MAX_FILE_NUM);
+    public Optional<Integer> compactionMaxFileNum() {
+        return options.getOptional(COMPACTION_MAX_FILE_NUM);
     }
 
     public long dynamicBucketTargetRowNum() {
diff --git a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
index 07ed3c9dd..d54221403 100644
--- a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
+++ b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
@@ -95,7 +95,8 @@ public class AppendOnlyTableCompactionCoordinator {
         this.targetFileSize = coreOptions.targetFileSize(false);
         this.compactionFileSize = coreOptions.compactionFileSize(false);
         this.minFileNum = coreOptions.compactionMinFileNum();
-        this.maxFileNum = coreOptions.compactionMaxFileNum();
+        // this is global compaction, avoid too many compaction tasks
+        this.maxFileNum = coreOptions.compactionMaxFileNum().orElse(50);
     }
 
     public List<AppendOnlyCompactionTask> run() {
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
index e914f6950..744a130ca 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
@@ -105,7 +105,7 @@ public class AppendOnlyFileStoreWrite extends MemoryFileStoreWrite<InternalRow>
         this.pathFactory = pathFactory;
         this.targetFileSize = options.targetFileSize(false);
         this.compactionMinFileNum = options.compactionMinFileNum();
-        this.compactionMaxFileNum = options.compactionMaxFileNum();
+        this.compactionMaxFileNum = options.compactionMaxFileNum().orElse(5);
         this.commitForceCompact = options.commitForceCompact();
         this.skipCompaction = options.writeOnly();
         this.fileCompression = options.fileCompression();
diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
index 832da65f7..10e432f3c 100644
--- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
+++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
@@ -70,6 +70,7 @@ import java.util.List;
 import java.util.Objects;
 import java.util.UUID;
 
+import static org.apache.paimon.CoreOptions.COMPACTION_MAX_FILE_NUM;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.fail;
 
@@ -131,6 +132,7 @@ class StoreMultiCommitterTest {
         Options secondOptions = new Options();
         secondOptions.setString("bucket", "1");
         secondOptions.setString("bucket-key", "a");
+        secondOptions.set(COMPACTION_MAX_FILE_NUM, 50);
         Schema secondTableSchema =
                 new Schema(
                         rowType2.getFields(),
diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
index 68aeffe55..26d07ce06 100644
--- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
+++ b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
@@ -45,11 +45,13 @@ class DeletionVectorTest extends PaimonSparkTestBase {
           } else {
             ""
           }
-          spark.sql(
-            s"""
-               |CREATE TABLE T (id INT, name STRING)
-               |TBLPROPERTIES ('deletion-vectors.enabled' = 'true', 'bucket' = '$bucket' $bucketKey)
-               |""".stripMargin)
+          spark.sql(s"""
+                       |CREATE TABLE T (id INT, name STRING)
+                       |TBLPROPERTIES (
+                       |  'deletion-vectors.enabled' = 'true',
+                       |  'compaction.max.file-num' = '50',
+                       |  'bucket' = '$bucket' $bucketKey)
+                       |""".stripMargin)
 
           val table = loadTable("T")
           val dvMaintainerFactory =

Reply via email to