This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 975f644c7 [core] Adjust compaction.max.file-num of bucketed append
table to 5 (#3903)
975f644c7 is described below
commit 975f644c7121bb30f79c54dccd6ba4e1d13663c1
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Aug 7 18:05:59 2024 +0800
[core] Adjust compaction.max.file-num of bucketed append table to 5 (#3903)
---
docs/content/append-table/streaming.md | 2 +-
.../shortcodes/generated/core_configuration.html | 4 ++--
.../src/main/java/org/apache/paimon/CoreOptions.java | 18 ++++++++++++------
.../append/AppendOnlyTableCompactionCoordinator.java | 3 ++-
.../paimon/operation/AppendOnlyFileStoreWrite.java | 2 +-
.../paimon/flink/sink/StoreMultiCommitterTest.java | 2 ++
.../apache/paimon/spark/sql/DeletionVectorTest.scala | 12 +++++++-----
7 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/docs/content/append-table/streaming.md
b/docs/content/append-table/streaming.md
index c3d64a650..3758c7f56 100644
--- a/docs/content/append-table/streaming.md
+++ b/docs/content/append-table/streaming.md
@@ -115,7 +115,7 @@ control the strategy of compaction:
</tr>
<tr>
<td><h5>compaction.max.file-num</h5></td>
- <td style="word-wrap: break-word;">50</td>
+ <td style="word-wrap: break-word;">5</td>
<td>Integer</td>
<td>For file set [f_0,...,f_N], the maximum file number to trigger
a compaction for append table, even if sum(size(f_i)) < targetFileSize. This
value avoids pending too much small files, which slows down the
performance.</td>
</tr>
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html
b/docs/layouts/shortcodes/generated/core_configuration.html
index 1f5fb0dc1..784719d76 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -130,9 +130,9 @@ under the License.
</tr>
<tr>
<td><h5>compaction.max.file-num</h5></td>
- <td style="word-wrap: break-word;">50</td>
+ <td style="word-wrap: break-word;">(none)</td>
<td>Integer</td>
- <td>For file set [f_0,...,f_N], the maximum file number to trigger
a compaction for append-only table, even if sum(size(f_i)) < targetFileSize.
This value avoids pending too much small files, which slows down the
performance.</td>
+ <td>For file set [f_0,...,f_N], the maximum file number to trigger
a compaction for append-only table, even if sum(size(f_i)) < targetFileSize.
This value avoids pending too much small files.<ul><li>Default value of Append
Table is '50'.</li><li>Default value of Bucketed Append Table is
'5'.</li></ul></td>
</tr>
<tr>
<td><h5>compaction.min.file-num</h5></td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index 660580284..bd10ff000 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -522,12 +522,18 @@ public class CoreOptions implements Serializable {
public static final ConfigOption<Integer> COMPACTION_MAX_FILE_NUM =
key("compaction.max.file-num")
.intType()
- .defaultValue(50)
+ .noDefaultValue()
.withFallbackKeys("compaction.early-max.file-num")
.withDescription(
- "For file set [f_0,...,f_N], the maximum file
number to trigger a compaction "
- + "for append-only table, even if
sum(size(f_i)) < targetFileSize. This value "
- + "avoids pending too much small files,
which slows down the performance.");
+ Description.builder()
+ .text(
+ "For file set [f_0,...,f_N], the
maximum file number to trigger a compaction "
+ + "for append-only table,
even if sum(size(f_i)) < targetFileSize. This value "
+ + "avoids pending too much
small files.")
+ .list(
+ text("Default value of Append
Table is '50'."),
+ text("Default value of Bucketed
Append Table is '5'."))
+ .build());
public static final ConfigOption<ChangelogProducer> CHANGELOG_PRODUCER =
key("changelog-producer")
@@ -1679,8 +1685,8 @@ public class CoreOptions implements Serializable {
return options.get(COMPACTION_MIN_FILE_NUM);
}
- public int compactionMaxFileNum() {
- return options.get(COMPACTION_MAX_FILE_NUM);
+ public Optional<Integer> compactionMaxFileNum() {
+ return options.getOptional(COMPACTION_MAX_FILE_NUM);
}
public long dynamicBucketTargetRowNum() {
diff --git
a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
index 07ed3c9dd..d54221403 100644
---
a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
+++
b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
@@ -95,7 +95,8 @@ public class AppendOnlyTableCompactionCoordinator {
this.targetFileSize = coreOptions.targetFileSize(false);
this.compactionFileSize = coreOptions.compactionFileSize(false);
this.minFileNum = coreOptions.compactionMinFileNum();
- this.maxFileNum = coreOptions.compactionMaxFileNum();
+ // this is global compaction, avoid too many compaction tasks
+ this.maxFileNum = coreOptions.compactionMaxFileNum().orElse(50);
}
public List<AppendOnlyCompactionTask> run() {
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
index e914f6950..744a130ca 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
@@ -105,7 +105,7 @@ public class AppendOnlyFileStoreWrite extends
MemoryFileStoreWrite<InternalRow>
this.pathFactory = pathFactory;
this.targetFileSize = options.targetFileSize(false);
this.compactionMinFileNum = options.compactionMinFileNum();
- this.compactionMaxFileNum = options.compactionMaxFileNum();
+ this.compactionMaxFileNum = options.compactionMaxFileNum().orElse(5);
this.commitForceCompact = options.commitForceCompact();
this.skipCompaction = options.writeOnly();
this.fileCompression = options.fileCompression();
diff --git
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
index 832da65f7..10e432f3c 100644
---
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
+++
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
@@ -70,6 +70,7 @@ import java.util.List;
import java.util.Objects;
import java.util.UUID;
+import static org.apache.paimon.CoreOptions.COMPACTION_MAX_FILE_NUM;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;
@@ -131,6 +132,7 @@ class StoreMultiCommitterTest {
Options secondOptions = new Options();
secondOptions.setString("bucket", "1");
secondOptions.setString("bucket-key", "a");
+ secondOptions.set(COMPACTION_MAX_FILE_NUM, 50);
Schema secondTableSchema =
new Schema(
rowType2.getFields(),
diff --git
a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
index 68aeffe55..26d07ce06 100644
---
a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
+++
b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
@@ -45,11 +45,13 @@ class DeletionVectorTest extends PaimonSparkTestBase {
} else {
""
}
- spark.sql(
- s"""
- |CREATE TABLE T (id INT, name STRING)
- |TBLPROPERTIES ('deletion-vectors.enabled' = 'true', 'bucket' =
'$bucket' $bucketKey)
- |""".stripMargin)
+ spark.sql(s"""
+ |CREATE TABLE T (id INT, name STRING)
+ |TBLPROPERTIES (
+ | 'deletion-vectors.enabled' = 'true',
+ | 'compaction.max.file-num' = '50',
+ | 'bucket' = '$bucket' $bucketKey)
+ |""".stripMargin)
val table = loadTable("T")
val dvMaintainerFactory =