This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 6f967b4db5 [core] Make metadata.stats-dense-store default value is true (#4617)
6f967b4db5 is described below
commit 6f967b4db5ca69d9ca530195bfc56e461b1719c6
Author: Jingsong Lee <[email protected]>
AuthorDate: Mon Dec 2 15:43:53 2024 +0800
[core] Make metadata.stats-dense-store default value is true (#4617)
---
docs/content/flink/sql-ddl.md | 4 +-
.../shortcodes/generated/core_configuration.html | 4 +-
.../main/java/org/apache/paimon/CoreOptions.java | 6 +--
.../org/apache/paimon/stats/StatsTableTest.java | 52 +++++++++++++++++++++-
.../paimon/table/AppendOnlyFileStoreTableTest.java | 1 -
.../paimon/table/PrimaryKeyFileStoreTableTest.java | 2 -
6 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/docs/content/flink/sql-ddl.md b/docs/content/flink/sql-ddl.md
index 0324e66556..8b8c069229 100644
--- a/docs/content/flink/sql-ddl.md
+++ b/docs/content/flink/sql-ddl.md
@@ -203,8 +203,8 @@ Paimon will automatically collect the statistics of the data file for speeding u
The statistics collector mode can be configured by `'metadata.stats-mode'`, by default is `'truncate(16)'`.
You can configure the field level by setting `'fields.{field_name}.stats-mode'`.
-For the stats mode of `none`, we suggest that you configure `metadata.stats-dense-store` = `true`, which will
-significantly reduce the storage size of the manifest.
+For the stats mode of `none`, by default `metadata.stats-dense-store` is `true`, which will significantly reduce the
+storage size of the manifest. But the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.
### Field Default Value
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index a38f07784f..fad1f4907e 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -485,9 +485,9 @@ Mainly to resolve data skew on primary keys. We recommend starting with 64 mb wh
</tr>
<tr>
<td><h5>metadata.stats-dense-store</h5></td>
- <td style="word-wrap: break-word;">false</td>
+ <td style="word-wrap: break-word;">true</td>
<td>Boolean</td>
- <td>Whether to store statistic densely in metadata (manifest files), which will significantly reduce the storage size of metadata when the none statistic mode is set.<br />Note, when this mode is enabled, the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.</td>
+ <td>Whether to store statistic densely in metadata (manifest files), which will significantly reduce the storage size of metadata when the none statistic mode is set.<br />Note, when this mode is enabled with 'metadata.stats-mode:none', the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.</td>
</tr>
<tr>
<td><h5>metadata.stats-mode</h5></td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index fce09357f0..b9b5675f1d 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -1101,7 +1101,7 @@ public class CoreOptions implements Serializable {
public static final ConfigOption<Boolean> METADATA_STATS_DENSE_STORE =
key("metadata.stats-dense-store")
.booleanType()
- .defaultValue(false)
+ .defaultValue(true)
.withDescription(
Description.builder()
.text(
@@ -1110,8 +1110,8 @@ public class CoreOptions implements Serializable {
+ " none statistic mode is set.")
.linebreak()
.text(
- "Note, when this mode is enabled, the Paimon sdk in reading engine requires"
- + " at least version 0.9.1 or 1.0.0 or higher.")
+ "Note, when this mode is enabled with 'metadata.stats-mode:none', the Paimon sdk in"
+ + " reading engine requires at least version 0.9.1 or 1.0.0 or higher.")
.build());
public static final ConfigOption<String> COMMIT_CALLBACKS =
diff --git a/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
index 494b2e28e4..25282d898a 100644
--- a/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
@@ -35,6 +35,7 @@ import org.apache.paimon.types.DataTypes;
import org.junit.jupiter.api.Test;
+import static org.apache.paimon.CoreOptions.METADATA_STATS_DENSE_STORE;
import static org.apache.paimon.CoreOptions.METADATA_STATS_MODE;
import static org.assertj.core.api.Assertions.assertThat;
@@ -42,10 +43,11 @@ import static org.assertj.core.api.Assertions.assertThat;
public class StatsTableTest extends TableTestBase {
@Test
- public void testPartitionStats() throws Exception {
+ public void testPartitionStatsNotDense() throws Exception {
Identifier identifier = identifier("T");
Options options = new Options();
options.set(METADATA_STATS_MODE, "NONE");
+ options.set(METADATA_STATS_DENSE_STORE, false);
options.set(CoreOptions.BUCKET, 1);
Schema schema =
Schema.newBuilder()
@@ -90,4 +92,52 @@ public class StatsTableTest extends TableTestBase {
assertThat(recordStats.maxValues().isNullAt(1)).isTrue();
assertThat(recordStats.maxValues().isNullAt(2)).isTrue();
}
+
+ @Test
+ public void testPartitionStatsDenseMode() throws Exception {
+ Identifier identifier = identifier("T");
+ Options options = new Options();
+ options.set(METADATA_STATS_MODE, "NONE");
+ options.set(CoreOptions.BUCKET, 1);
+ Schema schema =
+ Schema.newBuilder()
+ .column("pt", DataTypes.INT())
+ .column("pk", DataTypes.INT())
+ .column("col1", DataTypes.INT())
+ .partitionKeys("pt")
+ .primaryKey("pk", "pt")
+ .options(options.toMap())
+ .build();
+ catalog.createTable(identifier, schema, true);
+ Table table = catalog.getTable(identifier);
+
+ write(
+ table,
+ GenericRow.of(1, 1, 1),
+ GenericRow.of(1, 2, 1),
+ GenericRow.of(1, 3, 1),
+ GenericRow.of(2, 1, 1));
+
+ FileStoreTable storeTable = (FileStoreTable) table;
+ FileStore<?> store = storeTable.store();
+ String manifestListFile = storeTable.snapshotManager().latestSnapshot().deltaManifestList();
+
+ ManifestList manifestList = store.manifestListFactory().create();
+ ManifestFileMeta manifest = manifestList.read(manifestListFile).get(0);
+
+ // should have partition stats
+ SimpleStats partitionStats = manifest.partitionStats();
+ assertThat(partitionStats.minValues().getInt(0)).isEqualTo(1);
+ assertThat(partitionStats.maxValues().getInt(0)).isEqualTo(2);
+
+ // should not have record stats because of NONE mode
+ ManifestFile manifestFile = store.manifestFileFactory().create();
+ DataFileMeta file =
+ manifestFile.read(manifest.fileName(), manifest.fileSize()).get(0).file();
+ SimpleStats recordStats = file.valueStats();
+ assertThat(file.valueStatsCols()).isEmpty();
+ assertThat(recordStats.minValues().getFieldCount()).isEqualTo(0);
+ assertThat(recordStats.maxValues().getFieldCount()).isEqualTo(0);
+ assertThat(recordStats.nullCounts().size()).isEqualTo(0);
+ }
}
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
index 0328cc6bad..922221bb8d 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
@@ -230,7 +230,6 @@ public class AppendOnlyFileStoreTableTest extends FileStoreTableTestBase {
public void testBatchFilter(boolean statsDenseStore) throws Exception {
Consumer<Options> optionsSetter =
options -> {
- options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
options.set(CoreOptions.METADATA_STATS_MODE, "none");
options.set("fields.b.stats-mode", "full");
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
index 51c8b328df..46b85223bc 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
@@ -348,7 +348,6 @@ public class PrimaryKeyFileStoreTableTest extends
FileStoreTableTestBase {
public void testBatchFilter(boolean statsDenseStore) throws Exception {
Consumer<Options> optionsSetter =
options -> {
- options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
// pk table doesn't need value stats
options.set(CoreOptions.METADATA_STATS_MODE, "none");
@@ -1664,7 +1663,6 @@ public class PrimaryKeyFileStoreTableTest extends FileStoreTableTestBase {
options.set(TARGET_FILE_SIZE, new MemorySize(1));
options.set(DELETION_VECTORS_ENABLED, true);
- options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
options.set(CoreOptions.METADATA_STATS_MODE, "none");
options.set("fields.b.stats-mode", "full");