This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f967b4db5 [core] Make metadata.stats-dense-store default value is 
true (#4617)
6f967b4db5 is described below

commit 6f967b4db5ca69d9ca530195bfc56e461b1719c6
Author: Jingsong Lee <[email protected]>
AuthorDate: Mon Dec 2 15:43:53 2024 +0800

    [core] Make metadata.stats-dense-store default value is true (#4617)
---
 docs/content/flink/sql-ddl.md                      |  4 +-
 .../shortcodes/generated/core_configuration.html   |  4 +-
 .../main/java/org/apache/paimon/CoreOptions.java   |  6 +--
 .../org/apache/paimon/stats/StatsTableTest.java    | 52 +++++++++++++++++++++-
 .../paimon/table/AppendOnlyFileStoreTableTest.java |  1 -
 .../paimon/table/PrimaryKeyFileStoreTableTest.java |  2 -
 6 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/docs/content/flink/sql-ddl.md b/docs/content/flink/sql-ddl.md
index 0324e66556..8b8c069229 100644
--- a/docs/content/flink/sql-ddl.md
+++ b/docs/content/flink/sql-ddl.md
@@ -203,8 +203,8 @@ Paimon will automatically collect the statistics of the 
data file for speeding u
 The statistics collector mode can be configured by `'metadata.stats-mode'`, by 
default is `'truncate(16)'`.
 You can configure the field level by setting 
`'fields.{field_name}.stats-mode'`.
 
-For the stats mode of `none`, we suggest that you configure 
`metadata.stats-dense-store` = `true`, which will
-significantly reduce the storage size of the manifest.
+For the stats mode of `none`, `metadata.stats-dense-store` defaults to 
`true`, which significantly reduces the
+storage size of the manifest. Note that the Paimon SDK in the reading engine must be at 
least version 0.9.1, or 1.0.0 or higher.
 
 ### Field Default Value
 
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html 
b/docs/layouts/shortcodes/generated/core_configuration.html
index a38f07784f..fad1f4907e 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -485,9 +485,9 @@ Mainly to resolve data skew on primary keys. We recommend 
starting with 64 mb wh
         </tr>
         <tr>
             <td><h5>metadata.stats-dense-store</h5></td>
-            <td style="word-wrap: break-word;">false</td>
+            <td style="word-wrap: break-word;">true</td>
             <td>Boolean</td>
-            <td>Whether to store statistic densely in metadata (manifest 
files), which will significantly reduce the storage size of metadata when the 
none statistic mode is set.<br />Note, when this mode is enabled, the Paimon 
sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.</td>
+            <td>Whether to store statistic densely in metadata (manifest 
files), which will significantly reduce the storage size of metadata when the 
none statistic mode is set.<br />Note, when this mode is enabled with 
'metadata.stats-mode:none', the Paimon sdk in reading engine requires at least 
version 0.9.1 or 1.0.0 or higher.</td>
         </tr>
         <tr>
             <td><h5>metadata.stats-mode</h5></td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java 
b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index fce09357f0..b9b5675f1d 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -1101,7 +1101,7 @@ public class CoreOptions implements Serializable {
     public static final ConfigOption<Boolean> METADATA_STATS_DENSE_STORE =
             key("metadata.stats-dense-store")
                     .booleanType()
-                    .defaultValue(false)
+                    .defaultValue(true)
                     .withDescription(
                             Description.builder()
                                     .text(
@@ -1110,8 +1110,8 @@ public class CoreOptions implements Serializable {
                                                     + " none statistic mode is 
set.")
                                     .linebreak()
                                     .text(
-                                            "Note, when this mode is enabled, 
the Paimon sdk in reading engine requires"
-                                                    + " at least version 0.9.1 
or 1.0.0 or higher.")
+                                            "Note, when this mode is enabled 
with 'metadata.stats-mode:none', the Paimon sdk in"
+                                                    + " reading engine 
requires at least version 0.9.1 or 1.0.0 or higher.")
                                     .build());
 
     public static final ConfigOption<String> COMMIT_CALLBACKS =
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java 
b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
index 494b2e28e4..25282d898a 100644
--- a/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
@@ -35,6 +35,7 @@ import org.apache.paimon.types.DataTypes;
 
 import org.junit.jupiter.api.Test;
 
+import static org.apache.paimon.CoreOptions.METADATA_STATS_DENSE_STORE;
 import static org.apache.paimon.CoreOptions.METADATA_STATS_MODE;
 import static org.assertj.core.api.Assertions.assertThat;
 
@@ -42,10 +43,11 @@ import static org.assertj.core.api.Assertions.assertThat;
 public class StatsTableTest extends TableTestBase {
 
     @Test
-    public void testPartitionStats() throws Exception {
+    public void testPartitionStatsNotDense() throws Exception {
         Identifier identifier = identifier("T");
         Options options = new Options();
         options.set(METADATA_STATS_MODE, "NONE");
+        options.set(METADATA_STATS_DENSE_STORE, false);
         options.set(CoreOptions.BUCKET, 1);
         Schema schema =
                 Schema.newBuilder()
@@ -90,4 +92,52 @@ public class StatsTableTest extends TableTestBase {
         assertThat(recordStats.maxValues().isNullAt(1)).isTrue();
         assertThat(recordStats.maxValues().isNullAt(2)).isTrue();
     }
+
+    @Test
+    public void testPartitionStatsDenseMode() throws Exception {
+        Identifier identifier = identifier("T");
+        Options options = new Options();
+        options.set(METADATA_STATS_MODE, "NONE");
+        options.set(CoreOptions.BUCKET, 1);
+        Schema schema =
+                Schema.newBuilder()
+                        .column("pt", DataTypes.INT())
+                        .column("pk", DataTypes.INT())
+                        .column("col1", DataTypes.INT())
+                        .partitionKeys("pt")
+                        .primaryKey("pk", "pt")
+                        .options(options.toMap())
+                        .build();
+        catalog.createTable(identifier, schema, true);
+        Table table = catalog.getTable(identifier);
+
+        write(
+                table,
+                GenericRow.of(1, 1, 1),
+                GenericRow.of(1, 2, 1),
+                GenericRow.of(1, 3, 1),
+                GenericRow.of(2, 1, 1));
+
+        FileStoreTable storeTable = (FileStoreTable) table;
+        FileStore<?> store = storeTable.store();
+        String manifestListFile = 
storeTable.snapshotManager().latestSnapshot().deltaManifestList();
+
+        ManifestList manifestList = store.manifestListFactory().create();
+        ManifestFileMeta manifest = manifestList.read(manifestListFile).get(0);
+
+        // should have partition stats
+        SimpleStats partitionStats = manifest.partitionStats();
+        assertThat(partitionStats.minValues().getInt(0)).isEqualTo(1);
+        assertThat(partitionStats.maxValues().getInt(0)).isEqualTo(2);
+
+        // should not have record stats because of NONE mode
+        ManifestFile manifestFile = store.manifestFileFactory().create();
+        DataFileMeta file =
+                manifestFile.read(manifest.fileName(), 
manifest.fileSize()).get(0).file();
+        SimpleStats recordStats = file.valueStats();
+        assertThat(file.valueStatsCols()).isEmpty();
+        assertThat(recordStats.minValues().getFieldCount()).isEqualTo(0);
+        assertThat(recordStats.maxValues().getFieldCount()).isEqualTo(0);
+        assertThat(recordStats.nullCounts().size()).isEqualTo(0);
+    }
 }
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
 
b/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
index 0328cc6bad..922221bb8d 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
@@ -230,7 +230,6 @@ public class AppendOnlyFileStoreTableTest extends 
FileStoreTableTestBase {
     public void testBatchFilter(boolean statsDenseStore) throws Exception {
         Consumer<Options> optionsSetter =
                 options -> {
-                    options.set(CoreOptions.METADATA_STATS_DENSE_STORE, 
statsDenseStore);
                     if (statsDenseStore) {
                         options.set(CoreOptions.METADATA_STATS_MODE, "none");
                         options.set("fields.b.stats-mode", "full");
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
 
b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
index 51c8b328df..46b85223bc 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
@@ -348,7 +348,6 @@ public class PrimaryKeyFileStoreTableTest extends 
FileStoreTableTestBase {
     public void testBatchFilter(boolean statsDenseStore) throws Exception {
         Consumer<Options> optionsSetter =
                 options -> {
-                    options.set(CoreOptions.METADATA_STATS_DENSE_STORE, 
statsDenseStore);
                     if (statsDenseStore) {
                         // pk table doesn't need value stats
                         options.set(CoreOptions.METADATA_STATS_MODE, "none");
@@ -1664,7 +1663,6 @@ public class PrimaryKeyFileStoreTableTest extends 
FileStoreTableTestBase {
                     options.set(TARGET_FILE_SIZE, new MemorySize(1));
                     options.set(DELETION_VECTORS_ENABLED, true);
 
-                    options.set(CoreOptions.METADATA_STATS_DENSE_STORE, 
statsDenseStore);
                     if (statsDenseStore) {
                         options.set(CoreOptions.METADATA_STATS_MODE, "none");
                         options.set("fields.b.stats-mode", "full");

Reply via email to