This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 66ca6984ba [core] Introduce manifest.delete-file-drop-stats (#4640)
66ca6984ba is described below

commit 66ca6984bac38a835bc42175ef8e639841b15476
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Dec 4 21:14:18 2024 +0800

    [core] Introduce manifest.delete-file-drop-stats (#4640)
---
 docs/layouts/shortcodes/generated/core_configuration.html    |  6 ++++++
 .../src/main/java/org/apache/paimon/CoreOptions.java         | 12 ++++++++++++
 .../src/main/java/org/apache/paimon/AbstractFileStore.java   |  1 +
 .../append/UnawareAppendTableCompactionCoordinator.java      |  4 +++-
 .../org/apache/paimon/operation/AbstractFileStoreWrite.java  |  9 ++++++++-
 .../org/apache/paimon/operation/FileStoreCommitImpl.java     |  7 ++++++-
 .../org/apache/paimon/operation/MemoryFileStoreWrite.java    |  1 +
 .../org/apache/paimon/operation/FileStoreCommitTest.java     |  1 +
 .../org/apache/paimon/spark/commands/PaimonCommand.scala     |  5 ++++-
 9 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index 2ad5db28b9..6fb2c72650 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -453,6 +453,12 @@ Mainly to resolve data skew on primary keys. We recommend starting with 64 mb wh
             <td>String</td>
             <td>Default file compression for manifest.</td>
         </tr>
+        <tr>
+            <td><h5>manifest.delete-file-drop-stats</h5></td>
+            <td style="word-wrap: break-word;">false</td>
+            <td>Boolean</td>
+            <td>For DELETE manifest entry in manifest file, drop stats to reduce memory and storage. Default value is false only for compatibility of old reader.</td>
+        </tr>
         <tr>
             <td><h5>manifest.format</h5></td>
             <td style="word-wrap: break-word;">"avro"</td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index cddef33c27..765d5a1e32 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -1426,6 +1426,14 @@ public class CoreOptions implements Serializable {
                     .noDefaultValue()
                     .withDescription("The object location for object table.");
 
+    public static final ConfigOption<Boolean> MANIFEST_DELETE_FILE_DROP_STATS =
+            key("manifest.delete-file-drop-stats")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "For DELETE manifest entry in manifest file, drop stats to reduce memory and storage."
+                                    + " Default value is false only for compatibility of old reader.");
+
     @ExcludeFromDocumentation("Only used internally to support materialized table")
     public static final ConfigOption<String> MATERIALIZED_TABLE_DEFINITION_QUERY =
             key("materialized-table.definition-query")
@@ -1947,6 +1955,10 @@ public class CoreOptions implements Serializable {
         return lookupStrategy().needLookup;
     }
 
+    public boolean manifestDeleteFileDropStats() {
+        return options.get(MANIFEST_DELETE_FILE_DROP_STATS);
+    }
+
     public LookupStrategy lookupStrategy() {
         return LookupStrategy.from(
                 mergeEngine().equals(MergeEngine.FIRST_ROW),
diff --git a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
index ae4552aa71..1a538ad89e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
+++ b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
@@ -218,6 +218,7 @@ abstract class AbstractFileStore<T> implements FileStore<T> {
                 tableName,
                 commitUser,
                 partitionType,
+                options,
                 options.partitionDefaultName(),
                 pathFactory(),
                 snapshotManager(),
diff --git a/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java b/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java
index 5e43568aac..490bda9d4c 100644
--- a/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java
+++ b/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java
@@ -381,7 +381,9 @@ public class UnawareAppendTableCompactionCoordinator {
                 snapshotReader.withFilter(filter);
             }
             // drop stats to reduce memory
-            snapshotReader.dropStats();
+            if (table.coreOptions().manifestDeleteFileDropStats()) {
+                snapshotReader.dropStats();
+            }
             this.streamingMode = isStreaming;
         }
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java
index 43957de8d6..14dfe75a6e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java
@@ -18,6 +18,7 @@
 
 package org.apache.paimon.operation;
 
+import org.apache.paimon.CoreOptions;
 import org.apache.paimon.Snapshot;
 import org.apache.paimon.annotation.VisibleForTesting;
 import org.apache.paimon.compact.CompactDeletionFile;
@@ -96,13 +97,19 @@ public abstract class AbstractFileStoreWrite<T> implements FileStoreWrite<T> {
             @Nullable IndexMaintainer.Factory<T> indexFactory,
             @Nullable DeletionVectorsMaintainer.Factory dvMaintainerFactory,
             String tableName,
+            CoreOptions options,
             int totalBuckets,
             RowType partitionType,
             int writerNumberMax,
             boolean legacyPartitionName) {
         this.snapshotManager = snapshotManager;
+        this.scan = scan;
         // Statistic is useless in writer
-        this.scan = scan == null ? null : scan.dropStats();
+        if (options.manifestDeleteFileDropStats()) {
+            if (this.scan != null) {
+                this.scan.dropStats();
+            }
+        }
         this.indexFactory = indexFactory;
         this.dvMaintainerFactory = dvMaintainerFactory;
         this.totalBuckets = totalBuckets;
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java
index bbd9b27ee6..153f9f07e9 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java
@@ -18,6 +18,7 @@
 
 package org.apache.paimon.operation;
 
+import org.apache.paimon.CoreOptions;
 import org.apache.paimon.Snapshot;
 import org.apache.paimon.annotation.VisibleForTesting;
 import org.apache.paimon.data.BinaryRow;
@@ -146,6 +147,7 @@ public class FileStoreCommitImpl implements FileStoreCommit {
             String tableName,
             String commitUser,
             RowType partitionType,
+            CoreOptions options,
             String partitionDefaultName,
             FileStorePathFactory pathFactory,
             SnapshotManager snapshotManager,
@@ -176,8 +178,11 @@ public class FileStoreCommitImpl implements FileStoreCommit {
         this.manifestFile = manifestFileFactory.create();
         this.manifestList = manifestListFactory.create();
         this.indexManifestFile = indexManifestFileFactory.create();
+        this.scan = scan;
         // Stats in DELETE Manifest Entries is useless
-        this.scan = scan.dropStats();
+        if (options.manifestDeleteFileDropStats()) {
+            this.scan.dropStats();
+        }
         this.numBucket = numBucket;
         this.manifestTargetSize = manifestTargetSize;
         this.manifestFullCompactionSize = manifestFullCompactionSize;
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java
index ff99f06510..a2733121ee 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java
@@ -73,6 +73,7 @@ public abstract class MemoryFileStoreWrite<T> extends AbstractFileStoreWrite<T>
                 indexFactory,
                 dvMaintainerFactory,
                 tableName,
+                options,
                 options.bucket(),
                 partitionType,
                 options.writeMaxWritersToSpill(),
diff --git a/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java b/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java
index de4ee684b8..9e4ba30eb8 100644
--- a/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java
@@ -950,6 +950,7 @@ public class FileStoreCommitTest {
     @Test
     public void testDropStatsForOverwrite() throws Exception {
         TestFileStore store = createStore(false);
+        store.options().toConfiguration().set(CoreOptions.MANIFEST_DELETE_FILE_DROP_STATS, true);
 
         List<KeyValue> keyValues = generateDataList(1);
         BinaryRow partition = gen.getPartition(keyValues.get(0));
diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala
index 466643b157..87583593e3 100644
--- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala
+++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala
@@ -94,8 +94,11 @@ trait PaimonCommand extends WithFileStoreTable with ExpressionHelper with SQLCon
       condition: Expression,
       output: Seq[Attribute]): Seq[DataSplit] = {
     // low level snapshot reader, it can not be affected by 'scan.mode'
+    val snapshotReader = table.newSnapshotReader()
     // dropStats after filter push down
-    val snapshotReader = table.newSnapshotReader().dropStats()
+    if (table.coreOptions().manifestDeleteFileDropStats()) {
+      snapshotReader.dropStats()
+    }
     if (condition != TrueLiteral) {
       val filter =
         convertConditionToPaimonPredicate(condition, output, rowType, 
ignoreFailure = true)

Reply via email to