This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 9c57150e059 branch-4.0: [Enhancement](explain)Display deleteFileNum for FileScanNode when explain verbose #60308 (#60437)
9c57150e059 is described below

commit 9c57150e059d4583b7bbbe21ef8c4c28460c6340
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Feb 3 10:49:51 2026 +0800

    branch-4.0: [Enhancement](explain)Display deleteFileNum for FileScanNode when explain verbose #60308 (#60437)
    
    Cherry-picked from #60308
    
    Co-authored-by: daidai <[email protected]>
---
 .../org/apache/doris/datasource/FileScanNode.java  | 32 +++++++++++++++++
 .../doris/datasource/hive/source/HiveScanNode.java | 31 +++++++++++++++++
 .../datasource/iceberg/source/IcebergScanNode.java | 40 ++++++++++++++++++++++
 .../datasource/paimon/source/PaimonScanNode.java   | 22 ++++++++++++
 .../hive/test_transactional_hive.groovy            |  8 +++++
 .../iceberg/test_iceberg_position_delete.groovy    | 10 ++++++
 .../paimon/test_paimon_deletion_vector_oss.groovy  |  9 +++++
 7 files changed, 152 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
index 46c69247bf1..8af0096e007 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
@@ -55,8 +55,10 @@ import com.google.common.collect.Multimap;
 
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 /**
@@ -104,6 +106,17 @@ public abstract class FileScanNode extends 
ExternalScanNode {
         return totalFileSize;
     }
 
+    /**
+     * Get all delete files for the given file range.
+     * @param rangeDesc the file range descriptor
+     * @return list of delete file paths (formatted strings)
+     */
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        // Default implementation: return empty list
+        // Subclasses should override this method
+        return Collections.emptyList();
+    }
+
     @Override
     public String getNodeExplainString(String prefix, TExplainLevel 
detailLevel) {
         StringBuilder output = new StringBuilder();
@@ -149,6 +162,21 @@ public abstract class FileScanNode extends 
ExternalScanNode {
                         return Long.compare(o1.getStartOffset(), 
o2.getStartOffset());
                     }
                 });
+
+                // A Data file may be divided into different splits, so a set 
is used to remove duplicates.
+                Set<String> dataFilesSet = new HashSet<>();
+                // A delete file might be used by multiple data files, so use 
set to remove duplicates.
+                Set<String> deleteFilesSet = new HashSet<>();
+                // You can estimate how many delete splits need to be read for 
a data split
+                // using deleteSplitNum / dataSplitNum(fileRangeDescs.size()) 
split.
+                long deleteSplitNum = 0;
+                for (TFileRangeDesc fileRangeDesc : fileRangeDescs) {
+                    dataFilesSet.add(fileRangeDesc.getPath());
+                    List<String> deletefiles =  getDeleteFiles(fileRangeDesc);
+                    deleteFilesSet.addAll(deletefiles);
+                    deleteSplitNum += deletefiles.size();
+                }
+
                 // 3. if size <= 4, print all. if size > 4, print first 3 and 
last 1
                 int size = fileRangeDescs.size();
                 if (size <= 4) {
@@ -174,6 +202,10 @@ public abstract class FileScanNode extends 
ExternalScanNode {
                             .append(" length: ").append(file.getSize())
                             .append("\n");
                 }
+                output.append(prefix).append("    
").append("dataFileNum=").append(dataFilesSet.size())
+                        .append(", 
deleteFileNum=").append(deleteFilesSet.size())
+                        .append(", deleteSplitNum=").append(deleteSplitNum)
+                        .append("\n");
             }
         }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 2a524a4138d..c9317b45982 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -443,6 +443,37 @@ public class HiveScanNode extends FileQueryScanNode {
         }
     }
 
+    @Override
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        List<String> deleteFiles = new ArrayList<>();
+        if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+            return deleteFiles;
+        }
+        TTableFormatFileDesc tableFormatParams = 
rangeDesc.getTableFormatParams();
+        if (tableFormatParams == null || 
!tableFormatParams.isSetTransactionalHiveParams()) {
+            return deleteFiles;
+        }
+        TTransactionalHiveDesc hiveParams = 
tableFormatParams.getTransactionalHiveParams();
+        if (hiveParams == null || !hiveParams.isSetDeleteDeltas()) {
+            return deleteFiles;
+        }
+        List<TTransactionalHiveDeleteDeltaDesc> deleteDeltas = 
hiveParams.getDeleteDeltas();
+        if (deleteDeltas == null) {
+            return deleteFiles;
+        }
+        // Format: {directory_location}/{file_name}
+        for (TTransactionalHiveDeleteDeltaDesc deleteDelta : deleteDeltas) {
+            if (deleteDelta != null && deleteDelta.isSetDirectoryLocation()
+                    && deleteDelta.isSetFileNames() && 
deleteDelta.getFileNames() != null) {
+                String directoryLocation = deleteDelta.getDirectoryLocation();
+                for (String fileName : deleteDelta.getFileNames()) {
+                    deleteFiles.add(directoryLocation + "/" + fileName);
+                }
+            }
+        }
+        return deleteFiles;
+    }
+
     @Override
     protected Map<String, String> getLocationProperties() {
         return hmsTable.getBackendStorageProperties();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index 3101d9f22f3..debfa513e62 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -269,6 +269,46 @@ public class IcebergScanNode extends FileQueryScanNode {
         rangeDesc.setTableFormatParams(tableFormatFileDesc);
     }
 
+    @Override
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        List<String> deleteFiles = new ArrayList<>();
+        if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+            return deleteFiles;
+        }
+        TTableFormatFileDesc tableFormatParams = 
rangeDesc.getTableFormatParams();
+        if (tableFormatParams == null || 
!tableFormatParams.isSetIcebergParams()) {
+            return deleteFiles;
+        }
+        TIcebergFileDesc icebergParams = tableFormatParams.getIcebergParams();
+        if (icebergParams == null || !icebergParams.isSetDeleteFiles()) {
+            return deleteFiles;
+        }
+        List<TIcebergDeleteFileDesc> icebergDeleteFiles = 
icebergParams.getDeleteFiles();
+        if (icebergDeleteFiles == null) {
+            return deleteFiles;
+        }
+        for (TIcebergDeleteFileDesc deleteFile : icebergDeleteFiles) {
+            if (deleteFile != null && deleteFile.isSetPath()) {
+                deleteFiles.add(deleteFile.getPath());
+            }
+        }
+        return deleteFiles;
+    }
+
+    private String getDeleteFileContentType(int content) {
+        // Iceberg file type: 0: data, 1: position delete, 2: equality delete, 
3: deletion vector
+        switch (content) {
+            case 1:
+                return "position_delete";
+            case 2:
+                return "equality_delete";
+            case 3:
+                return "deletion_vector";
+            default:
+                return "unknown";
+        }
+    }
+
     @Override
     public List<Split> getSplits(int numBackends) throws UserException {
         try {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index e1f746a70e8..fc35282066e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -276,6 +276,28 @@ public class PaimonScanNode extends FileQueryScanNode {
         rangeDesc.setTableFormatParams(tableFormatFileDesc);
     }
 
+    @Override
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        List<String> deleteFiles = new ArrayList<>();
+        if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+            return deleteFiles;
+        }
+        TTableFormatFileDesc tableFormatParams = 
rangeDesc.getTableFormatParams();
+        if (tableFormatParams == null || 
!tableFormatParams.isSetPaimonParams()) {
+            return deleteFiles;
+        }
+        TPaimonFileDesc paimonParams = tableFormatParams.getPaimonParams();
+        if (paimonParams == null || !paimonParams.isSetDeletionFile()) {
+            return deleteFiles;
+        }
+        TPaimonDeletionFileDesc deletionFile = paimonParams.getDeletionFile();
+        if (deletionFile != null && deletionFile.isSetPath()) {
+            // Format: path [offset: offset, length: length]
+            deleteFiles.add(deletionFile.getPath());
+        }
+        return deleteFiles;
+    }
+
     @Override
     public List<Split> getSplits(int numBackends) throws UserException {
         boolean forceJniScanner = sessionVariable.isForceJniScanner();
diff --git 
a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy 
b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
index 568bb632dec..adc97540665 100644
--- 
a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
+++ 
b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
@@ -142,6 +142,13 @@ suite("test_transactional_hive", 
"p0,external,hive,external_docker,external_dock
         qt_count_5 """ select count(*) from orc_acid_major; """ //3
     }
 
+    def test_explain_verbose = {
+        explain {
+            sql ("select count(*) from orc_full_acid")
+            verbose (true)
+            contains "deleteFileNum"
+        }
+    }
 
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled == null || !enabled.equalsIgnoreCase("true")) {
@@ -177,6 +184,7 @@ suite("test_transactional_hive", 
"p0,external,hive,external_docker,external_dock
 
 
             test_acid_count()
+            test_explain_verbose()
             
             q01_par_limit()
             
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
index d793cef3568..91bc48a6758 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
@@ -165,6 +165,16 @@ suite("test_iceberg_position_delete", 
"p0,external,doris,external_docker,externa
         assertTrue(iceberg_position_gen_7.size() == 5632)
 
         // sql """drop catalog ${catalog_name}"""
+
+        def test_explain_verbose = {
+            explain {
+                sql ("select name from iceberg_position_gen_data where id != 
5;")
+                verbose (true)
+                contains "deleteFileNum"
+            }
+        }
+        test_explain_verbose()
+
 }
 /*
 
diff --git 
a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
 
b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
index 71a4d971169..76574aae528 100644
--- 
a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
+++ 
b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
@@ -53,8 +53,17 @@ suite("test_paimon_deletion_vector_oss", 
"p0,external,doris,external_docker,exte
             qt_6 """select * from deletion_vector_parquet where id > 2 order 
by id;"""
         }
 
+        def test_explain_verbose = {
+            explain {
+                sql ("select * from deletion_vector_orc;")
+                verbose (true)
+                contains "deleteFileNum"
+            }
+        }
+
         test_cases("false")
         test_cases("true")
+        test_explain_verbose()
 
     } finally {
         sql """set force_jni_scanner=false"""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org

Reply via email to