This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new cff565f2b25 [Enhancement](explain)Display deleteFileNum for 
FileScanNode when explain verbose (#60308)
cff565f2b25 is described below

commit cff565f2b259700accf53b6d2cb445f180d9f254
Author: daidai <[email protected]>
AuthorDate: Mon Feb 2 16:54:28 2026 +0800

    [Enhancement](explain)Display deleteFileNum for FileScanNode when explain 
verbose (#60308)
    
    ### What problem does this PR solve?
    Problem Summary:
    This PR enhances the output of EXPLAIN VERBOSE for File Scan nodes by
    adding the following metrics:
    `dataFileNum=xxx, deleteFileNum=xxx, deleteSplitNum=xxx`
    This is especially useful for Iceberg, Paimon, and Hive ACID tables.
    
    These metrics provide more visibility into the underlying file and split
    layout, helping users better tune parameters and control query
    performance.
    Details:
    `dataFileNum` : The number of distinct data files that need to be read.
    This is not equivalent to the number of splits, since a single data file
    can be divided into multiple splits.
    
    `deleteFileNum` : The number of distinct delete files that need to be
    read.
    
    `deleteSplitNum` : The total number of delete files attached to the data
    splits, summed over all splits without deduplication. It is reported
    because the relationship between data files and delete files is
    many-to-many:
    - one data file may be associated with multiple delete files
    - one delete file may apply to multiple data files
    Using deleteSplitNum / dataSplitNum (the number of data splits), users
    can estimate the average number of delete splits that need to be read
    per data split.
    
    Example:
    ```
    mysql> explain verbose select * from iceberg.format_v3.dv_test_1w;
    
+-----------------------------------------------------------------------------------------------------------------------------------------------+
    | Explain String(Nereids Planner)                                           
                                                                    |
    
+-----------------------------------------------------------------------------------------------------------------------------------------------+
    | PLAN FRAGMENT 0                                                           
                                                                    |
    |   OUTPUT EXPRS:                                                           
                                                                    |
    |     id[#0]                                                                
                                                                    |
    |     grp[#1]                                                               
                                                                    |
    |     value[#2]                                                             
                                                                    |
    |     ts[#3]                                                                
                                                                    |
    |   PARTITION: RANDOM                                                       
                                                                    |
    |                                                                           
                                                                    |
    |   HAS_COLO_PLAN_NODE: false                                               
                                                                    |
    |                                                                           
                                                                    |
    |   VRESULT SINK                                                            
                                                                    |
    |      MYSQL_PROTOCOL                                                       
                                                                    |
    |                                                                           
                                                                    |
    |   0:VICEBERG_SCAN_NODE(32)                                                
                                                                    |
    |      table: iceberg.format_v3.dv_test_1w                                  
                                                                    |
    |      inputSplitNum=220, totalFileSize=720774, scanRanges=220              
                                                                    |
    |      partition=0/0                                                        
                                                                    |
    |      backends:                                                            
                                                                    |
    |        1769590309070                                                      
                                                                    |
    |          
s3://warehouse/wh/format_v3/dv_test_1w/data/00004-51-fc462f9a-d42a-404d-adfc-c8d2781c8d04-0-00001.parquet
 start: 4 length: 2672      |
    |          
s3://warehouse/wh/format_v3/dv_test_1w/data/00003-50-fc462f9a-d42a-404d-adfc-c8d2781c8d04-0-00001.parquet
 start: 4 length: 2852      |
    |          
s3://warehouse/wh/format_v3/dv_test_1w/data/00000-47-fc462f9a-d42a-404d-adfc-c8d2781c8d04-0-00001.parquet
 start: 4 length: 2894      |
    |          ... other 216 files ...                                          
                                                                    |
    |          
s3://warehouse/wh/format_v3/dv_test_1w/data/00001-48-fc462f9a-d42a-404d-adfc-c8d2781c8d04-0-00001.parquet
 start: 58397 length: 13894 |
    |          dataFileNum=10, deleteFileNum=1, deleteSplitNum=220              
                                                                |
    |      cardinality=33334, numNodes=1                                        
                                                                    |
    |      pushdown agg=NONE                                                    
                                                                    |
    |      tuple ids: 0                                                         
                                                                    |
    |                                                                           
                                                                    |
    | Tuples:                                                                   
                                                                    |
    | TupleDescriptor{id=0, tbl=dv_test_1w}                                     
                                                                    |
    |   SlotDescriptor{id=0, col=id, colUniqueId=1, type=bigint, nullable=true, 
isAutoIncrement=false, subColPath=null, virtualColumn=null}         |
    |   SlotDescriptor{id=1, col=grp, colUniqueId=2, type=int, nullable=true, 
isAutoIncrement=false, subColPath=null, virtualColumn=null}           |
    |   SlotDescriptor{id=2, col=value, colUniqueId=3, type=int, nullable=true, 
isAutoIncrement=false, subColPath=null, virtualColumn=null}         |
    |   SlotDescriptor{id=3, col=ts, colUniqueId=4, type=datetimev2(6), 
nullable=true, isAutoIncrement=false, subColPath=null, virtualColumn=null}  |
    |                                                                           
                                                                    |
    |                                                                           
                                                                    |
    |                                                                           
                                                                    |
    |                                                                           
                                                                    |
    | ========== STATISTICS ==========                                          
                                                                    |
    
+-----------------------------------------------------------------------------------------------------------------------------------------------+
    ```
---
 .../org/apache/doris/datasource/FileScanNode.java  | 32 +++++++++++++++++
 .../doris/datasource/hive/source/HiveScanNode.java | 31 +++++++++++++++++
 .../datasource/iceberg/source/IcebergScanNode.java | 40 ++++++++++++++++++++++
 .../datasource/paimon/source/PaimonScanNode.java   | 22 ++++++++++++
 .../hive/test_transactional_hive.groovy            |  8 +++++
 .../iceberg/test_iceberg_position_delete.groovy    | 10 ++++++
 .../paimon/test_paimon_deletion_vector_oss.groovy  |  9 +++++
 7 files changed, 152 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
index a7aa0f607ac..1c1d6ac6720 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
@@ -54,8 +54,10 @@ import com.google.common.collect.Multimap;
 
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 /**
@@ -99,6 +101,17 @@ public abstract class FileScanNode extends ExternalScanNode 
{
         return totalFileSize;
     }
 
+    /**
+     * Get all delete files for the given file range.
+     * @param rangeDesc the file range descriptor
+     * @return list of delete file paths (formatted strings)
+     */
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        // Default implementation: return empty list
+        // Subclasses should override this method
+        return Collections.emptyList();
+    }
+
     @Override
     public String getNodeExplainString(String prefix, TExplainLevel 
detailLevel) {
         StringBuilder output = new StringBuilder();
@@ -139,6 +152,21 @@ public abstract class FileScanNode extends 
ExternalScanNode {
                         return Long.compare(o1.getStartOffset(), 
o2.getStartOffset());
                     }
                 });
+
+                // A Data file may be divided into different splits, so a set 
is used to remove duplicates.
+                Set<String> dataFilesSet = new HashSet<>();
+                // A delete file might be used by multiple data files, so use 
set to remove duplicates.
+                Set<String> deleteFilesSet = new HashSet<>();
+                // The average number of delete splits read per data split can 
be estimated
+                // as deleteSplitNum / dataSplitNum, where dataSplitNum is 
fileRangeDescs.size().
+                long deleteSplitNum = 0;
+                for (TFileRangeDesc fileRangeDesc : fileRangeDescs) {
+                    dataFilesSet.add(fileRangeDesc.getPath());
+                    List<String> deletefiles =  getDeleteFiles(fileRangeDesc);
+                    deleteFilesSet.addAll(deletefiles);
+                    deleteSplitNum += deletefiles.size();
+                }
+
                 // 3. if size <= 4, print all. if size > 4, print first 3 and 
last 1
                 int size = fileRangeDescs.size();
                 if (size <= 4) {
@@ -164,6 +192,10 @@ public abstract class FileScanNode extends 
ExternalScanNode {
                             .append(" length: ").append(file.getSize())
                             .append("\n");
                 }
+                output.append(prefix).append("    
").append("dataFileNum=").append(dataFilesSet.size())
+                        .append(", 
deleteFileNum=").append(deleteFilesSet.size())
+                        .append(", deleteSplitNum=").append(deleteSplitNum)
+                        .append("\n");
             }
         }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 776ff234cd6..2e8f3735f6b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -490,6 +490,37 @@ public class HiveScanNode extends FileQueryScanNode {
         }
     }
 
+    @Override
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        List<String> deleteFiles = new ArrayList<>();
+        if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+            return deleteFiles;
+        }
+        TTableFormatFileDesc tableFormatParams = 
rangeDesc.getTableFormatParams();
+        if (tableFormatParams == null || 
!tableFormatParams.isSetTransactionalHiveParams()) {
+            return deleteFiles;
+        }
+        TTransactionalHiveDesc hiveParams = 
tableFormatParams.getTransactionalHiveParams();
+        if (hiveParams == null || !hiveParams.isSetDeleteDeltas()) {
+            return deleteFiles;
+        }
+        List<TTransactionalHiveDeleteDeltaDesc> deleteDeltas = 
hiveParams.getDeleteDeltas();
+        if (deleteDeltas == null) {
+            return deleteFiles;
+        }
+        // Format: {directory_location}/{file_name}
+        for (TTransactionalHiveDeleteDeltaDesc deleteDelta : deleteDeltas) {
+            if (deleteDelta != null && deleteDelta.isSetDirectoryLocation()
+                    && deleteDelta.isSetFileNames() && 
deleteDelta.getFileNames() != null) {
+                String directoryLocation = deleteDelta.getDirectoryLocation();
+                for (String fileName : deleteDelta.getFileNames()) {
+                    deleteFiles.add(directoryLocation + "/" + fileName);
+                }
+            }
+        }
+        return deleteFiles;
+    }
+
     @Override
     protected Map<String, String> getLocationProperties() {
         return hmsTable.getBackendStorageProperties();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index 26326d8ee7d..647c2e014b9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -277,6 +277,46 @@ public class IcebergScanNode extends FileQueryScanNode {
         rangeDesc.setTableFormatParams(tableFormatFileDesc);
     }
 
+    @Override
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        List<String> deleteFiles = new ArrayList<>();
+        if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+            return deleteFiles;
+        }
+        TTableFormatFileDesc tableFormatParams = 
rangeDesc.getTableFormatParams();
+        if (tableFormatParams == null || 
!tableFormatParams.isSetIcebergParams()) {
+            return deleteFiles;
+        }
+        TIcebergFileDesc icebergParams = tableFormatParams.getIcebergParams();
+        if (icebergParams == null || !icebergParams.isSetDeleteFiles()) {
+            return deleteFiles;
+        }
+        List<TIcebergDeleteFileDesc> icebergDeleteFiles = 
icebergParams.getDeleteFiles();
+        if (icebergDeleteFiles == null) {
+            return deleteFiles;
+        }
+        for (TIcebergDeleteFileDesc deleteFile : icebergDeleteFiles) {
+            if (deleteFile != null && deleteFile.isSetPath()) {
+                deleteFiles.add(deleteFile.getPath());
+            }
+        }
+        return deleteFiles;
+    }
+
+    private String getDeleteFileContentType(int content) {
+        // Iceberg file type: 0: data, 1: position delete, 2: equality delete, 
3: deletion vector
+        switch (content) {
+            case 1:
+                return "position_delete";
+            case 2:
+                return "equality_delete";
+            case 3:
+                return "deletion_vector";
+            default:
+                return "unknown";
+        }
+    }
+
     @Override
     public List<Split> getSplits(int numBackends) throws UserException {
         try {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index b0f659ae52c..4dffb618ffc 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -274,6 +274,28 @@ public class PaimonScanNode extends FileQueryScanNode {
         rangeDesc.setTableFormatParams(tableFormatFileDesc);
     }
 
+    @Override
+    protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+        List<String> deleteFiles = new ArrayList<>();
+        if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+            return deleteFiles;
+        }
+        TTableFormatFileDesc tableFormatParams = 
rangeDesc.getTableFormatParams();
+        if (tableFormatParams == null || 
!tableFormatParams.isSetPaimonParams()) {
+            return deleteFiles;
+        }
+        TPaimonFileDesc paimonParams = tableFormatParams.getPaimonParams();
+        if (paimonParams == null || !paimonParams.isSetDeletionFile()) {
+            return deleteFiles;
+        }
+        TPaimonDeletionFileDesc deletionFile = paimonParams.getDeletionFile();
+        if (deletionFile != null && deletionFile.isSetPath()) {
+            // Only the deletion file path is recorded (offset and length are not included).
+            deleteFiles.add(deletionFile.getPath());
+        }
+        return deleteFiles;
+    }
+
     @Override
     public List<Split> getSplits(int numBackends) throws UserException {
         boolean forceJniScanner = sessionVariable.isForceJniScanner();
diff --git 
a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy 
b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
index 568bb632dec..adc97540665 100644
--- 
a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
+++ 
b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
@@ -142,6 +142,13 @@ suite("test_transactional_hive", 
"p0,external,hive,external_docker,external_dock
         qt_count_5 """ select count(*) from orc_acid_major; """ //3
     }
 
+    def test_explain_verbose = {
+        explain {
+            sql ("select count(*) from orc_full_acid")
+            verbose (true)
+            contains "deleteFileNum"
+        }
+    }
 
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled == null || !enabled.equalsIgnoreCase("true")) {
@@ -177,6 +184,7 @@ suite("test_transactional_hive", 
"p0,external,hive,external_docker,external_dock
 
 
             test_acid_count()
+            test_explain_verbose()
             
             q01_par_limit()
             
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
index d793cef3568..91bc48a6758 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
@@ -165,6 +165,16 @@ suite("test_iceberg_position_delete", 
"p0,external,doris,external_docker,externa
         assertTrue(iceberg_position_gen_7.size() == 5632)
 
         // sql """drop catalog ${catalog_name}"""
+
+        def test_explain_verbose = {
+            explain {
+                sql ("select name from iceberg_position_gen_data where id != 
5;")
+                verbose (true)
+                contains "deleteFileNum"
+            }
+        }
+        test_explain_verbose()
+
 }
 /*
 
diff --git 
a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
 
b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
index 71a4d971169..76574aae528 100644
--- 
a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
+++ 
b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
@@ -53,8 +53,17 @@ suite("test_paimon_deletion_vector_oss", 
"p0,external,doris,external_docker,exte
             qt_6 """select * from deletion_vector_parquet where id > 2 order 
by id;"""
         }
 
+        def test_explain_verbose = {
+            explain {
+                sql ("select * from deletion_vector_orc;")
+                verbose (true)
+                contains "deleteFileNum"
+            }
+        }
+
         test_cases("false")
         test_cases("true")
+        test_explain_verbose()
 
     } finally {
         sql """set force_jni_scanner=false"""


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to