This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 9c57150e059 branch-4.0: [Enhancement](explain)Display deleteFileNum
for FileScanNode when explain verbose #60308 (#60437)
9c57150e059 is described below
commit 9c57150e059d4583b7bbbe21ef8c4c28460c6340
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Feb 3 10:49:51 2026 +0800
branch-4.0: [Enhancement](explain)Display deleteFileNum for FileScanNode
when explain verbose #60308 (#60437)
Cherry-picked from #60308
Co-authored-by: daidai <[email protected]>
---
.../org/apache/doris/datasource/FileScanNode.java | 32 +++++++++++++++++
.../doris/datasource/hive/source/HiveScanNode.java | 31 +++++++++++++++++
.../datasource/iceberg/source/IcebergScanNode.java | 40 ++++++++++++++++++++++
.../datasource/paimon/source/PaimonScanNode.java | 22 ++++++++++++
.../hive/test_transactional_hive.groovy | 8 +++++
.../iceberg/test_iceberg_position_delete.groovy | 10 ++++++
.../paimon/test_paimon_deletion_vector_oss.groovy | 9 +++++
7 files changed, 152 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
index 46c69247bf1..8af0096e007 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
@@ -55,8 +55,10 @@ import com.google.common.collect.Multimap;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -104,6 +106,17 @@ public abstract class FileScanNode extends
ExternalScanNode {
return totalFileSize;
}
+ /**
+ * Returns the delete files associated with the given file range.
+ *
+ * <p>This base implementation reports none. The Hive, Iceberg and Paimon scan nodes
+ * override it, and the verbose explain output uses the result to compute
+ * {@code deleteFileNum} (distinct entries) and {@code deleteSplitNum} (total entries).
+ *
+ * @param rangeDesc the file range descriptor
+ * @return delete file paths for the range; this default returns an immutable empty list
+ */
+ protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+ // Default: no delete files. Collections.emptyList() is immutable, so callers must not mutate the result.
+ // Scan nodes for table formats that carry delete files override this method.
+ return Collections.emptyList();
+ }
+
@Override
public String getNodeExplainString(String prefix, TExplainLevel
detailLevel) {
StringBuilder output = new StringBuilder();
@@ -149,6 +162,21 @@ public abstract class FileScanNode extends
ExternalScanNode {
return Long.compare(o1.getStartOffset(),
o2.getStartOffset());
}
});
+
+ // A Data file may be divided into different splits, so a set
is used to remove duplicates.
+ Set<String> dataFilesSet = new HashSet<>();
+ // A delete file might be used by multiple data files, so use
set to remove duplicates.
+ Set<String> deleteFilesSet = new HashSet<>();
+ // You can estimate how many delete splits need to be read for
a data split
+ // using deleteSplitNum / dataSplitNum(fileRangeDescs.size())
split.
+ long deleteSplitNum = 0;
+ for (TFileRangeDesc fileRangeDesc : fileRangeDescs) {
+ dataFilesSet.add(fileRangeDesc.getPath());
+ List<String> deletefiles = getDeleteFiles(fileRangeDesc);
+ deleteFilesSet.addAll(deletefiles);
+ deleteSplitNum += deletefiles.size();
+ }
+
// 3. if size <= 4, print all. if size > 4, print first 3 and
last 1
int size = fileRangeDescs.size();
if (size <= 4) {
@@ -174,6 +202,10 @@ public abstract class FileScanNode extends
ExternalScanNode {
.append(" length: ").append(file.getSize())
.append("\n");
}
+ output.append(prefix).append("
").append("dataFileNum=").append(dataFilesSet.size())
+ .append(",
deleteFileNum=").append(deleteFilesSet.size())
+ .append(", deleteSplitNum=").append(deleteSplitNum)
+ .append("\n");
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 2a524a4138d..c9317b45982 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -443,6 +443,37 @@ public class HiveScanNode extends FileQueryScanNode {
}
}
+ @Override
+ protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+ List<String> deleteFiles = new ArrayList<>();
+ if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+ return deleteFiles;
+ }
+ TTableFormatFileDesc tableFormatParams =
rangeDesc.getTableFormatParams();
+ if (tableFormatParams == null ||
!tableFormatParams.isSetTransactionalHiveParams()) {
+ return deleteFiles;
+ }
+ TTransactionalHiveDesc hiveParams =
tableFormatParams.getTransactionalHiveParams();
+ if (hiveParams == null || !hiveParams.isSetDeleteDeltas()) {
+ return deleteFiles;
+ }
+ List<TTransactionalHiveDeleteDeltaDesc> deleteDeltas =
hiveParams.getDeleteDeltas();
+ if (deleteDeltas == null) {
+ return deleteFiles;
+ }
+ // Entry format: {directory_location}/{file_name}, joined verbatim (a location ending in "/" would produce "//").
+ for (TTransactionalHiveDeleteDeltaDesc deleteDelta : deleteDeltas) {
+ if (deleteDelta != null && deleteDelta.isSetDirectoryLocation()
+ && deleteDelta.isSetFileNames() &&
deleteDelta.getFileNames() != null) {
+ String directoryLocation = deleteDelta.getDirectoryLocation();
+ for (String fileName : deleteDelta.getFileNames()) {
+ deleteFiles.add(directoryLocation + "/" + fileName);
+ }
+ }
+ }
+ return deleteFiles;
+ }
+
@Override
protected Map<String, String> getLocationProperties() {
return hmsTable.getBackendStorageProperties();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index 3101d9f22f3..debfa513e62 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -269,6 +269,46 @@ public class IcebergScanNode extends FileQueryScanNode {
rangeDesc.setTableFormatParams(tableFormatFileDesc);
}
+ @Override
+ protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+ List<String> deleteFiles = new ArrayList<>(); // returned empty from every early exit below
+ if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+ return deleteFiles;
+ }
+ TTableFormatFileDesc tableFormatParams =
rangeDesc.getTableFormatParams();
+ if (tableFormatParams == null ||
!tableFormatParams.isSetIcebergParams()) {
+ return deleteFiles;
+ }
+ TIcebergFileDesc icebergParams = tableFormatParams.getIcebergParams();
+ if (icebergParams == null || !icebergParams.isSetDeleteFiles()) {
+ return deleteFiles;
+ }
+ List<TIcebergDeleteFileDesc> icebergDeleteFiles =
icebergParams.getDeleteFiles();
+ if (icebergDeleteFiles == null) {
+ return deleteFiles;
+ }
+ for (TIcebergDeleteFileDesc deleteFile : icebergDeleteFiles) {
+ if (deleteFile != null && deleteFile.isSetPath()) { // null entries and entries without a path are skipped
+ deleteFiles.add(deleteFile.getPath());
+ }
+ }
+ return deleteFiles;
+ }
+
+ private String getDeleteFileContentType(int content) { // NOTE(review): no caller of this private helper appears in this change
+ // Iceberg file type: 0: data, 1: position delete, 2: equality delete,
3: deletion vector
+ switch (content) {
+ case 1:
+ return "position_delete";
+ case 2:
+ return "equality_delete";
+ case 3:
+ return "deletion_vector";
+ default: // any other value, including 0 (data), maps to "unknown"
+ return "unknown";
+ }
+ }
+
@Override
public List<Split> getSplits(int numBackends) throws UserException {
try {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index e1f746a70e8..fc35282066e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -276,6 +276,28 @@ public class PaimonScanNode extends FileQueryScanNode {
rangeDesc.setTableFormatParams(tableFormatFileDesc);
}
+ @Override
+ protected List<String> getDeleteFiles(TFileRangeDesc rangeDesc) {
+ List<String> deleteFiles = new ArrayList<>();
+ if (rangeDesc == null || !rangeDesc.isSetTableFormatParams()) {
+ return deleteFiles;
+ }
+ TTableFormatFileDesc tableFormatParams =
rangeDesc.getTableFormatParams();
+ if (tableFormatParams == null ||
!tableFormatParams.isSetPaimonParams()) {
+ return deleteFiles;
+ }
+ TPaimonFileDesc paimonParams = tableFormatParams.getPaimonParams();
+ if (paimonParams == null || !paimonParams.isSetDeletionFile()) {
+ return deleteFiles;
+ }
+ TPaimonDeletionFileDesc deletionFile = paimonParams.getDeletionFile();
+ if (deletionFile != null && deletionFile.isSetPath()) {
+ // Only the path is added; no offset or length is appended to the entry.
+ deleteFiles.add(deletionFile.getPath());
+ }
+ return deleteFiles;
+ }
+
@Override
public List<Split> getSplits(int numBackends) throws UserException {
boolean forceJniScanner = sessionVariable.isForceJniScanner();
diff --git
a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
index 568bb632dec..adc97540665 100644
---
a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
@@ -142,6 +142,13 @@ suite("test_transactional_hive",
"p0,external,hive,external_docker,external_dock
qt_count_5 """ select count(*) from orc_acid_major; """ //3
}
+ def test_explain_verbose = { // verbose explain on an ACID table; looks for "deleteFileNum" (presumably asserted by the explain DSL - confirm)
+ explain {
+ sql ("select count(*) from orc_full_acid")
+ verbose (true)
+ contains "deleteFileNum"
+ }
+ }
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
@@ -177,6 +184,7 @@ suite("test_transactional_hive",
"p0,external,hive,external_docker,external_dock
test_acid_count()
+ test_explain_verbose()
q01_par_limit()
diff --git
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
index d793cef3568..91bc48a6758 100644
---
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
+++
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_position_delete.groovy
@@ -165,6 +165,16 @@ suite("test_iceberg_position_delete",
"p0,external,doris,external_docker,externa
assertTrue(iceberg_position_gen_7.size() == 5632)
// sql """drop catalog ${catalog_name}"""
+
+ def test_explain_verbose = { // verbose explain on the position-delete table; looks for "deleteFileNum" (presumably asserted by the explain DSL - confirm)
+ explain {
+ sql ("select name from iceberg_position_gen_data where id !=
5;")
+ verbose (true)
+ contains "deleteFileNum"
+ }
+ }
+ test_explain_verbose()
+
}
/*
diff --git
a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
index 71a4d971169..76574aae528 100644
---
a/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
+++
b/regression-test/suites/external_table_p0/paimon/test_paimon_deletion_vector_oss.groovy
@@ -53,8 +53,17 @@ suite("test_paimon_deletion_vector_oss",
"p0,external,doris,external_docker,exte
qt_6 """select * from deletion_vector_parquet where id > 2 order
by id;"""
}
+ def test_explain_verbose = { // verbose explain on the deletion-vector table; looks for "deleteFileNum" (presumably asserted by the explain DSL - confirm)
+ explain {
+ sql ("select * from deletion_vector_orc;")
+ verbose (true)
+ contains "deleteFileNum"
+ }
+ }
+
test_cases("false")
test_cases("true")
+ test_explain_verbose()
} finally {
sql """set force_jni_scanner=false"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]