This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 8587dbae8c4 branch-3.1: [opt](scanner) add some debug log for batch split (#55352)
8587dbae8c4 is described below
commit 8587dbae8c4e638276ef57adf4b63d894e660d81
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Wed Aug 27 23:51:32 2025 -0700
branch-3.1: [opt](scanner) add some debug log for batch split (#55352)
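Add some debug logs for batch split: the BE logs the scanner thread settings and the FE logs the splits assigned to each backend, each tagged with the query id.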
---
be/src/pipeline/exec/file_scan_operator.cpp | 6 ++++++
be/src/vec/exec/scan/scanner_context.cpp | 13 +++++++++++++
.../java/org/apache/doris/datasource/FileQueryScanNode.java | 1 +
.../java/org/apache/doris/datasource/SplitAssignment.java | 9 +++++++++
.../org/apache/doris/datasource/SplitAssignmentTest.java | 3 +++
5 files changed, 32 insertions(+)
diff --git a/be/src/pipeline/exec/file_scan_operator.cpp b/be/src/pipeline/exec/file_scan_operator.cpp
index 00ebfe83535..26ccc092c7e 100644
--- a/be/src/pipeline/exec/file_scan_operator.cpp
+++ b/be/src/pipeline/exec/file_scan_operator.cpp
@@ -70,6 +70,12 @@ void FileScanLocalState::set_scan_ranges(RuntimeState* state,
if (should_run_serial()) {
max_scanners = 1;
}
+ VLOG_CRITICAL << "debug remote scan thread num: "
+ <<
vectorized::ScannerScheduler::get_remote_scan_thread_num()
+ << ", parallel_instance_num: " << parallel_instance_num
+ << ", should_run_serial: " << should_run_serial()
+ << ", max_scanners: " << max_scanners
+ << ", query id: " << print_id(state->query_id());
return max_scanners;
};
diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp
index 15afb3254ca..5fb564e4ab8 100644
--- a/be/src/vec/exec/scan/scanner_context.cpp
+++ b/be/src/vec/exec/scan/scanner_context.cpp
@@ -194,6 +194,19 @@ Status ScannerContext::init() {
_max_thread_num = 1;
}
+ VLOG_CRITICAL << "debug num_scanner_threads: " <<
_state->num_scanner_threads()
+ << ", potential_performance_issue: "
+ << submit_many_scan_tasks_for_potential_performance_issue
+ << ", _ignore_data_distribution: " <<
_ignore_data_distribution
+ << ", _is_file_scan_operator: " << _is_file_scan_operator
+ << ", doris_scanner_thread_pool_thread_num: "
+ << config::doris_scanner_thread_pool_thread_num
+ << ", _num_parallel_instances: " << _num_parallel_instances
+ << ", _all_scanners.size: " << _all_scanners.size()
+ << ", should_run_serial: " <<
_local_state->should_run_serial()
+ << ", _max_thread_num: " << _max_thread_num
+ << ", query id: " << print_id(_state->query_id());
+
// when user not specify scan_thread_num, so we can try downgrade _max_thread_num.
// becaue we found in a table with 5k columns, column reader may ocuppy too much memory.
// you can refer https://github.com/apache/doris/issues/35340 for details.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
index 46b455a2205..93ebe7adaea 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
@@ -328,6 +328,7 @@ public abstract class FileQueryScanNode extends FileScanNode {
// File splits are generated lazily, and fetched by backends while scanning.
// Only provide the unique ID of split source to backend.
splitAssignment = new SplitAssignment(
+ ConnectContext.get().queryId(),
backendPolicy, this, this::splitToScanRange,
locationProperties, pathPartitionKeys);
splitAssignment.init();
if (executor != null) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java
index cc17818d6b5..5b09bade5c9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java
@@ -18,9 +18,11 @@
package org.apache.doris.datasource;
import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.spi.Split;
import org.apache.doris.system.Backend;
import org.apache.doris.thrift.TScanRangeLocations;
+import org.apache.doris.thrift.TUniqueId;
import com.google.common.collect.Multimap;
import org.apache.logging.log4j.LogManager;
@@ -45,6 +47,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
*/
public class SplitAssignment {
private static final Logger LOG =
LogManager.getLogger(SplitAssignment.class);
+ private final TUniqueId queryId;
private final Set<Long> sources = new HashSet<>();
private final FederationBackendPolicy backendPolicy;
private final SplitGenerator splitGenerator;
@@ -62,11 +65,13 @@ public class SplitAssignment {
private final List<Closeable> closeableResources = new ArrayList<>();
public SplitAssignment(
+ TUniqueId queryId,
FederationBackendPolicy backendPolicy,
SplitGenerator splitGenerator,
SplitToScanRange splitToScanRange,
Map<String, String> locationProperties,
List<String> pathPartitionKeys) {
+ this.queryId = queryId;
this.backendPolicy = backendPolicy;
this.splitGenerator = splitGenerator;
this.splitToScanRange = splitToScanRange;
@@ -109,6 +114,10 @@ public class SplitAssignment {
for (Split split : splits) {
locations.add(splitToScanRange.getScanRange(backend,
locationProperties, split, pathPartitionKeys));
}
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("begin to add num of splits: {} to backend: {}, need
more split: {}, query id: {}",
+ locations.size(), backend.getAddress(),
needMoreSplit(), DebugUtil.printId(queryId));
+ }
while (needMoreSplit()) {
BlockingQueue<Collection<TScanRangeLocations>> queue =
assignment.computeIfAbsent(backend, be -> new
LinkedBlockingQueue<>(10000));
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java
index ab5205b47a7..bc5838e53d2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.UserException;
import org.apache.doris.spi.Split;
import org.apache.doris.system.Backend;
import org.apache.doris.thrift.TScanRangeLocations;
+import org.apache.doris.thrift.TUniqueId;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
@@ -72,6 +73,7 @@ public class SplitAssignmentTest {
pathPartitionKeys = new ArrayList<>();
splitAssignment = new SplitAssignment(
+ new TUniqueId(1, 2),
mockBackendPolicy,
mockSplitGenerator,
mockSplitToScanRange,
@@ -123,6 +125,7 @@ public class SplitAssignmentTest {
void testInitTimeout() throws Exception {
// Use MockUp to simulate timeout behavior quickly instead of waiting 30 seconds
SplitAssignment testAssignment = new SplitAssignment(
+ new TUniqueId(1, 2),
mockBackendPolicy,
mockSplitGenerator,
mockSplitToScanRange,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]