This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 8587dbae8c4 branch-3.1: [opt](scanner) add some debug log for batch split (#55352)
8587dbae8c4 is described below

commit 8587dbae8c4e638276ef57adf4b63d894e660d81
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Wed Aug 27 23:51:32 2025 -0700

    branch-3.1: [opt](scanner) add some debug log for batch split (#55352)
    
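    Add debug logs for batch split: scanner thread settings on the BE side
    and per-backend split assignment on the FE side.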
---
 be/src/pipeline/exec/file_scan_operator.cpp                 |  6 ++++++
 be/src/vec/exec/scan/scanner_context.cpp                    | 13 +++++++++++++
 .../java/org/apache/doris/datasource/FileQueryScanNode.java |  1 +
 .../java/org/apache/doris/datasource/SplitAssignment.java   |  9 +++++++++
 .../org/apache/doris/datasource/SplitAssignmentTest.java    |  3 +++
 5 files changed, 32 insertions(+)

diff --git a/be/src/pipeline/exec/file_scan_operator.cpp b/be/src/pipeline/exec/file_scan_operator.cpp
index 00ebfe83535..26ccc092c7e 100644
--- a/be/src/pipeline/exec/file_scan_operator.cpp
+++ b/be/src/pipeline/exec/file_scan_operator.cpp
@@ -70,6 +70,12 @@ void FileScanLocalState::set_scan_ranges(RuntimeState* state,
         if (should_run_serial()) {
             max_scanners = 1;
         }
+        VLOG_CRITICAL << "debug remote scan thread num: "
+                      << 
vectorized::ScannerScheduler::get_remote_scan_thread_num()
+                      << ", parallel_instance_num: " << parallel_instance_num
+                      << ", should_run_serial: " << should_run_serial()
+                      << ", max_scanners: " << max_scanners
+                      << ", query id: " << print_id(state->query_id());
         return max_scanners;
     };
 
diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp
index 15afb3254ca..5fb564e4ab8 100644
--- a/be/src/vec/exec/scan/scanner_context.cpp
+++ b/be/src/vec/exec/scan/scanner_context.cpp
@@ -194,6 +194,19 @@ Status ScannerContext::init() {
         _max_thread_num = 1;
     }
 
+    VLOG_CRITICAL << "debug num_scanner_threads: " << 
_state->num_scanner_threads()
+                  << ", potential_performance_issue: "
+                  << submit_many_scan_tasks_for_potential_performance_issue
+                  << ", _ignore_data_distribution: " << 
_ignore_data_distribution
+                  << ", _is_file_scan_operator: " << _is_file_scan_operator
+                  << ", doris_scanner_thread_pool_thread_num: "
+                  << config::doris_scanner_thread_pool_thread_num
+                  << ", _num_parallel_instances: " << _num_parallel_instances
+                  << ", _all_scanners.size: " << _all_scanners.size()
+                  << ", should_run_serial: " << 
_local_state->should_run_serial()
+                  << ", _max_thread_num: " << _max_thread_num
+                  << ", query id: " << print_id(_state->query_id());
+
     // when user not specify scan_thread_num, so we can try downgrade 
_max_thread_num.
     // becaue we found in a table with 5k columns, column reader may ocuppy 
too much memory.
     // you can refer https://github.com/apache/doris/issues/35340 for details.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
index 46b455a2205..93ebe7adaea 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
@@ -328,6 +328,7 @@ public abstract class FileQueryScanNode extends FileScanNode {
             // File splits are generated lazily, and fetched by backends while 
scanning.
             // Only provide the unique ID of split source to backend.
             splitAssignment = new SplitAssignment(
+                    ConnectContext.get().queryId(),
                     backendPolicy, this, this::splitToScanRange, 
locationProperties, pathPartitionKeys);
             splitAssignment.init();
             if (executor != null) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java
index cc17818d6b5..5b09bade5c9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitAssignment.java
@@ -18,9 +18,11 @@
 package org.apache.doris.datasource;
 
 import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.DebugUtil;
 import org.apache.doris.spi.Split;
 import org.apache.doris.system.Backend;
 import org.apache.doris.thrift.TScanRangeLocations;
+import org.apache.doris.thrift.TUniqueId;
 
 import com.google.common.collect.Multimap;
 import org.apache.logging.log4j.LogManager;
@@ -45,6 +47,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
  */
 public class SplitAssignment {
     private static final Logger LOG = 
LogManager.getLogger(SplitAssignment.class);
+    private final TUniqueId queryId;
     private final Set<Long> sources = new HashSet<>();
     private final FederationBackendPolicy backendPolicy;
     private final SplitGenerator splitGenerator;
@@ -62,11 +65,13 @@ public class SplitAssignment {
     private final List<Closeable> closeableResources = new ArrayList<>();
 
     public SplitAssignment(
+            TUniqueId queryId,
             FederationBackendPolicy backendPolicy,
             SplitGenerator splitGenerator,
             SplitToScanRange splitToScanRange,
             Map<String, String> locationProperties,
             List<String> pathPartitionKeys) {
+        this.queryId = queryId;
         this.backendPolicy = backendPolicy;
         this.splitGenerator = splitGenerator;
         this.splitToScanRange = splitToScanRange;
@@ -109,6 +114,10 @@ public class SplitAssignment {
             for (Split split : splits) {
                 locations.add(splitToScanRange.getScanRange(backend, 
locationProperties, split, pathPartitionKeys));
             }
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("begin to add num of splits: {} to backend: {}, need 
more split: {}, query id: {}",
+                        locations.size(), backend.getAddress(), 
needMoreSplit(), DebugUtil.printId(queryId));
+            }
             while (needMoreSplit()) {
                 BlockingQueue<Collection<TScanRangeLocations>> queue =
                         assignment.computeIfAbsent(backend, be -> new 
LinkedBlockingQueue<>(10000));
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java
index ab5205b47a7..bc5838e53d2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/SplitAssignmentTest.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.UserException;
 import org.apache.doris.spi.Split;
 import org.apache.doris.system.Backend;
 import org.apache.doris.thrift.TScanRangeLocations;
+import org.apache.doris.thrift.TUniqueId;
 
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.Multimap;
@@ -72,6 +73,7 @@ public class SplitAssignmentTest {
         pathPartitionKeys = new ArrayList<>();
 
         splitAssignment = new SplitAssignment(
+                new TUniqueId(1, 2),
                 mockBackendPolicy,
                 mockSplitGenerator,
                 mockSplitToScanRange,
@@ -123,6 +125,7 @@ public class SplitAssignmentTest {
     void testInitTimeout() throws Exception {
         // Use MockUp to simulate timeout behavior quickly instead of waiting 
30 seconds
         SplitAssignment testAssignment = new SplitAssignment(
+                new TUniqueId(1, 2),
                 mockBackendPolicy,
                 mockSplitGenerator,
                 mockSplitToScanRange,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to