This is an automated email from the ASF dual-hosted git repository.

liaoxin pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new e679fbe97d3 [improve](routine load) adjust default values to make 
routine load more convenient to use (#42968)
e679fbe97d3 is described below

commit e679fbe97d3faa1a061ed9a8ac9e7ad50c4e3384
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Nov 4 22:57:40 2024 +0800

    [improve](routine load) adjust default values to make routine load more 
convenient to use (#42968)
    
    PR Body: For a routine load job, it will be divided into many tasks,
    each of which is a transaction. Currently, the default time
    consumed(max_batch_interval) is 10 seconds. The benefits of increasing
    this value are:
    1. Larger batch consumption can lead to better performance.
    2. Reducing the number of transactions can alleviate the pressure of
    compaction and the conflicts of concurrent transaction submissions.
    
    related doc: https://github.com/apache/doris-website/pull/1236/files
    
     Cherry-picked from #42491
    
    Co-authored-by: hui lai <[email protected]>
    Co-authored-by: Xin Liao <[email protected]>
---
 be/src/runtime/stream_load/stream_load_context.h                   | 7 ++++---
 .../java/org/apache/doris/load/routineload/RoutineLoadJob.java     | 2 +-
 .../java/org/apache/doris/load/routineload/RoutineLoadJobTest.java | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/be/src/runtime/stream_load/stream_load_context.h 
b/be/src/runtime/stream_load/stream_load_context.h
index 9d1601372f8..93f76fad4e6 100644
--- a/be/src/runtime/stream_load/stream_load_context.h
+++ b/be/src/runtime/stream_load/stream_load_context.h
@@ -164,9 +164,10 @@ public:
 
     // the following members control the max progress of a consuming
     // process. if any of them reach, the consuming will finish.
-    int64_t max_interval_s = 5;
-    int64_t max_batch_rows = 100000;
-    int64_t max_batch_size = 100 * 1024 * 1024; // 100MB
+    // same as values set in 
fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
+    int64_t max_interval_s = 60;
+    int64_t max_batch_rows = 20000000;
+    int64_t max_batch_size = 1024 * 1024 * 1024; // 1GB
 
     // for parse json-data
     std::string data_format = "";
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
index 7686a7ce0d4..6af164e4cff 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
@@ -113,7 +113,7 @@ public abstract class RoutineLoadJob
     public static final long DEFAULT_MAX_ERROR_NUM = 0;
     public static final double DEFAULT_MAX_FILTER_RATIO = 1.0;
 
-    public static final long DEFAULT_MAX_INTERVAL_SECOND = 10;
+    public static final long DEFAULT_MAX_INTERVAL_SECOND = 60;
     public static final long DEFAULT_MAX_BATCH_ROWS = 20000000;
     public static final long DEFAULT_MAX_BATCH_SIZE = 1024 * 1024 * 1024; // 
1GB
     public static final long DEFAULT_EXEC_MEM_LIMIT = 2 * 1024 * 1024 * 1024L;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java
index 6f3dd2eaaa8..2d9efd895cb 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java
@@ -362,7 +362,7 @@ public class RoutineLoadJobTest {
                 + "\"desired_concurrent_number\" = \"0\",\n"
                 + "\"max_error_number\" = \"10\",\n"
                 + "\"max_filter_ratio\" = \"1.0\",\n"
-                + "\"max_batch_interval\" = \"10\",\n"
+                + "\"max_batch_interval\" = \"60\",\n"
                 + "\"max_batch_rows\" = \"10\",\n"
                 + "\"max_batch_size\" = \"1073741824\",\n"
                 + "\"format\" = \"csv\",\n"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to