This is an automated email from the ASF dual-hosted git repository.

diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-spark-connector.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e3e514  [improvement] change batch size default value and add max limitation (#230)
8e3e514 is described below

commit 8e3e514a2699661603505abbe91d372908e64313
Author: gnehil <[email protected]>
AuthorDate: Tue Sep 3 18:18:39 2024 +0800

    [improvement] change batch size default value and add max limitation (#230)
---
 .../src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java | 3 ++-
 .../src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java b/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
index 68f4ba8..3b9b554 100644
--- a/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
+++ b/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
@@ -59,7 +59,8 @@ public interface ConfigurationOptions {
     int DORIS_TABLET_SIZE_MIN = 1;
 
     String DORIS_BATCH_SIZE = "doris.batch.size";
-    int DORIS_BATCH_SIZE_DEFAULT = 1024;
+    int DORIS_BATCH_SIZE_DEFAULT = 4064;
+    int DORIS_BATCH_SIZE_MAX = 65535;
 
     String DORIS_EXEC_MEM_LIMIT = "doris.exec.mem.limit";
     long DORIS_EXEC_MEM_LIMIT_DEFAULT = 8L * 1024 * 1024 * 1024;
diff --git a/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala b/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala
index 16707b8..8f518bd 100644
--- a/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala
+++ b/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala
@@ -94,7 +94,7 @@ class ScalaValueReader(partition: PartitionDefinition, settings: Settings) exten
 
     // max row number of one read batch
     val batchSize = Try {
-      settings.getProperty(DORIS_BATCH_SIZE, 
DORIS_BATCH_SIZE_DEFAULT.toString).toInt
+      Math.min(settings.getProperty(DORIS_BATCH_SIZE, 
DORIS_BATCH_SIZE_DEFAULT.toString).toInt, DORIS_BATCH_SIZE_MAX)
     } getOrElse {
       logWarning(String.format(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, 
DORIS_BATCH_SIZE, settings.getProperty(DORIS_BATCH_SIZE)))
       DORIS_BATCH_SIZE_DEFAULT


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to