This is an automated email from the ASF dual-hosted git repository.

corgy pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new 84634a4d1f [Bugfix][Starrocks] Fix starrocks batch data exceeds the maximum limit (#9256)
84634a4d1f is described below

commit 84634a4d1f15f8d4d44284e8f25d2f1a403fd0ef
Author: hailin0 <[email protected]>
AuthorDate: Wed Apr 30 09:48:32 2025 +0800

    [Bugfix][Starrocks] Fix starrocks batch data exceeds the maximum limit (#9256)
---
 .../client/StarRocksStreamLoadVisitor.java         |  38 ++++++-
 .../client/StarRocksStreamLoadVisitorTest.java     | 113 +++++++++++++++++++++
 2 files changed, 147 insertions(+), 4 deletions(-)

diff --git 
a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java
 
b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java
index 73b6ba1159..741898cf89 100644
--- 
a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java
+++ 
b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java
@@ -64,6 +64,7 @@ public class StarRocksStreamLoadVisitor {
         this.sinkConfig = sinkConfig;
         this.tableSchema = tableSchema;
         this.httpHelper = new HttpHelper(sinkConfig);
+        checkBatchMaxBytes(sinkConfig.getBatchMaxBytes(), 
sinkConfig.getBatchMaxSize());
     }
 
     public Boolean doStreamLoad(StarRocksFlushTuple flushData) throws 
IOException {
@@ -92,7 +93,7 @@ public class StarRocksStreamLoadVisitor {
         Map<String, Object> loadResult =
                 httpHelper.doHttpPut(
                         loadUrl,
-                        joinRows(flushData.getRows(), 
flushData.getBytes().intValue()),
+                        joinRows(flushData.getRows(), flushData.getBytes()),
                         getStreamLoadHttpHeader(flushData.getLabel()));
         final String keyStatus = "Status";
         if (null == loadResult || !loadResult.containsKey(keyStatus)) {
@@ -150,13 +151,15 @@ public class StarRocksStreamLoadVisitor {
         return null;
     }
 
-    private byte[] joinRows(List<byte[]> rows, int totalBytes) {
+    private byte[] joinRows(List<byte[]> rows, Long totalBytes) {
+        checkBatchMaxBytes(totalBytes, rows.size());
         if 
(SinkConfig.StreamLoadFormat.CSV.equals(sinkConfig.getLoadFormat())) {
             Map<String, Object> props = sinkConfig.getStreamLoadProps();
             byte[] lineDelimiter =
                     StarRocksDelimiterParser.parse((String) 
props.get("row_delimiter"), "\n")
                             .getBytes(StandardCharsets.UTF_8);
-            ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * 
lineDelimiter.length);
+            ByteBuffer bos =
+                    ByteBuffer.allocate(totalBytes.intValue() + rows.size() * 
lineDelimiter.length);
             for (byte[] row : rows) {
                 bos.put(row);
                 bos.put(lineDelimiter);
@@ -166,7 +169,8 @@ public class StarRocksStreamLoadVisitor {
 
         if 
(SinkConfig.StreamLoadFormat.JSON.equals(sinkConfig.getLoadFormat())) {
             ByteBuffer bos =
-                    ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : 
rows.size() + 1));
+                    ByteBuffer.allocate(
+                            totalBytes.intValue() + (rows.isEmpty() ? 2 : 
rows.size() + 1));
             bos.put("[".getBytes(StandardCharsets.UTF_8));
             byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8);
             boolean isFirstElement = true;
@@ -300,4 +304,30 @@ public class StarRocksStreamLoadVisitor {
         headerMap.put("Connection", "close");
         return headerMap;
     }
+
+    void checkBatchMaxBytes(long batchMaxBytes, long batchMaxRows) {
+        long batchMaxBytesLimit;
+        if 
(SinkConfig.StreamLoadFormat.CSV.equals(sinkConfig.getLoadFormat())) {
+            Map<String, Object> props = sinkConfig.getStreamLoadProps();
+            byte[] lineDelimiter =
+                    StarRocksDelimiterParser.parse((String) 
props.get("row_delimiter"), "\n")
+                            .getBytes(StandardCharsets.UTF_8);
+            batchMaxBytesLimit = Integer.MAX_VALUE - batchMaxRows * 
lineDelimiter.length;
+        } else if 
(SinkConfig.StreamLoadFormat.JSON.equals(sinkConfig.getLoadFormat())) {
+            batchMaxBytesLimit = Integer.MAX_VALUE - (batchMaxRows == 0 ? 2 : 
batchMaxRows + 1);
+        } else {
+            throw new StarRocksConnectorException(
+                    StarRocksConnectorErrorCode.FLUSH_DATA_FAILED,
+                    "Failed to join rows data, unsupported `format` from 
stream load properties:");
+        }
+
+        if (batchMaxBytes > batchMaxBytesLimit) {
+            throw new StarRocksConnectorException(
+                    StarRocksConnectorErrorCode.FLUSH_DATA_FAILED,
+                    String.format(
+                            "The batch_max_bytes[%d] of the data exceeds the 
maximum limit[%d], "
+                                    + "please reset the batch_max_bytes.",
+                            batchMaxBytes, batchMaxBytesLimit));
+        }
+    }
 }
diff --git 
a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitorTest.java
 
b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitorTest.java
new file mode 100644
index 0000000000..91b2d040a6
--- /dev/null
+++ 
b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitorTest.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.starrocks.client;
+
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.connectors.seatunnel.starrocks.config.SinkConfig;
+import 
org.apache.seatunnel.connectors.seatunnel.starrocks.exception.StarRocksConnectorException;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class StarRocksStreamLoadVisitorTest {
+
+    @Test
+    void throwsExceptionWhenBatchMaxBytesExceedsLimitForCSVFormat() {
+        SinkConfig sinkConfig = mock(SinkConfig.class);
+        
when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.CSV);
+        when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483638L);
+        when(sinkConfig.getBatchMaxSize()).thenReturn(100);
+        Map<String, Object> props = new HashMap<>();
+        props.put("row_delimiter", "\n");
+        when(sinkConfig.getStreamLoadProps()).thenReturn(props);
+
+        assertThrows(
+                StarRocksConnectorException.class,
+                () -> {
+                    StarRocksStreamLoadVisitor visitor =
+                            new StarRocksStreamLoadVisitor(sinkConfig, 
mock(TableSchema.class));
+                    visitor.checkBatchMaxBytes(2147483638L, 100);
+                });
+    }
+
+    @Test
+    void throwsExceptionWhenBatchMaxBytesExceedsLimitForJSONFormat() {
+        SinkConfig sinkConfig = mock(SinkConfig.class);
+        
when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.JSON);
+        when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483637L);
+        when(sinkConfig.getBatchMaxSize()).thenReturn(100);
+
+        assertThrows(
+                StarRocksConnectorException.class,
+                () -> {
+                    StarRocksStreamLoadVisitor visitor =
+                            new StarRocksStreamLoadVisitor(sinkConfig, 
mock(TableSchema.class));
+                    visitor.checkBatchMaxBytes(2147483637L, 100);
+                });
+    }
+
+    @Test
+    void doesNotThrowExceptionWhenBatchMaxBytesWithinLimitForCSVFormat() {
+        SinkConfig sinkConfig = mock(SinkConfig.class);
+        
when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.CSV);
+        when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483637L);
+        when(sinkConfig.getBatchMaxSize()).thenReturn(10);
+
+        Map<String, Object> props = new HashMap<>();
+        props.put("row_delimiter", "\n");
+        when(sinkConfig.getStreamLoadProps()).thenReturn(props);
+        StarRocksStreamLoadVisitor visitor =
+                new StarRocksStreamLoadVisitor(sinkConfig, 
mock(TableSchema.class));
+
+        assertDoesNotThrow(() -> visitor.checkBatchMaxBytes(2147483637L, 10));
+    }
+
+    @Test
+    void doesNotThrowExceptionWhenBatchMaxBytesWithinLimitForJSONFormat() {
+        SinkConfig sinkConfig = mock(SinkConfig.class);
+        
when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.JSON);
+        when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483636L);
+        when(sinkConfig.getBatchMaxSize()).thenReturn(10);
+
+        StarRocksStreamLoadVisitor visitor =
+                new StarRocksStreamLoadVisitor(sinkConfig, 
mock(TableSchema.class));
+        assertDoesNotThrow(() -> visitor.checkBatchMaxBytes(2147483636L, 10));
+    }
+
+    @Test
+    void throwsExceptionForUnsupportedLoadFormat() {
+        SinkConfig sinkConfig = mock(SinkConfig.class);
+        when(sinkConfig.getBatchMaxBytes()).thenReturn(1024L);
+        when(sinkConfig.getBatchMaxSize()).thenReturn(10);
+
+        assertThrows(
+                StarRocksConnectorException.class,
+                () -> {
+                    StarRocksStreamLoadVisitor visitor =
+                            new StarRocksStreamLoadVisitor(sinkConfig, 
mock(TableSchema.class));
+                    visitor.checkBatchMaxBytes(1024, 10);
+                });
+    }
+}

Reply via email to