This is an automated email from the ASF dual-hosted git repository.
corgy pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
     new 84634a4d1f [Bugfix][Starrocks] Fix starrocks batch data exceeds the maximum limit (#9256)
84634a4d1f is described below
commit 84634a4d1f15f8d4d44284e8f25d2f1a403fd0ef
Author: hailin0 <[email protected]>
AuthorDate: Wed Apr 30 09:48:32 2025 +0800
    [Bugfix][Starrocks] Fix starrocks batch data exceeds the maximum limit (#9256)
---
.../client/StarRocksStreamLoadVisitor.java | 38 ++++++-
.../client/StarRocksStreamLoadVisitorTest.java | 113 +++++++++++++++++++++
2 files changed, 147 insertions(+), 4 deletions(-)
diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java
index 73b6ba1159..741898cf89 100644
--- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java
+++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitor.java
@@ -64,6 +64,7 @@ public class StarRocksStreamLoadVisitor {
this.sinkConfig = sinkConfig;
this.tableSchema = tableSchema;
this.httpHelper = new HttpHelper(sinkConfig);
+ checkBatchMaxBytes(sinkConfig.getBatchMaxBytes(), sinkConfig.getBatchMaxSize());
}
public Boolean doStreamLoad(StarRocksFlushTuple flushData) throws IOException {
@@ -92,7 +93,7 @@ public class StarRocksStreamLoadVisitor {
Map<String, Object> loadResult =
httpHelper.doHttpPut(
loadUrl,
- joinRows(flushData.getRows(), flushData.getBytes().intValue()),
+ joinRows(flushData.getRows(), flushData.getBytes()),
getStreamLoadHttpHeader(flushData.getLabel()));
final String keyStatus = "Status";
if (null == loadResult || !loadResult.containsKey(keyStatus)) {
@@ -150,13 +151,15 @@ public class StarRocksStreamLoadVisitor {
return null;
}
- private byte[] joinRows(List<byte[]> rows, int totalBytes) {
+ private byte[] joinRows(List<byte[]> rows, Long totalBytes) {
+ checkBatchMaxBytes(totalBytes, rows.size());
if (SinkConfig.StreamLoadFormat.CSV.equals(sinkConfig.getLoadFormat())) {
Map<String, Object> props = sinkConfig.getStreamLoadProps();
byte[] lineDelimiter =
StarRocksDelimiterParser.parse((String) props.get("row_delimiter"), "\n")
.getBytes(StandardCharsets.UTF_8);
- ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length);
+ ByteBuffer bos =
+ ByteBuffer.allocate(totalBytes.intValue() + rows.size() * lineDelimiter.length);
for (byte[] row : rows) {
bos.put(row);
bos.put(lineDelimiter);
@@ -166,7 +169,8 @@ public class StarRocksStreamLoadVisitor {
if (SinkConfig.StreamLoadFormat.JSON.equals(sinkConfig.getLoadFormat())) {
ByteBuffer bos =
- ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : rows.size() + 1));
+ ByteBuffer.allocate(
+ totalBytes.intValue() + (rows.isEmpty() ? 2 : rows.size() + 1));
bos.put("[".getBytes(StandardCharsets.UTF_8));
byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8);
boolean isFirstElement = true;
@@ -300,4 +304,30 @@ public class StarRocksStreamLoadVisitor {
headerMap.put("Connection", "close");
return headerMap;
}
+
+ void checkBatchMaxBytes(long batchMaxBytes, long batchMaxRows) {
+ long batchMaxBytesLimit;
+ if (SinkConfig.StreamLoadFormat.CSV.equals(sinkConfig.getLoadFormat())) {
+ Map<String, Object> props = sinkConfig.getStreamLoadProps();
+ byte[] lineDelimiter =
+ StarRocksDelimiterParser.parse((String) props.get("row_delimiter"), "\n")
+ .getBytes(StandardCharsets.UTF_8);
+ batchMaxBytesLimit = Integer.MAX_VALUE - batchMaxRows * lineDelimiter.length;
+ } else if (SinkConfig.StreamLoadFormat.JSON.equals(sinkConfig.getLoadFormat())) {
+ batchMaxBytesLimit = Integer.MAX_VALUE - (batchMaxRows == 0 ? 2 : batchMaxRows + 1);
+ } else {
+ throw new StarRocksConnectorException(
+ StarRocksConnectorErrorCode.FLUSH_DATA_FAILED,
+ "Failed to join rows data, unsupported `format` from
stream load properties:");
+ }
+
+ if (batchMaxBytes > batchMaxBytesLimit) {
+ throw new StarRocksConnectorException(
+ StarRocksConnectorErrorCode.FLUSH_DATA_FAILED,
+ String.format(
+ "The batch_max_bytes[%d] of the data exceeds the
maximum limit[%d], "
+ + "please reset the batch_max_bytes.",
+ batchMaxBytes, batchMaxBytesLimit));
+ }
+ }
}
diff --git a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitorTest.java b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitorTest.java
new file mode 100644
index 0000000000..91b2d040a6
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksStreamLoadVisitorTest.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.starrocks.client;
+
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.connectors.seatunnel.starrocks.config.SinkConfig;
+import org.apache.seatunnel.connectors.seatunnel.starrocks.exception.StarRocksConnectorException;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class StarRocksStreamLoadVisitorTest {
+
+ @Test
+ void throwsExceptionWhenBatchMaxBytesExceedsLimitForCSVFormat() {
+ SinkConfig sinkConfig = mock(SinkConfig.class);
+ when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.CSV);
+ when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483638L);
+ when(sinkConfig.getBatchMaxSize()).thenReturn(100);
+ Map<String, Object> props = new HashMap<>();
+ props.put("row_delimiter", "\n");
+ when(sinkConfig.getStreamLoadProps()).thenReturn(props);
+
+ assertThrows(
+ StarRocksConnectorException.class,
+ () -> {
+ StarRocksStreamLoadVisitor visitor =
+ new StarRocksStreamLoadVisitor(sinkConfig, mock(TableSchema.class));
+ visitor.checkBatchMaxBytes(2147483638L, 100);
+ });
+ }
+
+ @Test
+ void throwsExceptionWhenBatchMaxBytesExceedsLimitForJSONFormat() {
+ SinkConfig sinkConfig = mock(SinkConfig.class);
+ when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.JSON);
+ when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483637L);
+ when(sinkConfig.getBatchMaxSize()).thenReturn(100);
+
+ assertThrows(
+ StarRocksConnectorException.class,
+ () -> {
+ StarRocksStreamLoadVisitor visitor =
+ new StarRocksStreamLoadVisitor(sinkConfig, mock(TableSchema.class));
+ visitor.checkBatchMaxBytes(2147483637L, 100);
+ });
+ }
+
+ @Test
+ void doesNotThrowExceptionWhenBatchMaxBytesWithinLimitForCSVFormat() {
+ SinkConfig sinkConfig = mock(SinkConfig.class);
+ when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.CSV);
+ when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483637L);
+ when(sinkConfig.getBatchMaxSize()).thenReturn(10);
+
+ Map<String, Object> props = new HashMap<>();
+ props.put("row_delimiter", "\n");
+ when(sinkConfig.getStreamLoadProps()).thenReturn(props);
+ StarRocksStreamLoadVisitor visitor =
+ new StarRocksStreamLoadVisitor(sinkConfig, mock(TableSchema.class));
+
+ assertDoesNotThrow(() -> visitor.checkBatchMaxBytes(2147483637L, 10));
+ }
+
+ @Test
+ void doesNotThrowExceptionWhenBatchMaxBytesWithinLimitForJSONFormat() {
+ SinkConfig sinkConfig = mock(SinkConfig.class);
+ when(sinkConfig.getLoadFormat()).thenReturn(SinkConfig.StreamLoadFormat.JSON);
+ when(sinkConfig.getBatchMaxBytes()).thenReturn(2147483636L);
+ when(sinkConfig.getBatchMaxSize()).thenReturn(10);
+
+ StarRocksStreamLoadVisitor visitor =
+ new StarRocksStreamLoadVisitor(sinkConfig, mock(TableSchema.class));
+ assertDoesNotThrow(() -> visitor.checkBatchMaxBytes(2147483636L, 10));
+ }
+
+ @Test
+ void throwsExceptionForUnsupportedLoadFormat() {
+ SinkConfig sinkConfig = mock(SinkConfig.class);
+ when(sinkConfig.getBatchMaxBytes()).thenReturn(1024L);
+ when(sinkConfig.getBatchMaxSize()).thenReturn(10);
+
+ assertThrows(
+ StarRocksConnectorException.class,
+ () -> {
+ StarRocksStreamLoadVisitor visitor =
+ new StarRocksStreamLoadVisitor(sinkConfig, mock(TableSchema.class));
+ visitor.checkBatchMaxBytes(1024, 10);
+ });
+ }
+}