This is an automated email from the ASF dual-hosted git repository.
zhouyao2023 pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new d58fce1caf [Feature]Check Chinese comments in the code (#8319)
d58fce1caf is described below
commit d58fce1cafc094860e451051b92d0d1031bdea76
Author: Jast <[email protected]>
AuthorDate: Wed Dec 18 08:32:11 2024 +0800
[Feature]Check Chinese comments in the code (#8319)
---
.../seatunnel/api/ChineseCharacterCheckTest.java | 180 +++++++++++++++++++++
.../FastLogDeserializationContent.java | 3 +-
.../source/DefaultSeaTunnelRowDeserializer.java | 6 +-
.../seatunnel/e2e/connector/doris/DorisIT.java | 8 +-
.../e2e/connector/doris/DorisMultiReadIT.java | 8 +-
.../e2e/connector/iceberg/IcebergSinkIT.java | 2 +-
6 files changed, 194 insertions(+), 13 deletions(-)
diff --git a/seatunnel-ci-tools/src/test/java/org/apache/seatunnel/api/ChineseCharacterCheckTest.java b/seatunnel-ci-tools/src/test/java/org/apache/seatunnel/api/ChineseCharacterCheckTest.java
new file mode 100644
index 0000000000..d0b2b838a1
--- /dev/null
+++ b/seatunnel-ci-tools/src/test/java/org/apache/seatunnel/api/ChineseCharacterCheckTest.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.github.javaparser.JavaParser;
+import com.github.javaparser.ParseResult;
+import com.github.javaparser.ast.CompilationUnit;
+import com.github.javaparser.ast.comments.Comment;
+import com.github.javaparser.ast.visitor.VoidVisitorAdapter;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.nio.file.FileVisitOption;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+import static org.apache.seatunnel.api.ImportShadeClassCheckTest.isWindows;
+
+@Slf4j
+public class ChineseCharacterCheckTest {
+
+ private final JavaParser JAVA_PARSER = new JavaParser();
+
+ private static final Pattern CHINESE_PATTERN =
Pattern.compile("[\\u4e00-\\u9fa5]");
+
+ /** Defines what content should be checked for Chinese characters */
+ public enum CheckScope {
+ /** Check both comments and code */
+ ALL,
+ /** Check only comments */
+ COMMENTS_ONLY,
+ /** Check only code (string literals) */
+ CODE_ONLY
+ }
+
+ @Disabled("Currently only checking comments")
+ @Test
+ public void checkChineseCharactersInAll() {
+ checkChineseCharacters(CheckScope.ALL);
+ }
+
+ @Test
+ public void checkChineseCharactersInCommentsOnly() {
+ checkChineseCharacters(CheckScope.COMMENTS_ONLY);
+ }
+
+ @Disabled("Currently only checking comments")
+ @Test
+ public void checkChineseCharactersInCodeOnly() {
+ checkChineseCharacters(CheckScope.CODE_ONLY);
+ }
+
+ private void checkChineseCharacters(CheckScope scope) {
+ // Define path fragments for source and test Java files
+ String mainPathFragment = isWindows ? "src\\main\\java" :
"src/main/java";
+ String testPathFragment2 = isWindows ? "src\\test\\java" :
"src/test/java";
+
+ try (Stream<Path> paths = Files.walk(Paths.get(".."),
FileVisitOption.FOLLOW_LINKS)) {
+ List<String> filesWithChinese = new ArrayList<>();
+
+ // Filter Java files in the specified directories
+ paths.filter(
+ path -> {
+ String pathString = path.toString();
+ return pathString.endsWith(".java")
+ &&
(pathString.contains(mainPathFragment)
+ ||
pathString.contains(testPathFragment2));
+ })
+ .forEach(
+ path -> {
+ try {
+ // Parse the Java file
+ ParseResult<CompilationUnit> parseResult =
+
JAVA_PARSER.parse(Files.newInputStream(path));
+
+ parseResult
+ .getResult()
+ .ifPresent(
+ cu -> {
+ // Check for Chinese
characters in comments
+ // if needed
+ if (scope !=
CheckScope.CODE_ONLY) {
+ List<Comment>
comments =
+
cu.getAllContainedComments();
+ for (Comment
comment : comments) {
+ if
(CHINESE_PATTERN
+
.matcher(
+
comment
+
.getContent())
+
.find()) {
+
filesWithChinese.add(
+
String.format(
+
"Found Chinese characters in comment at %s: %s",
+
path
+
.toAbsolutePath(),
+
comment.getContent()
+
.trim()));
+ }
+ }
+ }
+
+ // Check for Chinese
characters in code if
+ // needed
+ if (scope !=
CheckScope.COMMENTS_ONLY) {
+
ChineseCharacterVisitor visitor =
+ new
ChineseCharacterVisitor(
+
path, filesWithChinese);
+ visitor.visit(cu,
null);
+ }
+ });
+
+ } catch (Exception e) {
+ log.error("Error parsing file: {}", path,
e);
+ }
+ });
+
+ // Assert that no files contain Chinese characters
+ Assertions.assertEquals(
+ 0,
+ filesWithChinese.size(),
+ () ->
+ String.format(
+ "Found Chinese characters in following
files (Scope: %s):\n%s",
+ scope, String.join("\n",
filesWithChinese)));
+
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static class ChineseCharacterVisitor extends
VoidVisitorAdapter<Void> {
+ private final Path filePath;
+ private final List<String> filesWithChinese;
+
+ public ChineseCharacterVisitor(Path filePath, List<String>
filesWithChinese) {
+ this.filePath = filePath;
+ this.filesWithChinese = filesWithChinese;
+ }
+
+ @Override
+ public void visit(CompilationUnit cu, Void arg) {
+ // Check for Chinese characters in string literals
+ cu.findAll(com.github.javaparser.ast.expr.StringLiteralExpr.class)
+ .forEach(
+ str -> {
+ if
(CHINESE_PATTERN.matcher(str.getValue()).find()) {
+ filesWithChinese.add(
+ String.format(
+ "Found Chinese characters
in string literal at %s: %s",
+ filePath.toAbsolutePath(),
str.getValue()));
+ }
+ });
+ super.visit(cu, arg);
+ }
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java b/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java
index 27bd35bff2..86851ad89a 100644
--- a/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java
+++ b/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java
@@ -90,7 +90,8 @@ public class FastLogDeserializationContent
.append("\":\"")
.append(content.getValue())
.append("\","));
- jsonStringBuilder.deleteCharAt(jsonStringBuilder.length() - 1); //
删除最后一个逗号
+ // Remove the last comma
+ jsonStringBuilder.deleteCharAt(jsonStringBuilder.length() - 1);
jsonStringBuilder.append("}");
// content field
transformedRow.add(jsonStringBuilder.toString());
diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java
index 762506d498..08dcd85f7f 100644
--- a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java
+++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java
@@ -115,10 +115,10 @@ public class DefaultSeaTunnelRowDeserializer implements SeaTunnelRowDeserializer
try {
for (int i = 0; i < rowTypeInfo.getTotalFields(); i++) {
fieldName = rowTypeInfo.getFieldName(i);
- value = doc.get(fieldName); // 字段值
+ value = doc.get(fieldName);
if (value != null) {
- seaTunnelDataType =
- rowTypeInfo.getFieldType(i); // seaTunnelDataType
为SeaTunnel类型
+ // seaTunnelDataType is the SeaTunnel type
+ seaTunnelDataType = rowTypeInfo.getFieldType(i);
seaTunnelFields[i] = convertValue(seaTunnelDataType,
value);
}
}
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java
index 3d9b0572db..178ed0ffba 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java
@@ -738,7 +738,7 @@ public class DorisIT extends AbstractDorisIT {
}
public void getErrorUrl(String message) {
- // 使用正则表达式匹配URL
+ // Using regular expressions to match URLs
Pattern pattern = Pattern.compile("http://[\\w./?=&-_]+");
Matcher matcher = pattern.matcher(message);
String urlString = null;
@@ -754,12 +754,12 @@ public class DorisIT extends AbstractDorisIT {
URL url = new URL(urlString);
HttpURLConnection connection = (HttpURLConnection)
url.openConnection();
- // 设置请求方法
+ // Set the request method
connection.setRequestMethod("GET");
- // 设置连接超时时间
+ // Set the connection timeout
connection.setConnectTimeout(5000);
- // 设置读取超时时间
+ // Set the read timeout
connection.setReadTimeout(5000);
int responseCode = connection.getResponseCode();
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java
index dd604b5714..cbdbfc4ae4 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java
@@ -493,7 +493,7 @@ public class DorisMultiReadIT extends AbstractDorisIT {
}
public void getErrorUrl(String message) {
- // 使用正则表达式匹配URL
+ // Using regular expressions to match URLs
Pattern pattern = Pattern.compile("http://[\\w./?=&-_]+");
Matcher matcher = pattern.matcher(message);
String urlString = null;
@@ -509,12 +509,12 @@ public class DorisMultiReadIT extends AbstractDorisIT {
URL url = new URL(urlString);
HttpURLConnection connection = (HttpURLConnection)
url.openConnection();
- // 设置请求方法
+ // Set the request method
connection.setRequestMethod("GET");
- // 设置连接超时时间
+ // Set the connection timeout
connection.setConnectTimeout(5000);
- // 设置读取超时时间
+ // Set the read timeout
connection.setReadTimeout(5000);
int responseCode = connection.getResponseCode();
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java
index 20c1b02914..0f5e0dfe0b 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java
@@ -108,7 +108,7 @@ public class IcebergSinkIT extends TestSuiteBase {
"sh", "-c", "cd " + CATALOG_DIR + " && tar -zxvf "
+ NAMESPACE_TAR);
try {
Process process = processBuilder.start();
- // 等待命令执行完成
+ // Wait for the command to complete
int exitCode = process.waitFor();
if (exitCode == 0) {
log.info("Extract files successful.");