This is an automated email from the ASF dual-hosted git repository.

zhouyao2023 pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new d58fce1caf [Feature]Check Chinese comments in the code (#8319)
d58fce1caf is described below

commit d58fce1cafc094860e451051b92d0d1031bdea76
Author: Jast <[email protected]>
AuthorDate: Wed Dec 18 08:32:11 2024 +0800

    [Feature]Check Chinese comments in the code (#8319)
---
 .../seatunnel/api/ChineseCharacterCheckTest.java   | 180 +++++++++++++++++++++
 .../FastLogDeserializationContent.java             |   3 +-
 .../source/DefaultSeaTunnelRowDeserializer.java    |   6 +-
 .../seatunnel/e2e/connector/doris/DorisIT.java     |   8 +-
 .../e2e/connector/doris/DorisMultiReadIT.java      |   8 +-
 .../e2e/connector/iceberg/IcebergSinkIT.java       |   2 +-
 6 files changed, 194 insertions(+), 13 deletions(-)

diff --git a/seatunnel-ci-tools/src/test/java/org/apache/seatunnel/api/ChineseCharacterCheckTest.java b/seatunnel-ci-tools/src/test/java/org/apache/seatunnel/api/ChineseCharacterCheckTest.java
new file mode 100644
index 0000000000..d0b2b838a1
--- /dev/null
+++ b/seatunnel-ci-tools/src/test/java/org/apache/seatunnel/api/ChineseCharacterCheckTest.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.github.javaparser.JavaParser;
+import com.github.javaparser.ParseResult;
+import com.github.javaparser.ast.CompilationUnit;
+import com.github.javaparser.ast.comments.Comment;
+import com.github.javaparser.ast.visitor.VoidVisitorAdapter;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.nio.file.FileVisitOption;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+import static org.apache.seatunnel.api.ImportShadeClassCheckTest.isWindows;
+
+@Slf4j
+public class ChineseCharacterCheckTest {
+
+    private final JavaParser JAVA_PARSER = new JavaParser();
+
+    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fa5]");
+
+    /** Defines what content should be checked for Chinese characters */
+    public enum CheckScope {
+        /** Check both comments and code */
+        ALL,
+        /** Check only comments */
+        COMMENTS_ONLY,
+        /** Check only code (string literals) */
+        CODE_ONLY
+    }
+
+    @Disabled("Currently only checking comments")
+    @Test
+    public void checkChineseCharactersInAll() {
+        checkChineseCharacters(CheckScope.ALL);
+    }
+
+    @Test
+    public void checkChineseCharactersInCommentsOnly() {
+        checkChineseCharacters(CheckScope.COMMENTS_ONLY);
+    }
+
+    @Disabled("Currently only checking comments")
+    @Test
+    public void checkChineseCharactersInCodeOnly() {
+        checkChineseCharacters(CheckScope.CODE_ONLY);
+    }
+
+    private void checkChineseCharacters(CheckScope scope) {
+        // Define path fragments for source and test Java files
+        String mainPathFragment = isWindows ? "src\\main\\java" : "src/main/java";
+        String testPathFragment2 = isWindows ? "src\\test\\java" : "src/test/java";
+
+        try (Stream<Path> paths = Files.walk(Paths.get(".."), FileVisitOption.FOLLOW_LINKS)) {
+            List<String> filesWithChinese = new ArrayList<>();
+
+            // Filter Java files in the specified directories
+            paths.filter(
+                            path -> {
+                                String pathString = path.toString();
+                                return pathString.endsWith(".java")
+                                        && (pathString.contains(mainPathFragment)
+                                                || pathString.contains(testPathFragment2));
+                            })
+                    .forEach(
+                            path -> {
+                                try {
+                                    // Parse the Java file
+                                    ParseResult<CompilationUnit> parseResult =
+                                            JAVA_PARSER.parse(Files.newInputStream(path));
+
+                                    parseResult
+                                            .getResult()
+                                            .ifPresent(
+                                                    cu -> {
+                                                        // Check for Chinese characters in comments
+                                                        // if needed
+                                                        if (scope != CheckScope.CODE_ONLY) {
+                                                            List<Comment> comments =
+                                                                    cu.getAllContainedComments();
+                                                            for (Comment comment : comments) {
+                                                                if (CHINESE_PATTERN
+                                                                        .matcher(
+                                                                                comment
+                                                                                        .getContent())
+                                                                        .find()) {
+                                                                    filesWithChinese.add(
+                                                                            String.format(
+                                                                                    "Found Chinese characters in comment at %s: %s",
+                                                                                    path
+                                                                                            .toAbsolutePath(),
+                                                                                    comment.getContent()
+                                                                                            .trim()));
+                                                                }
+                                                            }
+                                                        }
+
+                                                        // Check for Chinese characters in code if
+                                                        // needed
+                                                        if (scope != CheckScope.COMMENTS_ONLY) {
+                                                            ChineseCharacterVisitor visitor =
+                                                                    new ChineseCharacterVisitor(
+                                                                            path, filesWithChinese);
+                                                            visitor.visit(cu, null);
+                                                        }
+                                                    });
+
+                                } catch (Exception e) {
+                                    log.error("Error parsing file: {}", path, e);
+                                }
+                            });
+
+            // Assert that no files contain Chinese characters
+            Assertions.assertEquals(
+                    0,
+                    filesWithChinese.size(),
+                    () ->
+                            String.format(
+                                    "Found Chinese characters in following files (Scope: %s):\n%s",
+                                    scope, String.join("\n", filesWithChinese)));
+
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private static class ChineseCharacterVisitor extends VoidVisitorAdapter<Void> {
+        private final Path filePath;
+        private final List<String> filesWithChinese;
+
+        public ChineseCharacterVisitor(Path filePath, List<String> filesWithChinese) {
+            this.filePath = filePath;
+            this.filesWithChinese = filesWithChinese;
+        }
+
+        @Override
+        public void visit(CompilationUnit cu, Void arg) {
+            // Check for Chinese characters in string literals
+            cu.findAll(com.github.javaparser.ast.expr.StringLiteralExpr.class)
+                    .forEach(
+                            str -> {
+                                if (CHINESE_PATTERN.matcher(str.getValue()).find()) {
+                                    filesWithChinese.add(
+                                            String.format(
+                                                    "Found Chinese characters in string literal at %s: %s",
+                                                    filePath.toAbsolutePath(), str.getValue()));
+                                }
+                            });
+            super.visit(cu, arg);
+        }
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java b/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java
index 27bd35bff2..86851ad89a 100644
--- a/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java
+++ b/seatunnel-connectors-v2/connector-sls/src/main/java/org/apache/seatunnel/connectors/seatunnel/sls/serialization/FastLogDeserializationContent.java
@@ -90,7 +90,8 @@ public class FastLogDeserializationContent
                                         .append("\":\"")
                                         .append(content.getValue())
                                         .append("\","));
-        jsonStringBuilder.deleteCharAt(jsonStringBuilder.length() - 1); // 删除最后一个逗号
+        // Remove the last comma
+        jsonStringBuilder.deleteCharAt(jsonStringBuilder.length() - 1);
         jsonStringBuilder.append("}");
         // content field
         transformedRow.add(jsonStringBuilder.toString());
diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java
index 762506d498..08dcd85f7f 100644
--- a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java
+++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java
@@ -115,10 +115,10 @@ public class DefaultSeaTunnelRowDeserializer implements SeaTunnelRowDeserializer
         try {
             for (int i = 0; i < rowTypeInfo.getTotalFields(); i++) {
                 fieldName = rowTypeInfo.getFieldName(i);
-                value = doc.get(fieldName); // 字段值
+                value = doc.get(fieldName);
                 if (value != null) {
-                    seaTunnelDataType =
-                            rowTypeInfo.getFieldType(i); // seaTunnelDataType 为SeaTunnel类型
+                    // seaTunnelDataType is the SeaTunnel type
+                    seaTunnelDataType = rowTypeInfo.getFieldType(i);
                     seaTunnelFields[i] = convertValue(seaTunnelDataType, value);
                 }
             }
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java
index 3d9b0572db..178ed0ffba 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java
@@ -738,7 +738,7 @@ public class DorisIT extends AbstractDorisIT {
     }
 
     public void getErrorUrl(String message) {
-        // 使用正则表达式匹配URL
+        // Using regular expressions to match URLs
         Pattern pattern = Pattern.compile("http://[\\w./?=&-_]+");
         Matcher matcher = pattern.matcher(message);
         String urlString = null;
@@ -754,12 +754,12 @@ public class DorisIT extends AbstractDorisIT {
             URL url = new URL(urlString);
             HttpURLConnection connection = (HttpURLConnection) url.openConnection();
 
-            // 设置请求方法
+            // Set the request method
             connection.setRequestMethod("GET");
 
-            // 设置连接超时时间
+            // Set the connection timeout
             connection.setConnectTimeout(5000);
-            // 设置读取超时时间
+            // Set the read timeout
             connection.setReadTimeout(5000);
 
             int responseCode = connection.getResponseCode();
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java
index dd604b5714..cbdbfc4ae4 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisMultiReadIT.java
@@ -493,7 +493,7 @@ public class DorisMultiReadIT extends AbstractDorisIT {
     }
 
     public void getErrorUrl(String message) {
-        // 使用正则表达式匹配URL
+        // Using regular expressions to match URLs
         Pattern pattern = Pattern.compile("http://[\\w./?=&-_]+");
         Matcher matcher = pattern.matcher(message);
         String urlString = null;
@@ -509,12 +509,12 @@ public class DorisMultiReadIT extends AbstractDorisIT {
             URL url = new URL(urlString);
             HttpURLConnection connection = (HttpURLConnection) url.openConnection();
 
-            // 设置请求方法
+            // Set the request method
             connection.setRequestMethod("GET");
 
-            // 设置连接超时时间
+            // Set the connection timeout
             connection.setConnectTimeout(5000);
-            // 设置读取超时时间
+            // Set the read timeout
             connection.setReadTimeout(5000);
 
             int responseCode = connection.getResponseCode();
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java
index 20c1b02914..0f5e0dfe0b 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/IcebergSinkIT.java
@@ -108,7 +108,7 @@ public class IcebergSinkIT extends TestSuiteBase {
                             "sh", "-c", "cd " + CATALOG_DIR + " && tar -zxvf " + NAMESPACE_TAR);
                     try {
                         Process process = processBuilder.start();
-                        // 等待命令执行完成
+                        // Wait for the command to complete
                         int exitCode = process.waitFor();
                         if (exitCode == 0) {
                             log.info("Extract files successful.");

Reply via email to