This is an automated email from the ASF dual-hosted git repository.
fanjia pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new 4d3448dee3 [Improve][Core] Add test case to check deprecated
`result_table_name` and `source_table_name` (#8153)
4d3448dee3 is described below
commit 4d3448dee3f0feb38c8a304cafa512b8416e82ab
Author: Jia Fan <[email protected]>
AuthorDate: Tue Dec 3 12:04:46 2024 +0800
[Improve][Core] Add test case to check deprecated `result_table_name` and
`source_table_name` (#8153)
---
docs/en/connector-v2/source/Hive.md | 2 +-
docs/en/seatunnel-engine/rest-api-v2.md | 4 +-
docs/en/transform-v2/transform-multi-table.md | 6 +-
docs/zh/seatunnel-engine/rest-api-v2.md | 4 +-
docs/zh/transform-v2/transform-multi-table.md | 6 +-
.../api/file/AllFileSpecificationCheckTest.java | 136 +++++++++++++++++++++
.../json/local_file_json_gz_to_assert.conf | 2 +-
.../text/local_file_gz_text_to_assert.conf | 2 +-
.../hive_on_hdfs_to_assert_with_kerberos.conf | 4 +-
.../engine/e2e/ClusterSeaTunnelContainer.java | 4 +-
.../test/resources/copy_transform_multi_table.conf | 6 +-
.../resources/embedding_transform_multi_table.conf | 6 +-
.../llm_openai_transform_multi_table.conf | 6 +-
...kind_extractor_transform_case1_multi_table.conf | 2 +-
.../resources/split_transform_multi_table.conf | 6 +-
15 files changed, 166 insertions(+), 30 deletions(-)
diff --git a/docs/en/connector-v2/source/Hive.md
b/docs/en/connector-v2/source/Hive.md
index d87739f103..b8fcea0d69 100644
--- a/docs/en/connector-v2/source/Hive.md
+++ b/docs/en/connector-v2/source/Hive.md
@@ -164,7 +164,7 @@ source {
table_name = "default.test_hive_sink_on_hdfs_with_kerberos"
metastore_uri = "thrift://metastore:9083"
hive.hadoop.conf-path = "/tmp/hadoop"
- result_table_name = hive_source
+ plugin_output = hive_source
hive_site_path = "/tmp/hive-site.xml"
kerberos_principal = "hive/metastore.seatunnel@EXAMPLE.COM"
kerberos_keytab_path = "/tmp/hive.keytab"
diff --git a/docs/en/seatunnel-engine/rest-api-v2.md
b/docs/en/seatunnel-engine/rest-api-v2.md
index 8a5e3a8d7d..bc85d51bef 100644
--- a/docs/en/seatunnel-engine/rest-api-v2.md
+++ b/docs/en/seatunnel-engine/rest-api-v2.md
@@ -432,7 +432,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -449,7 +449,7 @@ transform {
sink {
Console {
- source_table_name = "fake"
+ plugin_input = "fake"
}
}
diff --git a/docs/en/transform-v2/transform-multi-table.md
b/docs/en/transform-v2/transform-multi-table.md
index e642ec9cd2..2e14f801f0 100644
--- a/docs/en/transform-v2/transform-multi-table.md
+++ b/docs/en/transform-v2/transform-multi-table.md
@@ -16,7 +16,7 @@ Multi-table Transform has no limitations on Transform
capabilities; any Transfor
| Name | Type | Required | Default | Description
|
|----------------------------|--------|----------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| table_match_regex | String | No | .* | A regular
expression to match the tables that require transformation. By default, it
matches all tables. Note that this table name refers to the actual upstream
table name, not `result_table_name`.
|
+| table_match_regex | String | No | .* | A regular
expression to match the tables that require transformation. By default, it
matches all tables. Note that this table name refers to the actual upstream
table name, not `plugin_output`.
|
| table_transform | List | No | - | You can use a
list in `table_transform` to specify rules for individual tables. If a
transformation rule is configured for a specific table in `table_transform`,
the outer rules will not apply to that table. The rules in `table_transform`
take precedence. |
| table_transform.table_path | String | No | - | When configuring
a transformation rule for a table in `table_transform`, you need to specify the
table path using the `table_path` field. The table path should include
`databaseName[.schemaName].tableName`.
|
@@ -37,8 +37,8 @@ We can configure this as follows:
```hocon
transform {
Copy {
- source_table_name = "fake" // Optional dataset name to read from
- result_table_name = "fake1" // Optional dataset name for output
+ plugin_input = "fake" // Optional dataset name to read from
+ plugin_output = "fake1" // Optional dataset name for output
table_match_regex = "test.a.*" // 1. Matches tables needing
transformation, here matching `test.abc` and `test.abcd`
src_field = "name" // Source field
diff --git a/docs/zh/seatunnel-engine/rest-api-v2.md
b/docs/zh/seatunnel-engine/rest-api-v2.md
index 0e3b3e2657..f15ef3188a 100644
--- a/docs/zh/seatunnel-engine/rest-api-v2.md
+++ b/docs/zh/seatunnel-engine/rest-api-v2.md
@@ -429,7 +429,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -446,7 +446,7 @@ transform {
sink {
Console {
- source_table_name = "fake"
+ plugin_input = "fake"
}
}
diff --git a/docs/zh/transform-v2/transform-multi-table.md
b/docs/zh/transform-v2/transform-multi-table.md
index 2881f319e7..6517fc51b9 100644
--- a/docs/zh/transform-v2/transform-multi-table.md
+++ b/docs/zh/transform-v2/transform-multi-table.md
@@ -17,7 +17,7 @@ SeaTunnel transform支持多表转换,在上游插件输出多个表的时候
| Name | Type | Required | Default | Description
|
|----------------------------|--------|----------|---------|--------------------------------------------------------------------------------------------------|
-| table_match_regex | String | No | .* |
表名的正则表达式,通过正则表达式来匹配需要进行转换的表,默认匹配所有的表。注意这个表名是上游的真正表名,不是result_table_name。
|
+| table_match_regex | String | No | .* |
表名的正则表达式,通过正则表达式来匹配需要进行转换的表,默认匹配所有的表。注意这个表名是上游的真正表名,不是`plugin_output`。
|
| table_transform | List | No | - |
可以通过table_transform列表来指定部分表的规则,当在table_transform中配置某个表的转换规则后,外层针对当前表的规则不会生效,以table_transform中的为准
|
| table_transform.table_path | String | No | - |
当在table_transform中配置某个表的转换规则后,需要使用table_path字段指定表名,表名需要包含`databaseName[.schemaName].tableName`。
|
@@ -33,8 +33,8 @@ SeaTunnel transform支持多表转换,在上游插件输出多个表的时候
```hocon
transform {
Copy {
- source_table_name = "fake" // 可选的读取数据集名
- result_table_name = "fake1" // 可选的输出数据集名
+ plugin_input = "fake" // 可选的读取数据集名
+ plugin_output = "fake1" // 可选的输出数据集名
table_match_regex = "test.a.*" // 1.
通过正则表达式匹配需要进行转换的表,test.a.*表示匹配test.abc和test.abcd
src_field = "name" // 源字段
diff --git
a/seatunnel-dist/src/test/java/org/apache/seatunnel/api/file/AllFileSpecificationCheckTest.java
b/seatunnel-dist/src/test/java/org/apache/seatunnel/api/file/AllFileSpecificationCheckTest.java
new file mode 100644
index 0000000000..d51eb1dad4
--- /dev/null
+++
b/seatunnel-dist/src/test/java/org/apache/seatunnel/api/file/AllFileSpecificationCheckTest.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.file;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.FileVisitOption;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+
+@Slf4j
+@DisabledOnOs(OS.WINDOWS)
+public class AllFileSpecificationCheckTest {
+
+ private static Map<String, List<String>> fileContents;
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ List<String> fileTypesCanNotRead =
+ Arrays.asList("parquet", "orc", "xlsx", "xls", "png", "jar",
"lzo", "zip", "ico");
+ List<String> fileCanNotRead =
+ Arrays.asList(
+
"seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/encoding/gbk.json",
+
"seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/encoding/gbk.xml",
+
"seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/encoding/gbk_use_attr_format.xml",
+
"seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/encoding/gbk.txt",
+
"seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/json/e2e_gbk.json",
+
"seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/e2e_gbk.txt");
+
+ fileContents = new LinkedHashMap<>();
+ try (Stream<Path> paths = Files.walk(Paths.get(".."),
FileVisitOption.FOLLOW_LINKS)) {
+ paths.filter(path -> path.toFile().isFile())
+ .filter(path -> !path.toFile().getName().startsWith("."))
+ .filter(
+ path ->
+ !fileTypesCanNotRead.contains(
+ path.toFile()
+ .getName()
+ .substring(
+
path.toFile().getName().lastIndexOf(".")
+ + 1)))
+ .filter(path ->
!fileCanNotRead.contains(path.toString().substring(3)))
+ .filter(
+ path ->
+ !path.toString()
+ .contains(File.separator +
"target" + File.separator))
+ .filter(
+ path ->
+ !path.toString()
+ .contains(
+ File.separator
+ + "node_modules"
+ + File.separator))
+ .filter(
+ path ->
+ !path.toString()
+ .contains(File.separator + "node"
+ File.separator))
+ .filter(path -> !path.toString().contains(File.separator +
"."))
+ .forEach(
+ path -> {
+ try {
+ fileContents.put(
+ path.toString().substring(3),
+ Files.readAllLines(path,
StandardCharsets.UTF_8));
+ } catch (IOException e) {
+ log.error("Failed to read file: {}", path,
e);
+ throw new RuntimeException(e);
+ }
+ });
+ }
+ }
+
+ @Test
+ public void testFileNotContainsSourceTableNameAndResultTableName() {
+ List<String> whiteList =
+ Arrays.asList(
+
"seatunnel-dist/src/test/java/org/apache/seatunnel/api/file/AllFileSpecificationCheckTest.java",
+ "docs/zh/connector-v2/source-common-options.md",
+ "docs/zh/connector-v2/sink-common-options.md",
+ "docs/zh/transform-v2/common-options.md",
+ "docs/zh/concept/config.md",
+ "docs/en/connector-v2/source-common-options.md",
+ "docs/en/connector-v2/sink-common-options.md",
+ "docs/en/transform-v2/common-options.md",
+ "docs/en/concept/config.md",
+
"seatunnel-api/src/main/java/org/apache/seatunnel/api/common/CommonOptions.java",
+
"seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/resources/fake_to_assert_with_compatible_source_and_result_table_name.conf",
+
"seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeIT.java");
+
+ fileContents.forEach(
+ (path, lines) -> {
+ if (whiteList.contains(path)) {
+ return;
+ }
+ for (int i = 0; i < lines.size(); i++) {
+ String line = lines.get(i);
+ if (line.contains("source_table_name")
+ || line.contains("result_table_name")) {
+ throw new RuntimeException(
+ String.format(
+ "File %s Line %d [%s] contains
`source_table_name` or `result_table_name`, please use `plugin_input` and
`plugin_output` instead.",
+ path, i + 1, line));
+ }
+ }
+ });
+ }
+}
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/json/local_file_json_gz_to_assert.conf
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/json/local_file_json_gz_to_assert.conf
index 0433aa5f5c..a7769e8440 100644
---
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/json/local_file_json_gz_to_assert.conf
+++
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/json/local_file_json_gz_to_assert.conf
@@ -66,7 +66,7 @@ source {
}
}
}
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/local_file_gz_text_to_assert.conf
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/local_file_gz_text_to_assert.conf
index d4f71e9901..2c901b2e92 100644
---
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/local_file_gz_text_to_assert.conf
+++
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/local_file_gz_text_to_assert.conf
@@ -66,7 +66,7 @@ source {
}
}
}
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert_with_kerberos.conf
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert_with_kerberos.conf
index 59c768e4fb..1fcbc17d72 100644
---
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert_with_kerberos.conf
+++
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert_with_kerberos.conf
@@ -25,7 +25,7 @@ source {
table_name = "default.test_hive_sink_on_hdfs_with_kerberos"
metastore_uri = "thrift://metastore:9083"
hive.hadoop.conf-path = "/tmp/hadoop"
- result_table_name = hive_source
+ plugin_output = hive_source
hive_site_path = "/tmp/hive-site.xml"
kerberos_principal = "hive/metastore.seatunnel@EXAMPLE.COM"
kerberos_keytab_path = "/tmp/hive.keytab"
@@ -35,7 +35,7 @@ source {
sink {
Assert {
- source_table_name = hive_source
+ plugin_input = hive_source
rules {
row_rules = [
{
diff --git
a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterSeaTunnelContainer.java
b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterSeaTunnelContainer.java
index f01725e78f..6bdf1c2415 100644
---
a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterSeaTunnelContainer.java
+++
b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterSeaTunnelContainer.java
@@ -1469,7 +1469,7 @@ public class ClusterSeaTunnelContainer extends
SeaTunnelContainer {
+ "}\n\n"
+ "source {\n"
+ " FakeSource {\n"
- + " result_table_name = \"fake\"\n"
+ + " plugin_output = \"fake\"\n"
+ " schema = {\n"
+ " fields {\n"
+ " name = \"string\"\n"
@@ -1483,7 +1483,7 @@ public class ClusterSeaTunnelContainer extends
SeaTunnelContainer {
+ "}\n\n"
+ "sink {\n"
+ " Console {\n"
- + " source_table_name = \"fake\"\n"
+ + " plugin_input = \"fake\"\n"
+ " }\n"
+ "}\n",
jobName, jobMode);
diff --git
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/copy_transform_multi_table.conf
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/copy_transform_multi_table.conf
index b119fea7c8..2345901b4b 100644
---
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/copy_transform_multi_table.conf
+++
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/copy_transform_multi_table.conf
@@ -86,14 +86,14 @@ source {
}
}
]
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
transform {
Copy {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
// match test.abc
table_match_regex = "test.a.*"
src_field = "name"
diff --git
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/embedding_transform_multi_table.conf
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/embedding_transform_multi_table.conf
index cafd5f051a..ba4c0998b0 100644
---
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/embedding_transform_multi_table.conf
+++
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/embedding_transform_multi_table.conf
@@ -194,13 +194,13 @@ source {
]
}
]
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
transform {
Embedding {
- source_table_name = "fake"
+ plugin_input = "fake"
// match test.abc
table_match_regex = "test.a.*"
model_provider = OPENAI
@@ -223,7 +223,7 @@ transform {
author_biography_vector = author_biography
}
}]
- result_table_name = "fake1"
+ plugin_output = "fake1"
}
}
diff --git
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform_multi_table.conf
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform_multi_table.conf
index 7e76d1176f..751a4f106c 100644
---
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform_multi_table.conf
+++
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform_multi_table.conf
@@ -95,13 +95,13 @@ source {
]
}
]
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
transform {
LLM {
- source_table_name = "fake"
+ plugin_input = "fake"
// match test.abc
table_match_regex = "test.a.*"
model_provider = OPENAI
@@ -117,7 +117,7 @@ transform {
prompt = "Determine whether someone is Chinese or American by their name"
openai.api_path = "http://mockserver:1080/v1/chat/completions"
}]
- result_table_name = "llm_output"
+ plugin_output = "llm_output"
}
}
diff --git
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/rowkind_extractor_transform_case1_multi_table.conf
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/rowkind_extractor_transform_case1_multi_table.conf
index 3e4040fd4b..cca42236ce 100644
---
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/rowkind_extractor_transform_case1_multi_table.conf
+++
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/rowkind_extractor_transform_case1_multi_table.conf
@@ -87,7 +87,7 @@ source {
}
}
]
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
diff --git
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/split_transform_multi_table.conf
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/split_transform_multi_table.conf
index 6f5f52654d..363e4ced1e 100644
---
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/split_transform_multi_table.conf
+++
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/split_transform_multi_table.conf
@@ -86,14 +86,14 @@ source {
}
}
]
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
transform {
Split {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
// match test.abc
table_match_regex = "test.a.*"
separator = "1"