This is an automated email from the ASF dual-hosted git repository. gaojun2048 pushed a commit to branch 11_add_transform_v2_doc in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel-website.git
commit db778601ab7315a87db78aa297ff57b209a3b695 Author: gaojun <[email protected]> AuthorDate: Fri Dec 30 19:55:41 2022 +0800 add transform v2 document and remove transform v1 document --- versioned_docs/version-2.3.0/about.md | 4 +- versioned_docs/version-2.3.0/concept/config.md | 52 +++++- .../connector-v2/sink/AmazonDynamoDB.md | 2 +- .../version-2.3.0/connector-v2/sink/Cassandra.md | 2 +- .../version-2.3.0/connector-v2/sink/Clickhouse.md | 2 +- .../connector-v2/sink/ClickhouseFile.md | 3 +- .../version-2.3.0/connector-v2/sink/Console.md | 6 - .../version-2.3.0/connector-v2/sink/Doris.md | 2 +- .../connector-v2/sink/Elasticsearch.md | 2 +- .../version-2.3.0/connector-v2/sink/FtpFile.md | 2 +- .../version-2.3.0/connector-v2/sink/HdfsFile.md | 13 +- .../version-2.3.0/connector-v2/sink/Hive.md | 9 +- .../version-2.3.0/connector-v2/sink/InfluxDB.md | 2 +- .../version-2.3.0/connector-v2/sink/Jdbc.md | 7 +- .../version-2.3.0/connector-v2/sink/Kafka.md | 2 +- .../version-2.3.0/connector-v2/sink/Kudu.md | 2 +- .../version-2.3.0/connector-v2/sink/LocalFile.md | 2 +- .../version-2.3.0/connector-v2/sink/Maxcompute.md | 2 +- .../version-2.3.0/connector-v2/sink/OssFile.md | 2 +- .../connector-v2/sink/OssJindoFile.md | 2 +- .../version-2.3.0/connector-v2/sink/Rabbitmq.md | 2 +- .../version-2.3.0/connector-v2/sink/Redis.md | 2 +- .../version-2.3.0/connector-v2/sink/S3-Redshift.md | 1 - .../version-2.3.0/connector-v2/sink/S3File.md | 2 +- .../version-2.3.0/connector-v2/sink/SftpFile.md | 2 +- .../version-2.3.0/connector-v2/sink/Slack.md | 2 +- .../version-2.3.0/connector-v2/sink/Socket.md | 4 - .../version-2.3.0/connector-v2/sink/StarRocks.md | 3 +- .../version-2.3.0/connector-v2/sink/Tablestore.md | 2 +- .../connector-v2/sink/common-options.md | 19 +- .../connector-v2/source/AmazonDynamoDB.md | 2 +- .../version-2.3.0/connector-v2/source/Cassandra.md | 2 +- .../connector-v2/source/Clickhouse.md | 2 +- .../connector-v2/source/Elasticsearch.md | 2 +- 
.../version-2.3.0/connector-v2/source/Gitlab.md | 2 +- .../connector-v2/source/GoogleSheets.md | 2 +- .../version-2.3.0/connector-v2/source/Http.md | 2 +- .../version-2.3.0/connector-v2/source/Iceberg.md | 2 +- .../version-2.3.0/connector-v2/source/InfluxDB.md | 3 + .../version-2.3.0/connector-v2/source/Jdbc.md | 4 +- .../version-2.3.0/connector-v2/source/Jira.md | 2 +- .../version-2.3.0/connector-v2/source/Klaviyo.md | 2 +- .../version-2.3.0/connector-v2/source/Kudu.md | 2 +- .../version-2.3.0/connector-v2/source/Lemlist.md | 2 +- .../connector-v2/source/Maxcompute.md | 2 +- .../version-2.3.0/connector-v2/source/MongoDB.md | 8 +- .../version-2.3.0/connector-v2/source/MyHours.md | 2 +- .../version-2.3.0/connector-v2/source/MySQL-CDC.md | 2 +- .../version-2.3.0/connector-v2/source/Neo4j.md | 2 +- .../version-2.3.0/connector-v2/source/Notion.md | 2 +- .../version-2.3.0/connector-v2/source/OneSignal.md | 2 +- .../version-2.3.0/connector-v2/source/OpenMldb.md | 8 +- .../connector-v2/source/OssJindoFile.md | 2 +- .../version-2.3.0/connector-v2/source/Rabbitmq.md | 2 +- .../version-2.3.0/connector-v2/source/Redis.md | 2 +- .../version-2.3.0/connector-v2/source/S3File.md | 2 +- .../version-2.3.0/connector-v2/source/SftpFile.md | 2 +- .../version-2.3.0/connector-v2/source/Socket.md | 5 +- .../connector-v2/source/common-options.md | 2 +- .../version-2.3.0/connector-v2/source/kafka.md | 3 +- .../version-2.3.0/seatunnel-engine/deployment.md | 2 +- .../version-2.3.0/start-v2/locally/deployment.md | 8 +- .../start-v2/locally/quick-start-flink.md | 4 - .../locally/quick-start-seatunnel-engine.md | 6 +- .../start-v2/locally/quick-start-spark.md | 4 - .../version-2.3.0/transform-v2/common-options.md | 23 +++ versioned_docs/version-2.3.0/transform-v2/copy.md | 66 +++++++ .../version-2.3.0/transform-v2/filter-rowkind.md | 67 +++++++ .../version-2.3.0/transform-v2/filter.md | 60 +++++++ .../version-2.3.0/transform-v2/replace.md | 121 +++++++++++++ 
versioned_docs/version-2.3.0/transform-v2/split.md | 72 ++++++++ .../version-2.3.0/transform/common-options.mdx | 118 ------------ versioned_docs/version-2.3.0/transform/json.md | 197 --------------------- versioned_docs/version-2.3.0/transform/nullRate.md | 69 -------- versioned_docs/version-2.3.0/transform/nulltf.md | 75 -------- versioned_docs/version-2.3.0/transform/replace.md | 81 --------- versioned_docs/version-2.3.0/transform/split.mdx | 124 ------------- versioned_docs/version-2.3.0/transform/sql.md | 61 ------- versioned_docs/version-2.3.0/transform/udf.md | 44 ----- versioned_docs/version-2.3.0/transform/uuid.md | 64 ------- versioned_sidebars/version-2.3.0-sidebars.json | 12 +- 81 files changed, 561 insertions(+), 955 deletions(-) diff --git a/versioned_docs/version-2.3.0/about.md b/versioned_docs/version-2.3.0/about.md index 6a530df380..aba24a21c4 100644 --- a/versioned_docs/version-2.3.0/about.md +++ b/versioned_docs/version-2.3.0/about.md @@ -24,7 +24,7 @@ SeaTunnel focuses on data integration and data synchronization, and is mainly de ## Features of SeaTunnel - Rich and extensible Connector: SeaTunnel provides a Connector API that does not depend on a specific execution engine. Connectors (Source, Transform, Sink) developed based on this API can run On many different engines, such as SeaTunnel Engine, Flink, Spark that are currently supported. -- Connector plug-in: The plug-in design allows users to easily develop their own Connector and integrate it into the SeaTunnel project. Currently, SeaTunnel has supported more than 70 Connectors, and the number is surging. There is the list of the [currently-supported connectors](Connector-v2-release-state.md) +- Connector plug-in: The plug-in design allows users to easily develop their own Connector and integrate it into the SeaTunnel project. Currently, SeaTunnel has supported more than 100 Connectors, and the number is surging. 
There is the list of the [currently-supported connectors](Connector-v2-release-state.md) - Batch-stream integration: Connectors developed based on SeaTunnel Connector API are perfectly compatible with offline synchronization, real-time synchronization, full- synchronization, incremental synchronization and other scenarios. It greatly reduces the difficulty of managing data integration tasks. - Support distributed snapshot algorithm to ensure data consistency. - Multi-engine support: SeaTunnel uses SeaTunnel Engine for data synchronization by default. At the same time, SeaTunnel also supports the use of Flink or Spark as the execution engine of the Connector to adapt to the existing technical components of the enterprise. SeaTunnel supports multiple versions of Spark and Flink. @@ -51,7 +51,7 @@ The default engine use by SeaTunnel is [SeaTunnel Engine](seatunnel-engine/about - **Source Connectors** SeaTunnel support read data from various relational databases, graph databases, NoSQL databases, document databases, and memory databases. Various distributed file systems such as HDFS. A variety of cloud storage, such as S3 and OSS. At the same time, we also support data reading of many common SaaS services. You can access the detailed list [here](connector-v2/source). If you want, You can develop your own source connector and easily integrate it into seatunnel. -- **Transform Connector** +- **Transform Connector** If the schema is different between source and sink, You can use Transform Connector to change the schema read from source and make it same as the sink schema. - **Sink Connector** SeaTunnel support write data to various relational databases, graph databases, NoSQL databases, document databases, and memory databases. Various distributed file systems such as HDFS. A variety of cloud storage, such as S3 and OSS. At the same time, we also support write data to many common SaaS services. You can access the detailed list [here](connector-v2/sink). 
If you want, You can develop your own sink connector and easily integrate it into seatunnel. diff --git a/versioned_docs/version-2.3.0/concept/config.md b/versioned_docs/version-2.3.0/concept/config.md index cc8f7a5dbc..ba49606e63 100644 --- a/versioned_docs/version-2.3.0/concept/config.md +++ b/versioned_docs/version-2.3.0/concept/config.md @@ -20,19 +20,28 @@ The Config file will be similar to the one below. ```hocon env { - execution.parallelism = 1 + job.mode = "BATCH" } source { FakeSource { result_table_name = "fake" - field_name = "name,age" + row.num = 100 + schema = { + fields { + name = "string" + age = "int" + card = "int" + } + } } } transform { - sql { - sql = "select name,age from fake" + Filter { + source_table_name = "fake" + result_table_name = "fake1" + fields = [name, card] } } @@ -41,9 +50,10 @@ sink { host = "clickhouse:8123" database = "default" table = "seatunnel_console" - fields = ["name"] + fields = ["name", "card"] username = "default" password = "" + source_table_name = "fake1" } } ``` @@ -74,13 +84,39 @@ course, this uses the word 'may', which means that we can also directly treat th directly from source to sink. Like below. ```hocon -transform { - // no thing on here +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + name = "string" + age = "int" + card = "int" + } + } + } +} + +sink { + Clickhouse { + host = "clickhouse:8123" + database = "default" + table = "seatunnel_console" + fields = ["name", "age", card"] + username = "default" + password = "" + source_table_name = "fake1" + } } ``` Like source, transform has specific parameters that belong to each module. The supported source at now check. 
-The supported transform at now check [Transform of SeaTunnel](../transform) +The supported transform at now check [Transform V2 of SeaTunnel](../transform-v2) ### sink diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/AmazonDynamoDB.md b/versioned_docs/version-2.3.0/connector-v2/sink/AmazonDynamoDB.md index a74f5fc9e9..c1bf515be2 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/AmazonDynamoDB.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/AmazonDynamoDB.md @@ -63,7 +63,7 @@ Amazondynamodb { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Amazon DynamoDB Sink Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Cassandra.md b/versioned_docs/version-2.3.0/connector-v2/sink/Cassandra.md index be7ce26b98..0a4ece086e 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Cassandra.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Cassandra.md @@ -90,7 +90,7 @@ sink { ## Changelog -### 2.3.0 +### next version - Add Cassandra Sink Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Clickhouse.md b/versioned_docs/version-2.3.0/connector-v2/sink/Clickhouse.md index c6adfb7297..90bec5fc9c 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Clickhouse.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Clickhouse.md @@ -182,7 +182,7 @@ sink { ### 2.3.0-beta 2022-10-20 - [Improve] Clickhouse Support Int128,Int256 Type ([3067](https://github.com/apache/incubator-seatunnel/pull/3067)) -### 2.3.0 2022-12-30 +### next version - [Improve] Clickhouse Sink support nest type and array type([3047](https://github.com/apache/incubator-seatunnel/pull/3047)) diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/ClickhouseFile.md b/versioned_docs/version-2.3.0/connector-v2/sink/ClickhouseFile.md index 848fbdd751..1eb2458d08 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/ClickhouseFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/ClickhouseFile.md @@ -141,6 +141,7 
@@ Sink plugin common parameters, please refer to [Sink Common Options](common-opti - Support write data to ClickHouse File and move to ClickHouse data dir -### 2.3.0 2022-12-30 +### Next version + - [BugFix] Fix generated data part name conflict and improve file commit logic [3416](https://github.com/apache/incubator-seatunnel/pull/3416) - [Feature] Support compatible_mode compatible with lower version Clickhouse [3416](https://github.com/apache/incubator-seatunnel/pull/3416) \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Console.md b/versioned_docs/version-2.3.0/connector-v2/sink/Console.md index 743246b371..134b74c85b 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Console.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Console.md @@ -54,12 +54,6 @@ source { } } -transform { - sql { - sql = "select name, age from fake" - } -} - sink { Console { diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Doris.md b/versioned_docs/version-2.3.0/connector-v2/sink/Doris.md index d89c44b909..6cac1bc321 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Doris.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Doris.md @@ -125,6 +125,6 @@ sink { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Doris Sink Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Elasticsearch.md b/versioned_docs/version-2.3.0/connector-v2/sink/Elasticsearch.md index 1d7017550f..1e5e650c97 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Elasticsearch.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Elasticsearch.md @@ -99,6 +99,6 @@ sink { - Add Elasticsearch Sink Connector -### 2.3.0 2022-12-30 +### next version - [Feature] Support CDC write DELETE/UPDATE/INSERT events ([3673](https://github.com/apache/incubator-seatunnel/pull/3673)) diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/FtpFile.md 
b/versioned_docs/version-2.3.0/connector-v2/sink/FtpFile.md index 4bcee61fed..637345d601 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/FtpFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/FtpFile.md @@ -173,7 +173,7 @@ FtpFile { - [BugFix] Fix filesystem get error ([3117](https://github.com/apache/incubator-seatunnel/pull/3117)) - [BugFix] Solved the bug of can not parse '\t' as delimiter from config file ([3083](https://github.com/apache/incubator-seatunnel/pull/3083)) -### 2.3.0 2022-12-30 +### Next version - [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/incubator-seatunnel/pull/3258)) - When field from upstream is null it will throw NullPointerException - Sink columns mapping failed diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/HdfsFile.md b/versioned_docs/version-2.3.0/connector-v2/sink/HdfsFile.md index 742b825a80..c17c59683a 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/HdfsFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/HdfsFile.md @@ -19,6 +19,9 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] parquet - [x] orc - [x] json +- [x] compress codec + - [x] lzo + ## Options @@ -41,6 +44,7 @@ In order to use this connector, You must ensure your spark/flink cluster already | is_enable_transaction | boolean | no | true | | batch_size | int | no | 1000000 | | common-options | | no | - | +| compressCodec | string | no | none | ### fs.defaultFS [string] @@ -125,8 +129,10 @@ Only support `true` now. The maximum number of rows in a file. For SeaTunnel Engine, the number of lines in the file is determined by `batch_size` and `checkpoint.interval` jointly decide. If the value of `checkpoint.interval` is large enough, sink writer will write rows in a file until the rows in the file larger than `batch_size`. If `checkpoint.interval` is small, the sink writer will create a new file when a new checkpoint trigger. 
-### common options +### compressCodec [string] +Support lzo compression for text in file format. The file name ends with ".lzo.txt" . +### common options Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details ## Example @@ -201,10 +207,11 @@ HdfsFile { - [BugFix] Fix filesystem get error ([3117](https://github.com/apache/incubator-seatunnel/pull/3117)) - [BugFix] Solved the bug of can not parse '\t' as delimiter from config file ([3083](https://github.com/apache/incubator-seatunnel/pull/3083)) -### 2.3.0 2022-12-30 +### Next version - [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/incubator-seatunnel/pull/3258)) - When field from upstream is null it will throw NullPointerException - Sink columns mapping failed - When restore writer from states getting transaction directly failed -- [Improve] Support setting batch size for every file ([3625](https://github.com/apache/incubator-seatunnel/pull/3625)) \ No newline at end of file +- [Improve] Support setting batch size for every file ([3625](https://github.com/apache/incubator-seatunnel/pull/3625)) +- [Improve] Support lzo compression for text in file format ([3782](https://github.com/apache/incubator-seatunnel/pull/3782)) \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Hive.md b/versioned_docs/version-2.3.0/connector-v2/sink/Hive.md index 0bc4edc471..63ba168041 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Hive.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Hive.md @@ -21,6 +21,8 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] text - [x] parquet - [x] orc +- [x] compress codec + - [x] lzo ## Options @@ -28,8 +30,8 @@ By default, we use 2PC commit to ensure `exactly-once` |----------------|--------|----------|---------------| | table_name | string | yes | - | | metastore_uri | string | yes | - | +| compressCodec | string | no | none | | 
common-options | | no | - | - ### table_name [string] Target Hive table name eg: db1.table1 @@ -120,9 +122,6 @@ source { } } -transform { -} - sink { # choose stdout output plugin to output data to console @@ -144,7 +143,7 @@ sink { ### 2.3.0-beta 2022-10-20 - [Improve] Hive Sink supports automatic partition repair ([3133](https://github.com/apache/incubator-seatunnel/pull/3133)) -### 2.3.0 2022-12-30 +### Next version - [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/incubator-seatunnel/pull/3258)) - When field from upstream is null it will throw NullPointerException - Sink columns mapping failed diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/InfluxDB.md b/versioned_docs/version-2.3.0/connector-v2/sink/InfluxDB.md index f5c172410a..f2cb274060 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/InfluxDB.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/InfluxDB.md @@ -104,6 +104,6 @@ sink { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add InfluxDB Sink Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Jdbc.md b/versioned_docs/version-2.3.0/connector-v2/sink/Jdbc.md index 475c09a2cb..9f2aa2e75b 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Jdbc.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Jdbc.md @@ -45,6 +45,7 @@ support `Xa transactions`. You can set `is_exactly_once=true` to enable it. | xa_data_source_class_name | String | No | - | | max_commit_attempts | Int | No | 3 | | transaction_timeout_sec | Int | No | -1 | +| auto_commit | Boolean | No | true | | common-options | | no | - | ### driver [string] @@ -119,6 +120,10 @@ The number of retries for transaction commit failures The timeout after the transaction is opened, the default is -1 (never timeout). 
Note that setting the timeout may affect exactly-once semantics +### auto_commit [boolean] + +Automatic transaction commit is enabled by default + ### common options Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details @@ -214,7 +219,7 @@ sink { - [Feature] Support StarRocks JDBC Sink ([3060](https://github.com/apache/incubator-seatunnel/pull/3060)) - [Feature] Support DB2 JDBC Sink ([2410](https://github.com/apache/incubator-seatunnel/pull/2410)) -### 2.3.0 2022-12-30 +### next version - [Feature] Support CDC write DELETE/UPDATE/INSERT events ([3378](https://github.com/apache/incubator-seatunnel/issues/3378)) - [Feature] Support Teradata JDBC Sink ([3362](https://github.com/apache/incubator-seatunnel/pull/3362)) diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Kafka.md b/versioned_docs/version-2.3.0/connector-v2/sink/Kafka.md index 813005b6ba..aa3f3b54bc 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Kafka.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Kafka.md @@ -183,7 +183,7 @@ sink { - Add Kafka Sink Connector -### 2.3.0 2022-12-30 +### next version - [Improve] Support to specify multiple partition keys [3230](https://github.com/apache/incubator-seatunnel/pull/3230) - [Improve] Add text format for kafka sink connector [3711](https://github.com/apache/incubator-seatunnel/pull/3711) \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Kudu.md b/versioned_docs/version-2.3.0/connector-v2/sink/Kudu.md index 782729429f..5448bf85b3 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Kudu.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Kudu.md @@ -59,6 +59,6 @@ Sink plugin common parameters, please refer to [Sink Common Options](common-opti ### 2.3.0-beta 2022-10-20 - [Improve] Kudu Sink Connector Support to upsert row ([2881](https://github.com/apache/incubator-seatunnel/pull/2881)) -### 2.3.0 2022-12-30 +### Next Version - Change plugin 
name from `KuduSink` to `Kudu` [3432](https://github.com/apache/incubator-seatunnel/pull/3432) diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/LocalFile.md b/versioned_docs/version-2.3.0/connector-v2/sink/LocalFile.md index 683957c336..fde2d22477 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/LocalFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/LocalFile.md @@ -186,7 +186,7 @@ LocalFile { - [BugFix] Fix filesystem get error ([3117](https://github.com/apache/incubator-seatunnel/pull/3117)) - [BugFix] Solved the bug of can not parse '\t' as delimiter from config file ([3083](https://github.com/apache/incubator-seatunnel/pull/3083)) -### 2.3.0 2022-12-30 +### Next version - [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/incubator-seatunnel/pull/3258)) - When field from upstream is null it will throw NullPointerException - Sink columns mapping failed diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Maxcompute.md b/versioned_docs/version-2.3.0/connector-v2/sink/Maxcompute.md index cf9d9ce489..302dca7aef 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Maxcompute.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Maxcompute.md @@ -74,6 +74,6 @@ sink { ## Changelog -### 2.3.0 2022-12-30 +### next version - [Feature] Add Maxcompute Sink Connector([3640](https://github.com/apache/incubator-seatunnel/pull/3640)) diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/OssFile.md b/versioned_docs/version-2.3.0/connector-v2/sink/OssFile.md index eccedd0f12..aa5a8614e3 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/OssFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/OssFile.md @@ -225,7 +225,7 @@ For orc file format - [BugFix] Fix filesystem get error ([3117](https://github.com/apache/incubator-seatunnel/pull/3117)) - [BugFix] Solved the bug of can not parse '\t' as delimiter from config file 
([3083](https://github.com/apache/incubator-seatunnel/pull/3083)) -### 2.3.0 2022-12-30 +### Next version - [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/incubator-seatunnel/pull/3258)) - When field from upstream is null it will throw NullPointerException - Sink columns mapping failed diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/OssJindoFile.md b/versioned_docs/version-2.3.0/connector-v2/sink/OssJindoFile.md index a30329c728..3c6d9091cb 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/OssJindoFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/OssJindoFile.md @@ -210,6 +210,6 @@ For orc file format ## Changelog -### 2.3.0 2022-12-30 +### Next version - Add OSS Jindo File Sink Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Rabbitmq.md b/versioned_docs/version-2.3.0/connector-v2/sink/Rabbitmq.md index 28f25f0be5..5a6c9a7def 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Rabbitmq.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Rabbitmq.md @@ -101,6 +101,6 @@ sink { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Rabbitmq Sink Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Redis.md b/versioned_docs/version-2.3.0/connector-v2/sink/Redis.md index 066e96a52c..e77f0f9442 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Redis.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Redis.md @@ -139,6 +139,6 @@ simple: - Add Redis Sink Connector -### 2.3.0 2022-12-30 +### next version - [Improve] Support redis cluster mode connection and user authentication [3188](https://github.com/apache/incubator-seatunnel/pull/3188) diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/S3-Redshift.md b/versioned_docs/version-2.3.0/connector-v2/sink/S3-Redshift.md index 6eb0c24a36..b9f235338e 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/S3-Redshift.md +++ 
b/versioned_docs/version-2.3.0/connector-v2/sink/S3-Redshift.md @@ -276,4 +276,3 @@ For orc file format ### 2.3.0-beta 2022-10-20 -Add s3-redshift connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/S3File.md b/versioned_docs/version-2.3.0/connector-v2/sink/S3File.md index ac21ee721d..d49a2a33d9 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/S3File.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/S3File.md @@ -226,7 +226,7 @@ For orc file format - Add S3File Sink Connector -### 2.3.0 2022-12-30 +### Next version - [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/incubator-seatunnel/pull/3258)) - When field from upstream is null it will throw NullPointerException - Sink columns mapping failed diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/SftpFile.md b/versioned_docs/version-2.3.0/connector-v2/sink/SftpFile.md index 9d6a34a0c6..c08e0b93d5 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/SftpFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/SftpFile.md @@ -164,7 +164,7 @@ SftpFile { ## Changelog -### 2.3.0 2022-12-30 +### Next version - Add SftpFile Sink Connector - [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/incubator-seatunnel/pull/3258)) diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Slack.md b/versioned_docs/version-2.3.0/connector-v2/sink/Slack.md index 79bad748f2..6f011bfc36 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Slack.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Slack.md @@ -52,6 +52,6 @@ sink { ## Changelog -### 2.3.0 2022-12-30 +### new version - Add Slack Sink Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Socket.md b/versioned_docs/version-2.3.0/connector-v2/sink/Socket.md index 2a29a17c41..a1ab7b440f 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Socket.md +++ 
b/versioned_docs/version-2.3.0/connector-v2/sink/Socket.md @@ -69,10 +69,6 @@ source { } } -transform { - sql = "select name, age from fake" -} - sink { Socket { host = "localhost" diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/StarRocks.md b/versioned_docs/version-2.3.0/connector-v2/sink/StarRocks.md index 40f9560acf..7f17154b60 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/StarRocks.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/StarRocks.md @@ -126,5 +126,6 @@ sink { ## Changelog -### 2.3.0 2022-12-30 +### next version + - Add StarRocks Sink Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/Tablestore.md b/versioned_docs/version-2.3.0/connector-v2/sink/Tablestore.md index 51f87a467a..15ca34eda2 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/Tablestore.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/Tablestore.md @@ -68,7 +68,7 @@ Tablestore { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Tablestore Sink Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/sink/common-options.md b/versioned_docs/version-2.3.0/connector-v2/sink/common-options.md index 53c623086a..2f6b834719 100644 --- a/versioned_docs/version-2.3.0/connector-v2/sink/common-options.md +++ b/versioned_docs/version-2.3.0/connector-v2/sink/common-options.md @@ -1,4 +1,4 @@ -# Common Options +# Sink Common Options > Common parameters of sink connectors @@ -32,24 +32,27 @@ source { } transform { - sql { + Filter { source_table_name = "fake" - sql = "select name from fake" + fields = [name] result_table_name = "fake_name" } - sql { + Filter { source_table_name = "fake" - sql = "select age from fake" + fields = [age] result_table_name = "fake_age" } } sink { - console { - parallelism = 3 + Console { source_table_name = "fake_name" } + Console { + source_table_name = "fake_age" + } } ``` -> If `source_table_name` is not specified, the console outputs the data of the last 
transform, and if it is set to `fake_name` , it will output the data of `fake_name` +> If the job only have one source and one(or zero) transform and one sink, You do not need to specify `source_table_name` and `result_table_name` for connector. +> If the number of any operator in source, transform and sink is greater than 1, you must specify the `source_table_name` and `result_table_name` for each connector in the job. diff --git a/versioned_docs/version-2.3.0/connector-v2/source/AmazonDynamoDB.md b/versioned_docs/version-2.3.0/connector-v2/source/AmazonDynamoDB.md index 05f71e6f01..ceb37226b0 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/AmazonDynamoDB.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/AmazonDynamoDB.md @@ -103,6 +103,6 @@ Source Plugin common parameters, refer to [Source Plugin](common-options.md) for ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Amazon DynamoDB Source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Cassandra.md b/versioned_docs/version-2.3.0/connector-v2/source/Cassandra.md index 6700515523..e2a6e4e8c7 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Cassandra.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Cassandra.md @@ -74,7 +74,7 @@ source { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Cassandra Source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Clickhouse.md b/versioned_docs/version-2.3.0/connector-v2/source/Clickhouse.md index 717101fa73..8df3d7c3de 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Clickhouse.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Clickhouse.md @@ -86,7 +86,7 @@ source { - [Improve] Clickhouse Source random use host when config multi-host ([3108](https://github.com/apache/incubator-seatunnel/pull/3108)) -### 2.3.0 2022-12-30 +### next version - [Improve] Clickhouse Source support nest type and array 
type([3047](https://github.com/apache/incubator-seatunnel/pull/3047)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Elasticsearch.md b/versioned_docs/version-2.3.0/connector-v2/source/Elasticsearch.md index 99686b30b8..8b3bcb6443 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Elasticsearch.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Elasticsearch.md @@ -96,6 +96,6 @@ Elasticsearch { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Elasticsearch Source Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Gitlab.md b/versioned_docs/version-2.3.0/connector-v2/source/Gitlab.md index 76ff7511d7..d0b1df1bcb 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Gitlab.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Gitlab.md @@ -289,7 +289,7 @@ Gitlab{ ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Gitlab Source Connector - [Feature][Connector-V2][HTTP] Use json-path parsing ([3510](https://github.com/apache/incubator-seatunnel/pull/3510)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/GoogleSheets.md b/versioned_docs/version-2.3.0/connector-v2/source/GoogleSheets.md index 4cb56e0250..1d9d65d359 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/GoogleSheets.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/GoogleSheets.md @@ -74,6 +74,6 @@ simple: ## Changelog -### 2.3.0 2022-12-30 +### next version - Add GoogleSheets Source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Http.md b/versioned_docs/version-2.3.0/connector-v2/source/Http.md index 88a068e7f3..6e0ceb281d 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Http.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Http.md @@ -293,6 +293,6 @@ Http { - Add Http Source Connector -### 2.3.0 2022-12-30 +### new version - [Feature][Connector-V2][HTTP] Use json-path parsing 
([3510](https://github.com/apache/incubator-seatunnel/pull/3510)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Iceberg.md b/versioned_docs/version-2.3.0/connector-v2/source/Iceberg.md index 250eb6cffe..a4400a530d 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Iceberg.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Iceberg.md @@ -180,6 +180,6 @@ Some versions of the hive-exec package do not have libfb303-xxx.jar, so you also - Add Iceberg Source Connector -### 2.3.0 2022-12-30 +### next version - [Feature] Support Hadoop3.x ([3046](https://github.com/apache/incubator-seatunnel/pull/3046)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/InfluxDB.md b/versioned_docs/version-2.3.0/connector-v2/source/InfluxDB.md index 501b932a51..3e711080e0 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/InfluxDB.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/InfluxDB.md @@ -117,14 +117,17 @@ the `partition_num` of the InfluxDB when you select > Tips: Ensure that `upper_bound` minus `lower_bound` is divided > `bypartition_num`, otherwise the query results will overlap ### epoch [string] + returned time precision - Optional values: H, m, s, MS, u, n - default value: n ### query_timeout_sec [int] + the `query_timeout` of the InfluxDB when you select, in seconds ### connect_timeout_ms [long] + the timeout for connecting to InfluxDB, in milliseconds ### common options diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Jdbc.md b/versioned_docs/version-2.3.0/connector-v2/source/Jdbc.md index 7136fcb641..7aacdb8bf0 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Jdbc.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Jdbc.md @@ -18,7 +18,7 @@ e.g. 
If you use MySQL, should download and copy `mysql-connector-java-xxx.jar` t - [x] [batch](../../concept/connector-v2-features.md) - [ ] [stream](../../concept/connector-v2-features.md) -- [ ] [exactly-once](../../concept/connector-v2-features.md) +- [x] [exactly-once](../../concept/connector-v2-features.md) - [x] [schema projection](../../concept/connector-v2-features.md) supports query SQL and can achieve projection effect. @@ -163,7 +163,7 @@ parallel: - [Feature] Support GBase8a JDBC Source ([3026](https://github.com/apache/incubator-seatunnel/pull/3026)) - [Feature] Support DB2 JDBC Source ([2410](https://github.com/apache/incubator-seatunnel/pull/2410)) -### 2.3.0 2022-12-30 +### next version - [BugFix] Fix jdbc split bug ([3220](https://github.com/apache/incubator-seatunnel/pull/3220)) - [Feature] Support Sqlite JDBC Source ([3089](https://github.com/apache/incubator-seatunnel/pull/3089)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Jira.md b/versioned_docs/version-2.3.0/connector-v2/source/Jira.md index 732b1d89bc..1614b783c2 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Jira.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Jira.md @@ -295,7 +295,7 @@ Jira { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Jira Source Connector - [Feature][Connector-V2][HTTP] Use json-path parsing ([3510](https://github.com/apache/incubator-seatunnel/pull/3510)) \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Klaviyo.md b/versioned_docs/version-2.3.0/connector-v2/source/Klaviyo.md index 532d657558..242f08b68b 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Klaviyo.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Klaviyo.md @@ -302,7 +302,7 @@ Klaviyo { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Klaviyo Source Connector - [Feature][Connector-V2][HTTP] Use json-path parsing ([3510](https://github.com/apache/incubator-seatunnel/pull/3510)) \ No 
newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Kudu.md b/versioned_docs/version-2.3.0/connector-v2/source/Kudu.md index 7cb6d72cdd..5da03dc11d 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Kudu.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Kudu.md @@ -62,6 +62,6 @@ source { - Add Kudu Source Connector -### 2.3.0 2022-12-30 +### Next Version - Change plugin name from `KuduSource` to `Kudu` [3432](https://github.com/apache/incubator-seatunnel/pull/3432) \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Lemlist.md b/versioned_docs/version-2.3.0/connector-v2/source/Lemlist.md index 1ee6b9f28b..922ebba232 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Lemlist.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Lemlist.md @@ -287,7 +287,7 @@ Lemlist { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Lemlist Source Connector - [Feature][Connector-V2][HTTP] Use json-path parsing ([3510](https://github.com/apache/incubator-seatunnel/pull/3510)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Maxcompute.md b/versioned_docs/version-2.3.0/connector-v2/source/Maxcompute.md index 2a0bf304f2..133d659490 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Maxcompute.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Maxcompute.md @@ -77,6 +77,6 @@ source { ## Changelog -### 2.3.0 2022-12-30 +### next version - [Feature] Add Maxcompute Source Connector([3640](https://github.com/apache/incubator-seatunnel/pull/3640)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/MongoDB.md b/versioned_docs/version-2.3.0/connector-v2/source/MongoDB.md index f60315b572..8b6325706e 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/MongoDB.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/MongoDB.md @@ -22,6 +22,7 @@ Read data from MongoDB. 
| uri | string | yes | - | | database | string | yes | - | | collection | string | yes | - | +| matchQuery | string | no | - | | schema | object | yes | - | | common-options | config | no | - | @@ -37,6 +38,10 @@ MongoDB database MongoDB collection +### matchQuery [string] + +MatchQuery is a JSON string that specifies the selection criteria using query operators for the documents to be returned from the collection. + ### schema [object] #### fields [Config] @@ -66,6 +71,7 @@ mongodb { uri = "mongodb://username:[email protected]:27017/mypost?retryWrites=true&writeConcern=majority" database = "mydatabase" collection = "mycollection" + matchQuery = "{\"id\":3}" schema { fields { id = int @@ -83,6 +89,6 @@ mongodb { - Add MongoDB Source Connector -### 2.3.0 2022-12-30 +### Next Version - common-options is not a required option diff --git a/versioned_docs/version-2.3.0/connector-v2/source/MyHours.md b/versioned_docs/version-2.3.0/connector-v2/source/MyHours.md index 2f86276beb..de02a030fb 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/MyHours.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/MyHours.md @@ -313,7 +313,7 @@ MyHours{ ## Changelog -### 2.3.0 2022-12-30 +### next version - Add My Hours Source Connector - [Feature][Connector-V2][HTTP] Use json-path parsing ([3510](https://github.com/apache/incubator-seatunnel/pull/3510)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/MySQL-CDC.md b/versioned_docs/version-2.3.0/connector-v2/source/MySQL-CDC.md index 7348e54274..d12b7bd067 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/MySQL-CDC.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/MySQL-CDC.md @@ -205,6 +205,6 @@ source { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add MySQL CDC Source Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Neo4j.md b/versioned_docs/version-2.3.0/connector-v2/source/Neo4j.md index 6ef44efe49..346532001b 100644 ---
a/versioned_docs/version-2.3.0/connector-v2/source/Neo4j.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Neo4j.md @@ -101,6 +101,6 @@ source { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Neo4j Source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Notion.md b/versioned_docs/version-2.3.0/connector-v2/source/Notion.md index dcc81aa0a2..e26a1e1c81 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Notion.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Notion.md @@ -300,6 +300,6 @@ Notion { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Notion Source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/OneSignal.md b/versioned_docs/version-2.3.0/connector-v2/source/OneSignal.md index 5fe71d38e9..852fc75ff1 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/OneSignal.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/OneSignal.md @@ -317,7 +317,7 @@ OneSignal { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add OneSignal Source Connector - [Feature][Connector-V2][HTTP] Use json-path parsing ([3510](https://github.com/apache/incubator-seatunnel/pull/3510)) \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/source/OpenMldb.md b/versioned_docs/version-2.3.0/connector-v2/source/OpenMldb.md index b6c77a1d82..28934af8b4 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/OpenMldb.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/OpenMldb.md @@ -82,10 +82,4 @@ Source plugin common parameters, please refer to [Source Common Options](common- cluster_mode = false } -``` - -## Changelog - -### 2.3.0 2022-12-30 - -- Add openmldb source connector \ No newline at end of file +``` \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/connector-v2/source/OssJindoFile.md b/versioned_docs/version-2.3.0/connector-v2/source/OssJindoFile.md index 3246494b69..26409c72b2 100644 --- 
a/versioned_docs/version-2.3.0/connector-v2/source/OssJindoFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/OssJindoFile.md @@ -242,6 +242,6 @@ Source plugin common parameters, please refer to [Source Common Options](common- ## Changelog -### 2.3.0 2022-12-30 +### next version - Add OSS Jindo File Source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Rabbitmq.md b/versioned_docs/version-2.3.0/connector-v2/source/Rabbitmq.md index 56ea5fa462..47030cd46a 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Rabbitmq.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Rabbitmq.md @@ -153,6 +153,6 @@ source { ## Changelog -### 2.3.0 2022-12-30 +### next version - Add Rabbitmq source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Redis.md b/versioned_docs/version-2.3.0/connector-v2/source/Redis.md index 8f4f06eba8..930f915eaf 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Redis.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Redis.md @@ -253,6 +253,6 @@ simple: - Add Redis Source Connector -### 2.3.0 2022-12-30 +### next version - [Improve] Support redis cluster mode connection and user authentication [3188](https://github.com/apache/incubator-seatunnel/pull/3188) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/S3File.md b/versioned_docs/version-2.3.0/connector-v2/source/S3File.md index c42227c497..4b35d9692c 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/S3File.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/S3File.md @@ -257,7 +257,7 @@ Source plugin common parameters, please refer to [Source Common Options](common- - Add S3File Source Connector -### 2.3.0 2022-12-30 +### Next version - [Feature] Support S3A protocol ([3632](https://github.com/apache/incubator-seatunnel/pull/3632)) - Allow user to add additional hadoop-s3 parameters - Allow the use of the s3a protocol diff --git 
a/versioned_docs/version-2.3.0/connector-v2/source/SftpFile.md b/versioned_docs/version-2.3.0/connector-v2/source/SftpFile.md index 07a72ab26d..ba13969070 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/SftpFile.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/SftpFile.md @@ -209,6 +209,6 @@ Source plugin common parameters, please refer to [Source Common Options](common- ## Changelog -### 2.3.0 2022-12-30 +### next version - Add SftpFile Source Connector diff --git a/versioned_docs/version-2.3.0/connector-v2/source/Socket.md b/versioned_docs/version-2.3.0/connector-v2/source/Socket.md index d3dcba0164..eccc1f4fe9 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/Socket.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/Socket.md @@ -62,9 +62,6 @@ source { } } -transform { -} - sink { Console {} } @@ -104,6 +101,6 @@ spark - Add Socket Source Connector -### 2.3.0 2022-12-30 +### Next Version - `host` and `port` become required ([3317](https://github.com/apache/incubator-seatunnel/pull/3317)) diff --git a/versioned_docs/version-2.3.0/connector-v2/source/common-options.md b/versioned_docs/version-2.3.0/connector-v2/source/common-options.md index 7fc32c505e..e1072e6804 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/common-options.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/common-options.md @@ -1,4 +1,4 @@ -# Common Options +# Source Common Options > Common parameters of source connectors diff --git a/versioned_docs/version-2.3.0/connector-v2/source/kafka.md b/versioned_docs/version-2.3.0/connector-v2/source/kafka.md index 88a3bd2d4e..61b96936d8 100644 --- a/versioned_docs/version-2.3.0/connector-v2/source/kafka.md +++ b/versioned_docs/version-2.3.0/connector-v2/source/kafka.md @@ -206,7 +206,8 @@ source { - Add Kafka Source Connector -### 2.3.0 2022-12-30 +### Next Version - [Improve] Support setting read starting offset or time at startup config 
([3157](https://github.com/apache/incubator-seatunnel/pull/3157)) - [Improve] Support for dynamic discover topic & partition in streaming mode ([3125](https://github.com/apache/incubator-seatunnel/pull/3125)) +- [Bug] Fixed the problem that parsing the offset format failed when the startup mode was offset([3810](https://github.com/apache/incubator-seatunnel/pull/3810)) diff --git a/versioned_docs/version-2.3.0/seatunnel-engine/deployment.md b/versioned_docs/version-2.3.0/seatunnel-engine/deployment.md index 7deb082a84..1e37dfd021 100644 --- a/versioned_docs/version-2.3.0/seatunnel-engine/deployment.md +++ b/versioned_docs/version-2.3.0/seatunnel-engine/deployment.md @@ -175,7 +175,7 @@ mkdir -p $SEATUNNEL_HOME/logs nohup seatunnel-cluster.sh & ``` -The logs will write in `$SEATUNNEL_HOME/logs/seatunnel-server.log` +The logs will write in `$SEATUNNEL_HOME/logs/seatunnel-engine-server.log` ## 8. Install SeaTunnel Engine Client diff --git a/versioned_docs/version-2.3.0/start-v2/locally/deployment.md b/versioned_docs/version-2.3.0/start-v2/locally/deployment.md index 3bf82d9370..9b87da225b 100644 --- a/versioned_docs/version-2.3.0/start-v2/locally/deployment.md +++ b/versioned_docs/version-2.3.0/start-v2/locally/deployment.md @@ -21,7 +21,7 @@ package `seatunnel-<version>-bin.tar.gz` Or you can download it by terminal ```shell -export version="2.3.0" +export version="2.3.0-beta" wget "https://archive.apache.org/dist/incubator/seatunnel/${version}/apache-seatunnel-incubating-${version}-bin.tar.gz" tar -xzvf "apache-seatunnel-incubating-${version}-bin.tar.gz" ``` @@ -30,11 +30,11 @@ tar -xzvf "apache-seatunnel-incubating-${version}-bin.tar.gz" ## Step 3: Install connectors plugin Since 2.2.0-beta, the binary package does not provide connector dependencies by default, so when using it for the first time, we need to execute the following command to install the connector: (Of course, you can also manually download the connector from [Apache Maven 
Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/ to download, then manually move to the seatunnel subdirectory under the connectors directory). ```bash -sh bin/install_plugin.sh +sh bin/install_plugin.sh 2.3.0-beta ``` -If you need to specify the version of the connector, take 2.3.0 as an example, we need to execute +If you need to specify the version of the connector, take 2.3.0-beta as an example, we need to execute ```bash -sh bin/install_plugin.sh 2.3.0 +sh bin/install_plugin.sh 2.3.0-beta ``` Usually we don't need all the connector plugins, so you can specify the plugins you need by configuring `config/plugin_config`, for example, you only need the `connector-console` plugin, then you can modify plugin.properties as ```plugin_config diff --git a/versioned_docs/version-2.3.0/start-v2/locally/quick-start-flink.md b/versioned_docs/version-2.3.0/start-v2/locally/quick-start-flink.md index f7f58a8d1b..3cc78a8db6 100644 --- a/versioned_docs/version-2.3.0/start-v2/locally/quick-start-flink.md +++ b/versioned_docs/version-2.3.0/start-v2/locally/quick-start-flink.md @@ -40,10 +40,6 @@ source { } } -transform { - -} - sink { Console {} } diff --git a/versioned_docs/version-2.3.0/start-v2/locally/quick-start-seatunnel-engine.md b/versioned_docs/version-2.3.0/start-v2/locally/quick-start-seatunnel-engine.md index d2ce05b373..2e9d86b924 100644 --- a/versioned_docs/version-2.3.0/start-v2/locally/quick-start-seatunnel-engine.md +++ b/versioned_docs/version-2.3.0/start-v2/locally/quick-start-seatunnel-engine.md @@ -32,10 +32,6 @@ source { } } -transform { - -} - sink { Console {} } @@ -82,7 +78,7 @@ row=16 : SGZCr, 94186144 ## What's More -For now, you are already take a quick look about SeaTunnel, you could see [connector](/docs/category/connector-v2) to find all +For now, you are already take a quick look about SeaTunnel, you could see [connector](../../connector-v2/source/FakeSource.md) to find all source and sink SeaTunnel supported. 
Or see [SeaTunnel Engine](../../seatunnel-engine/about.md) if you want to know more about SeaTunnel Engine. SeaTunnel also supports running jobs in Spark/Flink. You can see [Quick Start With Spark](quick-start-spark.md) or [Quick Start With Flink](quick-start-flink.md). diff --git a/versioned_docs/version-2.3.0/start-v2/locally/quick-start-spark.md b/versioned_docs/version-2.3.0/start-v2/locally/quick-start-spark.md index a66c277127..576b284a9c 100644 --- a/versioned_docs/version-2.3.0/start-v2/locally/quick-start-spark.md +++ b/versioned_docs/version-2.3.0/start-v2/locally/quick-start-spark.md @@ -41,10 +41,6 @@ source { } } -transform { - -} - sink { Console {} } diff --git a/versioned_docs/version-2.3.0/transform-v2/common-options.md b/versioned_docs/version-2.3.0/transform-v2/common-options.md new file mode 100644 index 0000000000..c858245fca --- /dev/null +++ b/versioned_docs/version-2.3.0/transform-v2/common-options.md @@ -0,0 +1,23 @@ +# Transform Common Options + +> Common parameters of transform plugins + +| name | type | required | default value | +|-------------------| ------ | -------- | ------------- | +| result_table_name | string | no | - | +| source_table_name | string | no | - | + +### source_table_name [string] + +When `source_table_name` is not specified, the current plug-in processes the data set `(dataset)` output by the previous plug-in in the configuration file; + +When `source_table_name` is specified, the current plugin is processing the data set corresponding to this parameter. + +### result_table_name [string] + +When `result_table_name` is not specified, the data processed by this plugin will not be registered as a data set that can be directly accessed by other plugins, or called a temporary table `(table)`; + +When `result_table_name` is specified, the data processed by this plugin will be registered as a data set `(dataset)` that can be directly accessed by other plugins, or called a temporary table `(table)` . 
The dataset registered here can be directly accessed by other plugins by specifying `source_table_name` . + +## Examples + diff --git a/versioned_docs/version-2.3.0/transform-v2/copy.md b/versioned_docs/version-2.3.0/transform-v2/copy.md new file mode 100644 index 0000000000..39ad7f58c0 --- /dev/null +++ b/versioned_docs/version-2.3.0/transform-v2/copy.md @@ -0,0 +1,66 @@ +# Copy + +> Copy transform plugin + +## Description + +Copy a field to a new field. + +## Options + +| name | type | required | default value | +|---------------| ------ | -------- |---------------| +| src_field | string | yes | | +| dest_field | string | yes | | + +### src_field [string] + +Src field name you want to copy + +### dest_field [string] + +This dest field name + +### common options [string] + +Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details + +## Example + +The data read from source is a table like this: + +| name | age | card | +|----------|-----|------| +| Joy Ding | 20 | 123 | +| May Ding | 20 | 123 | +| Kin Dom | 20 | 123 | +| Joy Dom | 20 | 123 | + +We want copy field `name` to a new field `name1`, we can add `Copy` Transform like this + +``` +transform { + Copy { + source_table_name = "fake" + result_table_name = "fake1" + src_field = "name" + dest_field = "name1" + } +} +``` + +Then the data in result table `fake1` will like this + +| name | age | card | name1 | +|----------|-----|------|----------| +| Joy Ding | 20 | 123 | Joy Ding | +| May Ding | 20 | 123 | May Ding | +| Kin Dom | 20 | 123 | Kin Dom | +| Joy Dom | 20 | 123 | Joy Dom | + + +## Changelog + +### new version + +- Add Copy Transform Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/transform-v2/filter-rowkind.md b/versioned_docs/version-2.3.0/transform-v2/filter-rowkind.md new file mode 100644 index 0000000000..e67bab89a1 --- /dev/null +++ b/versioned_docs/version-2.3.0/transform-v2/filter-rowkind.md @@ -0,0 +1,67 @@ +# 
FilterRowKind + +> FilterRowKind transform plugin + +## Description + +Filter the data by RowKind + +## Options + +| name | type | required | default value | +|---------------|-------| -------- |---------------| +| include_kinds | array | yes | | +| exclude_kinds | array | yes | | + +### include_kinds [array] + +The row kinds to include + +### exclude_kinds [array] + +The row kinds to exclude. + +You can only config one of `include_kinds` and `exclude_kinds`. + +### common options [string] + +Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details + +## Examples + +The RowKink of the data generate by FakeSource is `INSERT`, If we use `FilterRowKink` transform and exclude the `INSERT` data, we will write zero rows into sink. + +```yaml + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + age = "int" + } + } + } +} + +transform { + FilterRowKind { + source_table_name = "fake" + result_table_name = "fake1" + exclude_kinds = ["INSERT"] + } +} + +sink { + Console { + source_table_name = "fake1" + } +} +``` \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/transform-v2/filter.md b/versioned_docs/version-2.3.0/transform-v2/filter.md new file mode 100644 index 0000000000..0a4722a9c5 --- /dev/null +++ b/versioned_docs/version-2.3.0/transform-v2/filter.md @@ -0,0 +1,60 @@ +# Filter + +> Filter transform plugin + +## Description + +Filter the field. + +## Options + +| name | type | required | default value | +|--------------|-------| -------- |---------------| +| fields | array | yes | | + +### fields [array] + +The list of fields that need to be kept. 
Fields not in the list will be deleted + +### common options [string] + +Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details + +## Example + +The data read from source is a table like this: + +| name | age | card | +|----------|-----|------| +| Joy Ding | 20 | 123 | +| May Ding | 20 | 123 | +| Kin Dom | 20 | 123 | +| Joy Dom | 20 | 123 | + +We want to delete field `age`, we can add `Filter` Transform like this + +``` +transform { + Filter { + source_table_name = "fake" + result_table_name = "fake1" + fields = [name, card] + } +} +``` + +Then the data in result table `fake1` will like this + +| name | card | +|----------|------| +| Joy Ding | 123 | +| May Ding | 123 | +| Kin Dom | 123 | +| Joy Dom | 123 | + + +## Changelog + +### new version + +- Add Filter Transform Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/transform-v2/replace.md b/versioned_docs/version-2.3.0/transform-v2/replace.md new file mode 100644 index 0000000000..f23a50835e --- /dev/null +++ b/versioned_docs/version-2.3.0/transform-v2/replace.md @@ -0,0 +1,121 @@ +# Replace + +> Replace transform plugin + +## Description + +Examines string value in a given field and replaces substring of the string value that matches the given string literal or regexes with the given replacement. + +## Options + +| name | type | required | default value | +| -------------- | ------ | -------- |---------------| +| replace_field | string | yes | | +| pattern | string | yes | - | +| replacement | string | yes | - | +| is_regex | boolean| no | false | +| replace_first | boolean| no | false | + +### replace_field [string] + +The field you want to replace + +### pattern [string] + +The old string that will be replaced + +### replacement [string] + +The new string for replace + +### is_regex [boolean] + +Use regex for string match + +### replace_first [boolean] + +Whether replace the first match string. Only used when `is_regex = true`. 
+ +### common options [string] + +Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details + +## Example + +The data read from source is a table like this: + +| name | age | card | +|----------|-----|------| +| Joy Ding | 20 | 123 | +| May Ding | 20 | 123 | +| Kin Dom | 20 | 123 | +| Joy Dom | 20 | 123 | + +We want to replace the char ` ` to `_` at the `name` field. Then we can add a `Replace` Transform like this: + +``` +transform { + Replace { + source_table_name = "fake" + result_table_name = "fake1" + replace_field = "name" + pattern = " " + replacement = "_" + is_regex = true + } +} +``` + +Then the data in result table `fake1` will update to + + +| name | age | card | +|----------|-----|------| +| Joy_Ding | 20 | 123 | +| May_Ding | 20 | 123 | +| Kin_Dom | 20 | 123 | +| Joy_Dom | 20 | 123 | + +## Job Config Example + +``` +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + } + } + } +} + +transform { + Replace { + source_table_name = "fake" + result_table_name = "fake1" + replace_field = "name" + pattern = ".+" + replacement = "b" + is_regex = true + } +} + +sink { + Console { + source_table_name = "fake1" + } +} +``` + +## Changelog + +### new version + +- Add Replace Transform Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/transform-v2/split.md b/versioned_docs/version-2.3.0/transform-v2/split.md new file mode 100644 index 0000000000..9be8b23145 --- /dev/null +++ b/versioned_docs/version-2.3.0/transform-v2/split.md @@ -0,0 +1,72 @@ +# Split + +> Split transform plugin + +## Description + +Split a field to more than one field. 
+ +## Options + +| name | type | required | default value | +|----------------|--------| -------- |---------------| +| separator | string | yes | | +| split_field | string | yes | | +| output_fields | array | yes | | + +### separator [string] + +The list of fields that need to be kept. Fields not in the list will be deleted + +### split_field[string] + +The field to be split + +### output_fields[array] + +The result fields after split + +### common options [string] + +Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details + +## Example + +The data read from source is a table like this: + +| name | age | card | +|----------|-----|------| +| Joy Ding | 20 | 123 | +| May Ding | 20 | 123 | +| Kin Dom | 20 | 123 | +| Joy Dom | 20 | 123 | + +We want split `name` field to `first_name` and `second name`, we can add `Split` transform like this + +``` +transform { + Split { + source_table_name = "fake" + result_table_name = "fake1" + separator = " " + split_field = "name" + output_fields = [first_name, second_name] + } +} +``` + +Then the data in result table `fake1` will like this + +| name | age | card | first_name | last_name | +|----------|-----|------|------------|-----------| +| Joy Ding | 20 | 123 | Joy | Ding | +| May Ding | 20 | 123 | May | Ding | +| Kin Dom | 20 | 123 | Kin | Dom | +| Joy Dom | 20 | 123 | Joy | Dom | + + +## Changelog + +### new version + +- Add Split Transform Connector \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/transform/common-options.mdx b/versioned_docs/version-2.3.0/transform/common-options.mdx deleted file mode 100644 index 9f20ea13a6..0000000000 --- a/versioned_docs/version-2.3.0/transform/common-options.mdx +++ /dev/null @@ -1,118 +0,0 @@ -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -# Common Options - -> Common parameters of transform plugins - -:::tip - -This transform both supported by engine Spark and Flink. 
- -::: - -## Transform Plugin common parameters - -<Tabs - groupId="engine-type" - defaultValue="spark" - values={[ - {label: 'Spark', value: 'spark'}, - {label: 'Flink', value: 'flink'}, - ]}> -<TabItem value="spark"> - -| name | type | required | default value | -| ----------------- | ------ | -------- | ------------- | -| source_table_name | string | no | - | -| result_table_name | string | no | - | - -</TabItem> -<TabItem value="flink"> - -| name | type | required | default value | -| ----------------- | ------ | -------- | ------------- | -| source_table_name | string | no | - | -| result_table_name | string | no | - | -| field_name | string | no | - | - -### field_name [string] - -When the data is obtained from the upper-level plugin, you can specify the name of the obtained field, which is convenient for use in subsequent sql plugins. - -</TabItem> -</Tabs> - -### source_table_name [string] - -When `source_table_name` is not specified, the current plug-in processes the data set `(dataset)` output by the previous plug-in in the configuration file; - -When `source_table_name` is specified, the current plugin is processing the data set corresponding to this parameter. - -### result_table_name [string] - -When `result_table_name` is not specified, the data processed by this plugin will not be registered as a data set that can be directly accessed by other plugins, or called a temporary table `(table)`; - -When `result_table_name` is specified, the data processed by this plugin will be registered as a data set `(dataset)` that can be directly accessed by other plugins, or called a temporary table `(table)` . The dataset registered here can be directly accessed by other plugins by specifying `source_table_name` . 
- -## Examples - -<Tabs - groupId="engine-type" - defaultValue="spark" - values={[ - {label: 'Spark', value: 'spark'}, - {label: 'Flink', value: 'flink'}, - ]}> -<TabItem value="spark"> - -```bash -split { - source_table_name = "source_view_table" - source_field = "message" - delimiter = "&" - fields = ["field1", "field2"] - result_table_name = "result_view_table" -} -``` - -> The `Split` plugin will process the data in the temporary table `source_view_table` and register the processing result as a temporary table named `result_view_table`. This temporary table can be used by any subsequent `Filter` or `Output` plugin by specifying `source_table_name` . - -```bash -split { - source_field = "message" - delimiter = "&" - fields = ["field1", "field2"] -} -``` - -> Note: If `source_table_name` is not configured, output the processing result of the last `Transform` plugin in the configuration file - -</TabItem> -<TabItem value="flink"> - -```bash -source { - FakeSourceStream { - result_table_name = "fake_1" - field_name = "name,age" - } - FakeSourceStream { - result_table_name = "fake_2" - field_name = "name,age" - } -} - -transform { - sql { - source_table_name = "fake_1" - sql = "select name from fake_1" - result_table_name = "fake_name" - } -} -``` - -> If `source_table_name` is not specified, the sql plugin will process the data of `fake_2` , and if it is set to `fake_1` , it will process the data of `fake_1` . - -</TabItem> -</Tabs> \ No newline at end of file diff --git a/versioned_docs/version-2.3.0/transform/json.md b/versioned_docs/version-2.3.0/transform/json.md deleted file mode 100644 index 5ec5ba5c19..0000000000 --- a/versioned_docs/version-2.3.0/transform/json.md +++ /dev/null @@ -1,197 +0,0 @@ -# Json - -> Json transform plugin - -## Description - -Json analysis of the specified fields of the original data set - -:::tip - -This transform **ONLY** supported by Spark. 
- -::: - -## Options - -| name | type | required | default value | -| -------------- | ------ | -------- | ------------- | -| source_field | string | no | raw_message | -| target_field | string | no | __root__ | -| schema_dir | string | no | - | -| schema_file | string | no | - | -| common-options | string | no | - | - -### source_field [string] - -Source field, if not configured, the default is `raw_message` - -### target_field [string] - -The target field, if it is not configured, the default is `__root__` , and the result of Json parsing will be uniformly placed at the top of the `Dataframe` - -### schema_dir [string] - -Style directory, if not configured, the default is `$seatunnelRoot/plugins/json/files/schemas/` - -### schema_file [string] - -The style file name, if it is not configured, the default is empty, that is, the structure is not specified, and the system derives it by itself according to the input of the data source. - -### common options [string] - -Transform plugin common parameters, please refer to [Transform Plugin](common-options.mdx) for details - -## Schema Use cases - -- `json schema` usage scenarios - -The multiple data sources of a single task may contain different styles of json data. For example, the `topicA` style from `Kafka` is - -```json -{ - "A": "a_val", - "B": "b_val" -} -``` - -The style from `topicB` is - -```json -{ - "C": "c_val", - "D": "d_val" -} -``` - -When running `Transform` , you need to fuse the data of `topicA` and `topicB` into a wide table for calculation. 
You can specify a `schema` whose content style is: - -```json -{ - "A": "a_val", - "B": "b_val", - "C": "c_val", - "D": "d_val" -} -``` - -Then the fusion output result of `topicA` and `topicB` is: - -```bash -+-----+-----+-----+-----+ -|A |B |C |D | -+-----+-----+-----+-----+ -|a_val|b_val|null |null | -|null |null |c_val|d_val| -+-----+-----+-----+-----+ -``` - -## Examples - -### Do not use `target_field` - -```bash -json { - source_field = "message" -} -``` - -- Source - -```bash -+----------------------------+ -|message | -+----------------------------+ -|{"name": "ricky", "age": 24}| -|{"name": "gary", "age": 28} | -+----------------------------+ -``` - -- Sink - -```bash -+----------------------------+---+-----+ -|message |age|name | -+----------------------------+---+-----+ -|{"name": "gary", "age": 28} |28 |gary | -|{"name": "ricky", "age": 23}|23 |ricky| -+----------------------------+---+-----+ -``` - -### Use `target_field` - -```bash -json { - source_field = "message" - target_field = "info" -} -``` - -- Souce - -```bash -+----------------------------+ -|message | -+----------------------------+ -|{"name": "ricky", "age": 24}| -|{"name": "gary", "age": 28} | -+----------------------------+ -``` - -- Sink - -```bash -+----------------------------+----------+ -|message |info | -+----------------------------+----------+ -|{"name": "gary", "age": 28} |[28,gary] | -|{"name": "ricky", "age": 23}|[23,ricky]| -+----------------------------+----------+ -``` - -> The results of json processing support `select * from where info.age = 23` such SQL statements - -### Use `schema_file` - -```bash -json { - source_field = "message" - schema_file = "demo.json" -} -``` - -- Schema - -Place the following content in `~/seatunnel/plugins/json/files/schemas/demo.json` of Driver Node: - -```json -{ - "name": "demo", - "age": 24, - "city": "LA" -} -``` - -- Source - -```bash -+----------------------------+ -|message | -+----------------------------+ -|{"name": "ricky", "age": 
24}| -|{"name": "gary", "age": 28} | -+----------------------------+ -``` - -- Sink - -```bash -+----------------------------+---+-----+-----+ -|message |age|name |city | -+----------------------------+---+-----+-----+ -|{"name": "gary", "age": 28} |28 |gary |null | -|{"name": "ricky", "age": 23}|23 |ricky|null | -+----------------------------+---+-----+-----+ -``` - -> If you use `cluster mode` for deployment, make sure that the `json schemas` directory is packaged in `plugins.tar.gz` diff --git a/versioned_docs/version-2.3.0/transform/nullRate.md b/versioned_docs/version-2.3.0/transform/nullRate.md deleted file mode 100644 index a5c7bf1eba..0000000000 --- a/versioned_docs/version-2.3.0/transform/nullRate.md +++ /dev/null @@ -1,69 +0,0 @@ -# NullRate - -> NULL rate transform plugin - -## Description - -When there is a large amount of data, the final result will always be greatly affected by the problem of data null value. Therefore, early null value detection is particularly important. For this reason, this function came into being - -:::tip - -This transform **ONLY** supported by Spark. - -::: - -## Options - -| name | type | required | default value | -| -------------------------| ------------ | -------- | ------------- | -| fields | string_list | yes | - | -| rates | double_list | yes | - | -| throw_exception_enable | boolean | no | - | -| save_to_table_name | string | no | - | - - - -### field [string_list] - -Which fields do you want to monitor . - -### rates [double_list] - -It is consistent with the number of fields. Double type indicates the set null rate value . - -### throw_exception_enable [boolean] - -Whether to throw an exception when it is greater than the set value. The default value is false . - -### save_to_table_name [string] - -Whether the current verification value is output to the table. It is not output by defaul . 
- -### common options [string] - -Transform plugin common parameters, please refer to [Transform Plugin](common-options.mdx) for details - -## Examples - -```bash - nullRate { - fields = ["msg", "name"] - rates = [10.0,3.45] - save_to_table_name = "tmp" - throw_exception_enable = true - } -} -``` - -Use `NullRate` in transform's Dataset. - -```bash - transform { - NullRate { - fields = ["msg", "name"] - rates = [10.0,3.45] - save_to_table_name = "tmp" - throw_exception_enable = true - } - } -``` diff --git a/versioned_docs/version-2.3.0/transform/nulltf.md b/versioned_docs/version-2.3.0/transform/nulltf.md deleted file mode 100644 index 55f124d10d..0000000000 --- a/versioned_docs/version-2.3.0/transform/nulltf.md +++ /dev/null @@ -1,75 +0,0 @@ -# Nulltf - -> NULL default value transform plugin - -## Description - -set default value for null field - -:::tip - -This transform only supported by engine Spark. - -::: - -## Options - -| name | type | required | default value | -| ------------------- | ------- | -------- | ------------- | -| fields | array | no | - | - -### fields [list] - -A list of fields whose default value will be set. -The default value of the field can be set in the form of "field:value", If no set, the default value will be set according to the field type. 
- -## Examples - -the configuration - -```bash - nulltf { - fields { - name: "", - price: 0, - num: 100, - flag: false, - dt_timestamp: "2022-05-18 13:51:40.603", - dt_date: "2022-05-19" - } - } -``` - -before use nulltf transform - -```bash -+-----+-----+----+-----+--------------------+----------+ -| name|price| num| flag| dt_timestamp| dt_date| -+-----+-----+----+-----+--------------------+----------+ -|名称1| 22.5| 100|false|2022-05-20 14:34:...|2022-05-20| -| null| 22.5| 100|false|2022-05-20 14:35:...|2022-05-20| -|名称1| null| 100|false|2022-05-20 14:35:...|2022-05-20| -|名称1| 22.5|null|false|2022-05-20 14:36:...|2022-05-20| -|名称1| 22.5| 100| null|2022-05-20 14:36:...|2022-05-20| -|名称1| 22.5| 100|false| null|2022-05-20| -|名称1| 22.5| 100|false|2022-05-20 14:37:...| null| -+-----+-----+----+-----+--------------------+----------+ -``` - -after use nulltf transform - -```bash -+-----+-----+----+-----+--------------------+----------+ -| name|price| num| flag| dt_timestamp| dt_date| -+-----+-----+----+-----+--------------------+----------+ -|名称1| 22.5|100|false|2022-05-20 14:34:...|2022-05-20| -| | 22.5|100|false|2022-05-20 14:35:...|2022-05-20| -|名称1| 0.0|100|false|2022-05-20 14:35:...|2022-05-20| -|名称1| 22.5|100|false|2022-05-20 14:36:...|2022-05-20| -|名称1| 22.5|100|false|2022-05-20 14:36:...|2022-05-20| -|名称1| 22.5|100|false|2022-05-18 13:51:...|2022-05-20| -|名称1| 22.5|100|false|2022-05-20 14:37:...|2022-05-19| -+-----+-----+---+-----+--------------------+----------+ -``` - - diff --git a/versioned_docs/version-2.3.0/transform/replace.md b/versioned_docs/version-2.3.0/transform/replace.md deleted file mode 100644 index 1bf57fc7f6..0000000000 --- a/versioned_docs/version-2.3.0/transform/replace.md +++ /dev/null @@ -1,81 +0,0 @@ -# Replace - -> Replace transform plugin - -## Description - -Examines string value in a given field and replaces substring of the string value that matches the given string literal or regexes with the given replacement. 
- -:::tip - -This transform **ONLY** supported by Spark. - -::: - -## Options - -| name | type | required | default value | -| -------------- | ------ | -------- | ------------- | -| source_field | string | no | raw_message | -| fields | string | yes | - | -| pattern | string | yes | - | -| replacement | string | yes | - | -| is_regex | boolean| no | false | -| replace_first | boolean| no | false | - -### source_field [string] - -Source field, if not configured, the default is `raw_message` - -### field [string] - -The name of the field to replaced. - -### pattern [string] - -The string to match. - -### replacement [string] - -The replacement pattern (is_regex is true) or string literal (is_regex is false). - -### is_regex [boolean] - -Whether or not to interpret the pattern as a regex (true) or string literal (false). - -### replace_first [boolean] - -Whether or not to skip any matches beyond the first match. - -### common options [string] - -Transform plugin common parameters, please refer to [Transform Plugin](common-options.mdx) for details - -## Examples -the word `a` will be replaced by `b` at message field values. - -```bash -replace { - source_field = "message" - fields = "_replaced" - pattern = "a" - replacement = "b" -} -``` - -Use `Replace` as udf in sql. 
- -```bash - Replace { - fields = "_replaced" - pattern = "([^ ]*) ([^ ]*)" - replacement = "$2" - isRegex = true - replaceFirst = true - } - - # Use the split function (confirm that the fake table exists) - sql { - sql = "select * from (select raw_message, replace(raw_message) as info_row from fake) t1" - } -``` diff --git a/versioned_docs/version-2.3.0/transform/split.mdx b/versioned_docs/version-2.3.0/transform/split.mdx deleted file mode 100644 index abc288ec17..0000000000 --- a/versioned_docs/version-2.3.0/transform/split.mdx +++ /dev/null @@ -1,124 +0,0 @@ -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -# Split - -> Split transform plugin - -## Description - -A string cutting function is defined, which is used to split the specified field in the Sql plugin. - -:::tip - -This transform both supported by engine Spark and Flink. - -::: - -## Options - -<Tabs - groupId="engine-type" - defaultValue="spark" - values={[ - {label: 'Spark', value: 'spark'}, - {label: 'Flink', value: 'flink'}, - ]}> -<TabItem value="spark"> - -| name | type | required | default value | -| -------------- | ------ | -------- | ------------- | -| separator | string | no | " " | -| fields | array | yes | - | -| source_field | string | no | raw_message | -| target_field | string | no | *root* | -| common-options | string | no | - | - -### separator [string] - -Separator, the input string is separated according to the separator. The default separator is a space `(" ")` . -Note: If you use some special characters in the separator, you need to escape it. e.g. "\\|" - -### source_field [string] - -The source field of the string before being split, if not configured, the default is `raw_message` - -### target_field [string] - -`target_field` can specify the location where multiple split fields are added to the Event. If it is not configured, the default is `_root_` , that is, all split fields will be added to the top level of the Event. 
If a specific field is specified, the divided field will be added to the next level of this field. - -</TabItem> -<TabItem value="flink"> - -| name | type | required | default value | -| -------------- | ------ | -------- | ------------- | -| separator | string | no | , | -| fields | array | yes | - | -| common-options | string | no | - | - -### separator [string] - -The specified delimiter, the default is `,` - -</TabItem> -</Tabs> - -### fields [list] - -In the split field name list, specify the field names of each character string after splitting in order. If the length of the `fields` is greater than the length of the separation result, the extra fields are assigned null characters. - -### common options [string] - -Transform plugin common parameters, please refer to [Transform Plugin](common-options.mdx) for details - -## Examples - -<Tabs - groupId="engine-type" - defaultValue="spark" - values={[ - {label: 'Spark', value: 'spark'}, - {label: 'Flink', value: 'flink'}, - ]}> -<TabItem value="spark"> - -Split the `message` field in the source data according to `&`, you can use `field1` or `field2` as the key to get the corresponding value - -```bash -split { - source_field = "message" - separator = "&" - fields = ["field1", "field2"] -} -``` - -Split the `message` field in the source data according to `,` , the split field is `info` , you can use `info.field1` or `info.field2` as the key to get the corresponding value - -```bash -split { - source_field = "message" - target_field = "info" - separator = "," - fields = ["field1", "field2"] -} -``` - -</TabItem> -<TabItem value="flink"> - -</TabItem> -</Tabs> - -Use `Split` as udf in sql. 
- -```bash - # This just created a udf called split - Split{ - separator = "#" - fields = ["name","age"] - } - # Use the split function (confirm that the fake table exists) - sql { - sql = "select * from (select raw_message,split(raw_message) as info_row from fake) t1" - } -``` diff --git a/versioned_docs/version-2.3.0/transform/sql.md b/versioned_docs/version-2.3.0/transform/sql.md deleted file mode 100644 index c06e6357cf..0000000000 --- a/versioned_docs/version-2.3.0/transform/sql.md +++ /dev/null @@ -1,61 +0,0 @@ -# Sql - -> Sql transform plugin - -## Description - -Use SQL to process data and support engine's UDF function. - -:::tip - -This transform both supported by engine Spark and Flink. - -::: - -## Options - -| name | type | required | default value | -| -------------- | ------ | -------- | ------------- | -| sql | string | yes | - | -| common-options | string | no | - | - -### sql [string] - -SQL statement, the table name used in SQL configured in the `Source` or `Transform` plugin - -### common options [string] - -Transform plugin common parameters, please refer to [Transform Plugin](common-options.mdx) for details - -## Examples - -### Simple Select - -Use the SQL plugin for field deletion. Only the `username` and `address` fields are reserved, and the remaining fields will be discarded. 
`user_info` is the `result_table_name` configured by the previous plugin - -```bash -sql { - sql = "select username, address from user_info", -} -``` - -### Use UDF - -Use SQL plugin for data processing, use `substring` functions to intercept the `telephone` field - -```bash -sql { - sql = "select substring(telephone, 0, 10) from user_info", -} -``` - -### Use UDAF - -Use SQL plugin for data aggregation, use avg functions to perform aggregation operations on the original data set, and take out the average value of the `age` field - -```bash -sql { - sql = "select avg(age) from user_info", -} -``` - diff --git a/versioned_docs/version-2.3.0/transform/udf.md b/versioned_docs/version-2.3.0/transform/udf.md deleted file mode 100644 index 21a37e0e13..0000000000 --- a/versioned_docs/version-2.3.0/transform/udf.md +++ /dev/null @@ -1,44 +0,0 @@ -# udf - -> UDF transform plugin - -## Description - -Supports using UDF in data integration by the transform. -Need to specify the function name and class name and put UDF jars in Flink's classpath or import them via 'Flink run -c xxx.jar' - -:::tip - -This transform **ONLY** supported by Flink. - -::: - -## Options - -| name | type | required | default value | -| -------------- | ----------- | -------- | ------------- | -| function | string | yes | - | - -### function [string] - -A config prefix, use like `function.test="xxx.Test"`. - -### common options [string] - -Transform plugin common parameters, please refer to [Transform Plugin](common-options.mdx) for details - -## Examples - -Use `udf` in sql. 
- -```bash - udf { - function.test_1 = "com.example.udf.flink.TestUDF" - function.test_2 = "com.example.udf.flink.TestUDTF" - } - - # Use the specify function (confirm that the fake table exists) - sql { - sql = "select test_1(name), age from fake" - } -``` diff --git a/versioned_docs/version-2.3.0/transform/uuid.md b/versioned_docs/version-2.3.0/transform/uuid.md deleted file mode 100644 index 6be962dee6..0000000000 --- a/versioned_docs/version-2.3.0/transform/uuid.md +++ /dev/null @@ -1,64 +0,0 @@ -# UUID - -> UUID transform plugin - -## Description - -Generate a universally unique identifier on a specified field. - -:::tip - -This transform **ONLY** supported by Spark. - -::: - -## Options - -| name | type | required | default value | -| -------------- | ------ | -------- | ------------- | -| fields | string | yes | - | -| prefix | string | no | - | -| secure | boolean| no | false | - -### field [string] - -The name of the field to generate. - -### prefix [string] - -The prefix string constant to prepend to each generated UUID. - -### secure [boolean] - -the cryptographically secure algorithm can be comparatively slow -The nonSecure algorithm uses a secure random seed but is otherwise deterministic - -### common options [string] - -Transform plugin common parameters, please refer to [Transform Plugin](common-options.mdx) for details - -## Examples - -```bash - UUID { - fields = "u" - prefix = "uuid-" - secure = true - } -} -``` - -Use `UUID` as udf in sql. 
- -```bash - UUID { - fields = "u" - prefix = "uuid-" - secure = true - } - - # Use the uuid function (confirm that the fake table exists) - sql { - sql = "select * from (select raw_message, UUID() as info_row from fake) t1" - } -``` diff --git a/versioned_sidebars/version-2.3.0-sidebars.json b/versioned_sidebars/version-2.3.0-sidebars.json index 1bc94957a1..a4eaf23d1a 100644 --- a/versioned_sidebars/version-2.3.0-sidebars.json +++ b/versioned_sidebars/version-2.3.0-sidebars.json @@ -93,19 +93,19 @@ }, { "type": "category", - "label": "Transform", + "label": "Transform-V2", "link": { "type": "generated-index", - "title": "Transform of SeaTunnel", - "description": "List all transform supported Apache SeaTunnel for now.", - "slug": "/category/transform", - "keywords": ["transform"], + "title": "Transform V2 of SeaTunnel", + "description": "List all transform v2 supported Apache SeaTunnel for now.", + "slug": "/category/transform-v2", + "keywords": ["transform-v2"], "image": "/img/favicon.ico" }, "items": [ { "type": "autogenerated", - "dirName": "transform" + "dirName": "transform-v2" } ] },
