This is an automated email from the ASF dual-hosted git repository.
jark pushed a commit to branch release-1.10
in repository https://gitbox.apache.org/repos/asf/flink.git
The following commit(s) were added to refs/heads/release-1.10 by this push:
new 4a8331f [FLINK-15385][table][docs-zh] Translate "SQL" pages of Table
API into Chinese
4a8331f is described below
commit 4a8331fd6a78cb6a9d14fd3010eaef51b3a036f1
Author: Shaobin.Ou <[email protected]>
AuthorDate: Thu Jan 2 01:16:03 2020 +0800
[FLINK-15385][table][docs-zh] Translate "SQL" pages of Table API into
Chinese
This closes #10738
---
docs/dev/table/sql/alter.md | 6 +-
docs/dev/table/sql/alter.zh.md | 63 ++---
docs/dev/table/sql/create.md | 10 +-
docs/dev/table/sql/create.zh.md | 130 ++++++-----
docs/dev/table/sql/drop.md | 3 +
docs/dev/table/sql/drop.zh.md | 58 ++---
docs/dev/table/sql/index.md | 6 +-
docs/dev/table/sql/index.zh.md | 30 +--
docs/dev/table/sql/queries.zh.md | 493 +++++++++++++++++++--------------------
9 files changed, 410 insertions(+), 389 deletions(-)
diff --git a/docs/dev/table/sql/alter.md b/docs/dev/table/sql/alter.md
index 1cf2b92..4cba082 100644
--- a/docs/dev/table/sql/alter.md
+++ b/docs/dev/table/sql/alter.md
@@ -148,14 +148,18 @@ ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION
Alter a catalog function with the new identifier which is full classpath for
JAVA/SCALA and optional language tag. If a function doesn't exist in the
catalog, an exception is thrown.
**TEMPORARY**
+
Alter temporary catalog function that has catalog and database namespaces and
overrides catalog functions.
**TEMPORARY SYSTEM**
+
Alter temporary system function that has no namespace and overrides built-in
functions
**IF EXISTS**
+
If the function doesn't exist, nothing happens.
-**LANGUAGE JAVA|SCALA**
+**LANGUAGE JAVA\|SCALA**
+
Language tag to instruct flink runtime how to execute the function. Currently
only JAVA and SCALA are supported, the default language for a function is JAVA.
diff --git a/docs/dev/table/sql/alter.zh.md b/docs/dev/table/sql/alter.zh.md
index 4fed734..def2862 100644
--- a/docs/dev/table/sql/alter.zh.md
+++ b/docs/dev/table/sql/alter.zh.md
@@ -1,5 +1,5 @@
---
-title: "ALTER Statements"
+title: "ALTER 语句"
nav-parent_id: sql
nav-pos: 4
---
@@ -25,19 +25,19 @@ under the License.
* This will be replaced by the TOC
{:toc}
-ALTER statements are used to modified a registered table/view/function
definition in the [Catalog]({{ site.baseurl }}/dev/table/catalogs.html).
+ALTER 语句用于修改一个已经在 [Catalog]({{ site.baseurl }}/zh/dev/table/catalogs.html)
中注册的表、视图或函数定义。
-Flink SQL supports the following ALTER statements for now:
+Flink SQL 目前支持以下 ALTER 语句:
- ALTER TABLE
- ALTER DATABASE
- ALTER FUNCTION
-## Run an ALTER statement
+## 执行 ALTER 语句
-ALTER statements can be executed with the `sqlUpdate()` method of the
`TableEnvironment`, or executed in [SQL CLI]({{ site.baseurl
}}/dev/table/sqlClient.html). The `sqlUpdate()` method returns nothing for a
successful ALTER operation, otherwise will throw an exception.
+可以使用 `TableEnvironment` 中的 `sqlUpdate()` 方法执行 ALTER 语句,也可以在 [SQL CLI]({{
site.baseurl }}/zh/dev/table/sqlClient.html) 中执行 ALTER 语句。 若 ALTER
操作执行成功,`sqlUpdate()` 方法不返回任何内容,否则会抛出异常。
-The following examples show how to run an ALTER statement in
`TableEnvironment` and in SQL CLI.
+以下的例子展示了如何在 `TableEnvironment` 和 SQL CLI 中执行一个 ALTER 语句。
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -45,16 +45,16 @@ The following examples show how to run an ALTER statement
in `TableEnvironment`
EnvironmentSettings settings = EnvironmentSettings.newInstance()...
TableEnvironment tableEnv = TableEnvironment.create(settings);
-// register a table named "Orders"
+// 注册名为 “Orders” 的表
tableEnv.sqlUpdate("CREATE TABLE Orders (`user` BIGINT, product STRING, amount
INT) WITH (...)");
-// a string array: ["Orders"]
+// 字符串数组: ["Orders"]
String[] tables = tableEnv.listTable();
-// rename "Orders" to "NewOrders"
+// 把 “Orders” 的表名改为 “NewOrders”
tableEnv.sqlUpdate("ALTER TABLE Orders RENAME TO NewOrders;");
-// a string array: ["NewOrders"]
+// 字符串数组:["NewOrders"]
String[] tables = tableEnv.listTable();
{% endhighlight %}
</div>
@@ -64,16 +64,16 @@ String[] tables = tableEnv.listTable();
val settings = EnvironmentSettings.newInstance()...
val tableEnv = TableEnvironment.create(settings)
-// register a table named "Orders"
+// 注册名为 “Orders” 的表
tableEnv.sqlUpdate("CREATE TABLE Orders (`user` BIGINT, product STRING, amount
INT) WITH (...)");
-// a string array: ["Orders"]
+// 字符串数组: ["Orders"]
val tables = tableEnv.listTable()
-// rename "Orders" to "NewOrders"
+// 把 “Orders” 的表名改为 “NewOrders”
tableEnv.sqlUpdate("ALTER TABLE Orders RENAME TO NewOrders;")
-// a string array: ["NewOrders"]
+// 字符串数组:["NewOrders"]
val tables = tableEnv.listTable()
{% endhighlight %}
</div>
@@ -83,13 +83,13 @@ val tables = tableEnv.listTable()
settings = EnvironmentSettings.newInstance()...
table_env = TableEnvironment.create(settings)
-# a string array: ["Orders"]
+# 字符串数组: ["Orders"]
tables = tableEnv.listTable()
-# rename "Orders" to "NewOrders"
+# 把 “Orders” 的表名改为 “NewOrders”
tableEnv.sqlUpdate("ALTER TABLE Orders RENAME TO NewOrders;")
-# a string array: ["NewOrders"]
+# 字符串数组:["NewOrders"]
tables = tableEnv.listTable()
{% endhighlight %}
</div>
@@ -113,21 +113,21 @@ NewOrders
## ALTER TABLE
-* Rename Table
+* 重命名表
{% highlight sql %}
ALTER TABLE [catalog_name.][db_name.]table_name RENAME TO new_table_name
{% endhighlight %}
-Rename the given table name to another new table name.
+把原有的表名更改为新的表名。
-* Set or Alter Table Properties
+* 设置或修改表属性
{% highlight sql %}
ALTER TABLE [catalog_name.][db_name.]table_name SET (key1=val1, key2=val2, ...)
{% endhighlight %}
-Set one or more properties in the specified table. If a particular property is
already set in the table, override the old value with the new one.
+为指定的表设置一个或多个属性。若个别属性已经存在于表中,则使用新的值覆盖旧的值。
## ALTER DATABASE
@@ -135,27 +135,30 @@ Set one or more properties in the specified table. If a
particular property is a
ALTER DATABASE [catalog_name.]db_name SET (key1=val1, key2=val2, ...)
{% endhighlight %}
-Set one or more properties in the specified database. If a particular property
is already set in the database, override the old value with the new one.
+在数据库中设置一个或多个属性。若个别属性已经在数据库中设定,将会使用新值覆盖旧值。
## ALTER FUNCTION
{% highlight sql%}
-ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION
- [IF EXISTS] [catalog_name.][db_name.]function_name
+ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION
+ [IF EXISTS] [catalog_name.][db_name.]function_name
AS identifier [LANGUAGE JAVA|SCALA|
{% endhighlight %}
-Alter a catalog function that has catalog and database namespaces with the new
identifier which is full classpath for JAVA/SCALA and optional language tag. If
a function doesn't exist in the catalog, an exception is thrown.
+修改一个有 catalog 和数据库命名空间的 catalog function ,其需要指定 JAVA / SCALA 或其他 language tag
完整的 classpath。若函数不存在,删除会抛出异常。
**TEMPORARY**
-Alter temporary catalog function that has catalog and database namespaces and
overrides catalog functions.
+
+修改一个有 catalog 和数据库命名空间的临时 catalog function ,并覆盖原有的 catalog function 。
**TEMPORARY SYSTEM**
-Alter temporary system function that has no namespace and overrides built-in
functions
+
+修改一个没有数据库命名空间的临时系统 catalog function ,并覆盖系统内置的函数。
**IF EXISTS**
-If the function doesn't exist, nothing happens.
-**LANGUAGE JAVA|SCALA**
-Language tag to instruct flink runtime how to execute the function. Currently
only JAVA and SCALA are supported, the default language for a function is JAVA.
+若函数不存在,则不进行任何操作。
+
+**LANGUAGE JAVA\|SCALA**
+Language tag 用于指定 Flink runtime 如何执行这个函数。目前,只支持 JAVA 和 SCALA,且函数的默认语言为 JAVA。
\ No newline at end of file
diff --git a/docs/dev/table/sql/create.md b/docs/dev/table/sql/create.md
index 2070db9..a7fbd3a 100644
--- a/docs/dev/table/sql/create.md
+++ b/docs/dev/table/sql/create.md
@@ -175,7 +175,7 @@ Flink provides several commonly used watermark strategies.
- Ascending timestamps: `WATERMARK FOR rowtime_column AS rowtime_column -
INTERVAL '0.001' SECOND`.
- Emits a watermark of the maximum observed timestamp so far minus 1. Rows
that have a timestamp equal to the max timestamp are not late.
+ Emits a watermark of the maximum observed timestamp so far minus 1. Rows
that have a timestamp equal and smaller to the max timestamp are not late.
- Bounded out of orderness timestamps: `WATERMARK FOR rowtime_column AS
rowtimeField - INTERVAL 'string' timeUnit`.
@@ -237,13 +237,17 @@ CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION
Create a catalog function that has catalog and database namespaces with the
identifier which is full classpath for JAVA/SCALA and optional language tag. If
a function with the same name already exists in the catalog, an exception is
thrown.
**TEMPORARY**
+
Create temporary catalog function that has catalog and database namespaces and
overrides catalog functions.
**TEMPORARY SYSTEM**
+
Create temporary system function that has no namespace and overrides built-in
functions
**IF NOT EXISTS**
+
If the function already exists, nothing happens.
-**LANGUAGE JAVA|SCALA**
-Language tag to instruct flink runtime how to execute the function. Currently
only JAVA and SCALA are supported, the default language for a function is JAVA.
+**LANGUAGE JAVA\|SCALA**
+
+Language tag to instruct Flink runtime how to execute the function. Currently
only JAVA and SCALA are supported, the default language for a function is JAVA.
diff --git a/docs/dev/table/sql/create.zh.md b/docs/dev/table/sql/create.zh.md
index 34a680b..fcc6b22 100644
--- a/docs/dev/table/sql/create.zh.md
+++ b/docs/dev/table/sql/create.zh.md
@@ -1,5 +1,5 @@
---
-title: "CREATE Statements"
+title: "CREATE 语句"
nav-parent_id: sql
nav-pos: 2
---
@@ -25,19 +25,19 @@ under the License.
* This will be replaced by the TOC
{:toc}
-CREATE statements are used to register a table/view/function into current or
specified [Catalog]({{ site.baseurl }}/dev/table/catalogs.html). A registered
table/view/function can be used in SQL queries.
+CREATE 语句用于向当前或指定的 [Catalog]({{ site.baseurl }}/zh/dev/table/catalogs.html)
中注册表、视图或函数。注册后的表、视图和函数可以在 SQL 查询中使用。
-Flink SQL supports the following CREATE statements for now:
+目前 Flink SQL 支持下列 CREATE 语句:
- CREATE TABLE
- CREATE DATABASE
- CREATE FUNCTION
-## Run a CREATE statement
+## 执行 CREATE 语句
-CREATE statements can be executed with the `sqlUpdate()` method of the
`TableEnvironment`, or executed in [SQL CLI]({{ site.baseurl
}}/dev/table/sqlClient.html). The `sqlUpdate()` method returns nothing for a
successful CREATE operation, otherwise will throw an exception.
+可以使用 `TableEnvironment` 中的 `sqlUpdate()` 方法执行 CREATE 语句,也可以在 [SQL CLI]({{
site.baseurl }}/zh/dev/table/sqlClient.html) 中执行 CREATE 语句。 若 CREATE
操作执行成功,`sqlUpdate()` 方法不返回任何内容,否则会抛出异常。
-The following examples show how to run a CREATE statement in
`TableEnvironment` and in SQL CLI.
+以下的例子展示了如何在 `TableEnvironment` 和 SQL CLI 中执行一个 CREATE 语句。
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -45,17 +45,17 @@ The following examples show how to run a CREATE statement
in `TableEnvironment`
EnvironmentSettings settings = EnvironmentSettings.newInstance()...
TableEnvironment tableEnv = TableEnvironment.create(settings);
-// SQL query with a registered table
-// register a table named "Orders"
+// 对已经已经注册的表进行 SQL 查询
+// 注册名为 “Orders” 的表
tableEnv.sqlUpdate("CREATE TABLE Orders (`user` BIGINT, product STRING, amount
INT) WITH (...)");
-// run a SQL query on the Table and retrieve the result as a new Table
+// 在表上执行 SQL 查询,并把得到的结果作为一个新的表
Table result = tableEnv.sqlQuery(
"SELECT product, amount FROM Orders WHERE product LIKE '%Rubber%'");
-// SQL update with a registered table
-// register a TableSink
+// SQL 对已注册的表进行 update 操作
+// 注册 TableSink
tableEnv.sqlUpdate("CREATE TABLE RubberOrders(product STRING, amount INT) WITH
(...)");
-// run a SQL update query on the Table and emit the result to the TableSink
+// 在表上执行 SQL 更新查询并向 TableSink 发出结果
tableEnv.sqlUpdate(
"INSERT INTO RubberOrders SELECT product, amount FROM Orders WHERE product
LIKE '%Rubber%'");
{% endhighlight %}
@@ -66,17 +66,17 @@ tableEnv.sqlUpdate(
val settings = EnvironmentSettings.newInstance()...
val tableEnv = TableEnvironment.create(settings)
-// SQL query with a registered table
-// register a table named "Orders"
+// 对已经已经注册的表进行 SQL 查询
+// 注册名为 “Orders” 的表
tableEnv.sqlUpdate("CREATE TABLE Orders (`user` BIGINT, product STRING, amount
INT) WITH (...)");
-// run a SQL query on the Table and retrieve the result as a new Table
+// 在表上执行 SQL 查询,并把得到的结果作为一个新的表
val result = tableEnv.sqlQuery(
"SELECT product, amount FROM Orders WHERE product LIKE '%Rubber%'");
-// SQL update with a registered table
-// register a TableSink
+// SQL 对已注册的表进行 update 操作
+// 注册 TableSink
tableEnv.sqlUpdate("CREATE TABLE RubberOrders(product STRING, amount INT) WITH
('connector.path'='/path/to/file' ...)");
-// run a SQL update query on the Table and emit the result to the TableSink
+// 在表上执行 SQL 更新查询并向 TableSink 发出结果
tableEnv.sqlUpdate(
"INSERT INTO RubberOrders SELECT product, amount FROM Orders WHERE product
LIKE '%Rubber%'")
{% endhighlight %}
@@ -87,17 +87,17 @@ tableEnv.sqlUpdate(
settings = EnvironmentSettings.newInstance()...
table_env = TableEnvironment.create(settings)
-# SQL query with a registered table
-# register a table named "Orders"
+# 对已经已经注册的表进行 SQL 查询
+# 注册名为 “Orders” 的表
tableEnv.sqlUpdate("CREATE TABLE Orders (`user` BIGINT, product STRING, amount
INT) WITH (...)");
-# run a SQL query on the Table and retrieve the result as a new Table
+# 在表上执行 SQL 查询,并把得到的结果作为一个新的表
result = tableEnv.sqlQuery(
"SELECT product, amount FROM Orders WHERE product LIKE '%Rubber%'");
-# SQL update with a registered table
-# register a TableSink
+# SQL 对已注册的表进行 update 操作
+# 注册 TableSink
table_env.sql_update("CREATE TABLE RubberOrders(product STRING, amount INT)
WITH (...)")
-# run a SQL update query on the Table and emit the result to the TableSink
+# 在表上执行 SQL 更新查询并向 TableSink 发出结果
table_env \
.sql_update("INSERT INTO RubberOrders SELECT product, amount FROM Orders
WHERE product LIKE '%Rubber%'")
{% endhighlight %}
@@ -119,7 +119,7 @@ Flink SQL> INSERT INTO RubberOrders SELECT product, amount
FROM Orders WHERE pro
{% top %}
-## CREATE TABLE
+## CREATE TABLE
{% highlight sql %}
CREATE TABLE [catalog_name.][db_name.]table_name
@@ -142,44 +142,48 @@ CREATE TABLE [catalog_name.][db_name.]table_name
{% endhighlight %}
-Creates a table with the given name. If a table with the same name already
exists in the catalog, an exception is thrown.
+根据指定的表名创建一个表,如果同名表已经在 catalog 中存在了,则无法注册。
**COMPUTED COLUMN**
-A computed column is a virtual column that is generated using the syntax
"`column_name AS computed_column_expression`". It is generated from a non-query
expression that uses other columns in the same table and is not physically
stored within the table. For example, a computed column could be defined as
`cost AS price * quantity`. The expression may contain any combination of
physical column, constant, function, or variable. The expression cannot contain
a subquery.
+计算列是一个使用 “`column_name AS computed_column_expression`”
语法生成的虚拟列。它由使用同一表中其他列的非查询表达式生成,并且不会在表中进行物理存储。例如,一个计算列可以使用 `cost AS price *
quantity` 进行定义,这个表达式可以包含物理列、常量、函数或变量的任意组合,但这个表达式不能存在任何子查询。
-Computed columns are commonly used in Flink for defining [time attributes]({{
site.baseurl}}/dev/table/streaming/time_attributes.html) in CREATE TABLE
statements.
-A [processing time attribute]({{
site.baseurl}}/dev/table/streaming/time_attributes.html#processing-time) can be
defined easily via `proc AS PROCTIME()` using the system `PROCTIME()` function.
-On the other hand, computed column can be used to derive event time column
because an event time column may need to be derived from existing fields, e.g.
the original field is not `TIMESTAMP(3)` type or is nested in a JSON string.
+在 Flink 中计算列一般用于为 CREATE TABLE 语句定义 [时间属性]({{
site.baseurl}}/zh/dev/table/streaming/time_attributes.html)。
+[处理时间属性]({{
site.baseurl}}/zh/dev/table/streaming/time_attributes.html#processing-time)
可以简单地通过使用了系统函数 `PROCTIME()` 的 `proc AS PROCTIME()` 语句进行定义。
+另一方面,由于事件时间列可能需要从现有的字段中获得,因此计算列可用于获得事件时间列。例如,原始字段的类型不是 `TIMESTAMP(3)` 或嵌套在
JSON 字符串中。
-Notes:
+注意:
-- A computed column defined on a source table is computed after reading from
the source, it can be used in the following SELECT query statements.
-- A computed column cannot be the target of an INSERT statement. In INSERT
statements, the schema of SELECT clause should match the schema of the target
table without computed columns.
+- 定义在一个数据源表( source table )上的计算列会在从数据源读取数据后被计算,它们可以在 SELECT 查询语句中使用。
+- 计算列不可以作为 INSERT 语句的目标,在 INSERT 语句中,SELECT 语句的 schema 需要与目标表不带有计算列的 schema 一致。
**WATERMARK**
-The `WATERMARK` defines the event time attributes of a table and takes the
form `WATERMARK FOR rowtime_column_name AS watermark_strategy_expression`.
+`WATERMARK` 定义了表的事件时间属性,其形式为 `WATERMARK FOR rowtime_column_name AS
watermark_strategy_expression` 。
-The `rowtime_column_name` defines an existing column that is marked as the
event time attribute of the table. The column must be of type `TIMESTAMP(3)`
and be a top-level column in the schema. It may be a computed column.
+`rowtime_column_name` 把一个现有的列定义为一个为表标记事件时间的属性。该列的类型必须为 `TIMESTAMP(3)`,且是
schema 中的顶层列,它也可以是一个计算列。
-The `watermark_strategy_expression` defines the watermark generation strategy.
It allows arbitrary non-query expression, including computed columns, to
calculate the watermark. The expression return type must be TIMESTAMP(3), which
represents the timestamp since the Epoch.
+`watermark_strategy_expression` 定义了 watermark 的生成策略。它允许使用包括计算列在内的任意非查询表达式来计算
watermark ;表达式的返回类型必须是 `TIMESTAMP(3)`,表示了从 Epoch 以来的经过的时间。
+返回的 watermark 只有当其不为空且其值大于之前发出的本地 watermark 时才会被发出(以保证 watermark 递增)。每条记录的
watermark 生成表达式计算都会由框架完成。
+框架会定期发出所生成的最大的 watermark ,如果当前 watermark 仍然与前一个 watermark 相同、为空、或返回的 watermark
的值小于最后一个发出的 watermark ,则新的 watermark 不会被发出。
+Watermark 根据 [`pipeline.auto-watermark-interval`]({{ site.baseurl
}}/zh/ops/config.html#pipeline-auto-watermark-interval) 中所配置的间隔发出。
+若 watermark 的间隔是 `0ms` ,那么每条记录都会产生一个 watermark,且 watermark 会在不为空并大于上一个发出的
watermark 时发出。
-When using event time semantics, tables must contain an event time attribute
and watermarking strategy.
+使用事件时间语义时,表必须包含事件时间属性和 watermark 策略。
-Flink provides several commonly used watermark strategies.
+Flink 提供了几种常用的 watermark 策略。
-- Strictly ascending timestamps: `WATERMARK FOR rowtime_column AS
rowtime_column`.
+- 严格递增时间戳: `WATERMARK FOR rowtime_column AS rowtime_column`。
- Emits a watermark of the maximum observed timestamp so far. Rows that have a
timestamp smaller to the max timestamp are not late.
+ 发出到目前为止已观察到的最大时间戳的 watermark ,时间戳小于最大时间戳的行被认为没有迟到。
-- Ascending timestamps: `WATERMARK FOR rowtime_column AS rowtime_column -
INTERVAL '0.001' SECOND`.
+- 递增时间戳: `WATERMARK FOR rowtime_column AS rowtime_column - INTERVAL '0.001'
SECOND`。
- Emits a watermark of the maximum observed timestamp so far minus 1. Rows
that have a timestamp equal to the max timestamp are not late.
+ 发出到目前为止已观察到的最大时间戳减 1 的 watermark ,时间戳等于或小于最大时间戳的行被认为没有迟到。
-- Bounded out of orderness timestamps: `WATERMARK FOR rowtime_column AS
rowtimeField - INTERVAL 'string' timeUnit`.
+- 有界乱序时间戳: `WATERMARK FOR rowtime_column AS rowtimeField - INTERVAL 'string'
timeUnit`。
- Emits watermarks, which are the maximum observed timestamp minus the
specified delay, e.g., `WATERMARK FOR rowtime_column AS rowtimeField - INTERVAL
'5' SECOND` is a 5 seconds delayed watermark strategy.
+ 发出到目前为止已观察到的最大时间戳减去指定延迟的 watermark ,例如, `WATERMARK FOR rowtime_column AS
rowtimeField - INTERVAL '5' SECOND` 是一个 5 秒延迟的 watermark 策略。
{% highlight sql %}
CREATE TABLE Orders (
@@ -192,17 +196,17 @@ CREATE TABLE Orders (
**PARTITIONED BY**
-Partition the created table by the specified columns. A directory is created
for each partition if this table is used as a filesystem sink.
+根据指定的列对已经创建的表进行分区。若表使用 filesystem sink ,则将会为每个分区创建一个目录。
**WITH OPTIONS**
-Table properties used to create a table source/sink. The properties are
usually used to find and create the underlying connector.
+表属性用于创建 table source/sink ,一般用于寻找和创建底层的连接器。
-The key and value of expression `key1=val1` should both be string literal. See
details in [Connect to External Systems]({{ site.baseurl
}}/dev/table/connect.html) for all the supported table properties of different
connectors.
+表达式 `key1=val1` 的键和值必须为字符串文本常量。请参考 [连接外部系统]({ site.baseurl
}}/zh/dev/table/connect.html) 了解不同连接器所支持的属性。
-**Notes:** The table name can be of three formats: 1.
`catalog_name.db_name.table_name` 2. `db_name.table_name` 3. `table_name`. For
`catalog_name.db_name.table_name`, the table would be registered into metastore
with catalog named "catalog_name" and database named "db_name"; for
`db_name.table_name`, the table would be registered into the current catalog of
the execution table environment and database named "db_name"; for `table_name`,
the table would be registered into the current cata [...]
+**注意:** 表名可以为以下三种格式 1. `catalog_name.db_name.table_name` 2.
`db_name.table_name` 3. `table_name`。使用`catalog_name.db_name.table_name`
的表将会与名为 "catalog_name" 的 catalog 和名为 "db_name" 的数据库一起注册到 metastore 中。使用
`db_name.table_name` 的表将会被注册到当前执行的 table environment 中的 catalog 且数据库会被命名为
"db_name";对于 `table_name`, 数据表将会被注册到当前正在运行的catalog和数据库中。
-**Notes:** The table registered with `CREATE TABLE` statement can be used as
both table source and table sink, we can not decide if it is used as a source
or sink until it is referenced in the DMLs.
+**注意:** 使用 `CREATE TABLE` 语句注册的表均可用作 table source 和 table sink。 在被 DML
语句引用前,我们无法决定其实际用于 source 抑或是 sink。
{% top %}
@@ -214,37 +218,41 @@ CREATE DATABASE [IF NOT EXISTS] [catalog_name.]db_name
WITH (key1=val1, key2=val2, ...)
{% endhighlight %}
-Create a database with the given database properties. If a database with the
same name already exists in the catalog, an exception is thrown.
+根据给定的表属性创建数据库。若数据库中已存在同名表会抛出异常。
**IF NOT EXISTS**
-If the database already exists, nothing happens.
+若数据库已经存在,则不会进行任何操作。
**WITH OPTIONS**
-Database properties used to store extra information related to this database.
-The key and value of expression `key1=val1` should both be string literal.
+数据库属性一般用于存储关于这个数据库额外的信息。
+表达式 `key1=val1` 中的键和值都需要是字符串文本常量。
{% top %}
## CREATE FUNCTION
{% highlight sql%}
-CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION
- [IF NOT EXISTS] [[catalog_name.]db_name.]function_name
+CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION
+ [IF NOT EXISTS] [[catalog_name.]db_name.]function_name
AS identifier [LANGUAGE JAVA|SCALA]
{% endhighlight %}
-Create a catalog function that has catalog and database namespaces with the
identifier which is full classpath for JAVA/SCALA and optional language tag. If
a function with the same name already exists in the catalog, an exception is
thrown.
+创建一个有 catalog 和数据库命名空间的 catalog function ,其需要指定 JAVA / SCALA 或其他 language tag
完整的 classpath。 若 catalog 中,已经有同名的函数注册了,则无法注册。
**TEMPORARY**
-Create temporary catalog function that has catalog and database namespaces and
overrides catalog functions.
+
+创建一个有 catalog 和数据库命名空间的临时 catalog function ,并覆盖原有的 catalog function 。
**TEMPORARY SYSTEM**
-Create temporary system function that has no namespace and overrides built-in
functions
+
+创建一个没有数据库命名空间的临时系统 catalog function ,并覆盖系统内置的函数。
**IF NOT EXISTS**
-If the function already exists, nothing happens.
-**LANGUAGE JAVA|SCALA**
-Language tag to instruct flink runtime how to execute the function. Currently
only JAVA and SCALA are supported, the default language for a function is JAVA.
+若该函数已经存在,则不会进行任何操作。
+
+**LANGUAGE JAVA\|SCALA**
+
+Language tag 用于指定 Flink runtime 如何执行这个函数。目前,只支持 JAVA 和 SCALA,且函数的默认语言为 JAVA。
diff --git a/docs/dev/table/sql/drop.md b/docs/dev/table/sql/drop.md
index dd4dec9..6f340e8 100644
--- a/docs/dev/table/sql/drop.md
+++ b/docs/dev/table/sql/drop.md
@@ -152,10 +152,13 @@ DROP [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS]
[catalog_name.][db_name.]
Drop a catalog function that has catalog and database namespaces. If the
function to drop does not exist, an exception is thrown.
**TEMPORARY**
+
Drop temporary catalog function that has catalog and database namespaces.
**TEMPORARY SYSTEM**
+
Drop temporary system function that has no namespace.
**IF EXISTS**
+
If the function doesn't exists, nothing happens.
diff --git a/docs/dev/table/sql/drop.zh.md b/docs/dev/table/sql/drop.zh.md
index c42569e..51ac456 100644
--- a/docs/dev/table/sql/drop.zh.md
+++ b/docs/dev/table/sql/drop.zh.md
@@ -1,5 +1,5 @@
---
-title: "DROP Statements"
+title: "DROP 语句"
nav-parent_id: sql
nav-pos: 3
---
@@ -25,18 +25,19 @@ under the License.
* This will be replaced by the TOC
{:toc}
-DROP statements are used to remove a registered table/view/function from
current or specified [Catalog]({{ site.baseurl }}/dev/table/catalogs.html).
+DROP 语句用于从当前或指定的 [Catalog]({{ site.baseurl }}/zh/dev/table/catalogs.html)
中删除一个已经注册的表、视图或函数。
-Flink SQL supports the following DROP statements for now:
+Flink SQL 目前支持以下 DROP 语句:
- DROP TABLE
- DROP DATABASE
+- DROP FUNCTION
-## Run a DROP statement
+## 执行 DROP 语句
-DROP statements can be executed with the `sqlUpdate()` method of the
`TableEnvironment`, or executed in [SQL CLI]({{ site.baseurl
}}/dev/table/sqlClient.html). The `sqlUpdate()` method returns nothing for a
successful DROP operation, otherwise will throw an exception.
+可以使用 `TableEnvironment` 中的 `sqlUpdate()` 方法执行 DROP 语句,也可以在 [SQL CLI]({{
site.baseurl }}/zh/dev/table/sqlClient.html) 中执行 DROP 语句。 若 DROP
操作执行成功,`sqlUpdate()` 方法不返回任何内容,否则会抛出异常。
-The following examples show how to run a DROP statement in `TableEnvironment`
and in SQL CLI.
+以下的例子展示了如何在 `TableEnvironment` 和 SQL CLI 中执行一个 DROP 语句。
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -44,16 +45,16 @@ The following examples show how to run a DROP statement in
`TableEnvironment` an
EnvironmentSettings settings = EnvironmentSettings.newInstance()...
TableEnvironment tableEnv = TableEnvironment.create(settings);
-// register a table named "Orders"
+// 注册名为 “Orders” 的表
tableEnv.sqlUpdate("CREATE TABLE Orders (`user` BIGINT, product STRING, amount
INT) WITH (...)");
-// a string array: ["Orders"]
+// 字符串数组: ["Orders"]
String[] tables = tableEnv.listTable();
-// drop "Orders" table from catalog
+// 从 catalog 删除 “Orders” 表
tableEnv.sqlUpdate("DROP TABLE Orders");
-// an empty string array
+// 空字符串数组
String[] tables = tableEnv.listTable();
{% endhighlight %}
</div>
@@ -63,16 +64,16 @@ String[] tables = tableEnv.listTable();
val settings = EnvironmentSettings.newInstance()...
val tableEnv = TableEnvironment.create(settings)
-// register a table named "Orders"
+// 注册名为 “Orders” 的表
tableEnv.sqlUpdate("CREATE TABLE Orders (`user` BIGINT, product STRING, amount
INT) WITH (...)");
-// a string array: ["Orders"]
+// 字符串数组: ["Orders"]
val tables = tableEnv.listTable()
-// drop "Orders" table from catalog
+// 从 catalog 删除 “Orders” 表
tableEnv.sqlUpdate("DROP TABLE Orders")
-// an empty string array
+// 空字符串数组
val tables = tableEnv.listTable()
{% endhighlight %}
</div>
@@ -82,13 +83,13 @@ val tables = tableEnv.listTable()
settings = EnvironmentSettings.newInstance()...
table_env = TableEnvironment.create(settings)
-# a string array: ["Orders"]
+# 字符串数组: ["Orders"]
tables = tableEnv.listTable()
-# drop "Orders" table from catalog
+# 从 catalog 删除 “Orders” 表
tableEnv.sqlUpdate("DROP TABLE Orders")
-# an empty string array
+# 空字符串数组
tables = tableEnv.listTable()
{% endhighlight %}
</div>
@@ -116,11 +117,11 @@ Flink SQL> SHOW TABLES;
DROP TABLE [IF EXISTS] [catalog_name.][db_name.]table_name
{% endhighlight %}
-Drop a table with the given table name. If the table to drop does not exist,
an exception is thrown.
+根据给定的表名删除某个表。若需要删除的表不存在,则抛出异常。
**IF EXISTS**
-If the table does not exist, nothing happens.
+表不存在时不会进行任何操作。
## DROP DATABASE
@@ -128,19 +129,19 @@ If the table does not exist, nothing happens.
DROP DATABASE [IF EXISTS] [catalog_name.]db_name [ (RESTRICT | CASCADE) ]
{% endhighlight %}
-Drop a database with the given database name. If the database to drop does not
exist, an exception is thrown.
+根据给定的表名删除数据库。若需要删除的数据库不存在会抛出异常 。
**IF EXISTS**
-If the database does not exist, nothing happens.
+若数据库不存在,不执行任何操作。
**RESTRICT**
-Dropping a non-empty database triggers an exception. Enabled by default.
+当删除一个非空数据库时,会触发异常。(默认为开)
**CASCADE**
-Dropping a non-empty database also drops all associated tables and functions.
+删除一个非空数据库时,把相关联的表与函数一并删除。
## DROP FUNCTION
@@ -148,13 +149,16 @@ Dropping a non-empty database also drops all associated
tables and functions.
DROP [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS]
[catalog_name.][db_name.]function_name;
{% endhighlight %}
-Drop a catalog function that has catalog and database namespaces. If the
function to drop does not exist, an exception is thrown.
+删除一个有 catalog 和数据库命名空间的 catalog function。若需要删除的函数不存在,则会产生异常。
**TEMPORARY**
-Drop temporary catalog function that has catalog and database namespaces.
+
+删除一个有 catalog 和数据库命名空间的临时 catalog function。
**TEMPORARY SYSTEM**
-Drop temporary system function that has no namespace.
+
+删除一个没有数据库命名空间的临时系统函数。
**IF EXISTS**
-If the function doesn't exists, nothing happens.
+
+若函数不存在,则不会进行任何操作。
\ No newline at end of file
diff --git a/docs/dev/table/sql/index.md b/docs/dev/table/sql/index.md
index 8f8fa19..0554054 100644
--- a/docs/dev/table/sql/index.md
+++ b/docs/dev/table/sql/index.md
@@ -29,9 +29,9 @@ This page describes the SQL language supported in Flink,
including Data Definiti
This page lists all the supported statements supported in Flink SQL for now:
- [SELECT (Queries)](queries.html)
-- [CREATE TABLE, VIEW, DATABASE, FUNCTION](create.html)
-- [DROP TABLE, VIEW, DATABASE, FUNCTION](drop.html)
-- [ALTER TABLE, DATABASE](alter.html)
+- [CREATE TABLE, DATABASE, FUNCTION](create.html)
+- [DROP TABLE, DATABASE, FUNCTION](drop.html)
+- [ALTER TABLE, DATABASE, FUNCTION](alter.html)
## Data Types
diff --git a/docs/dev/table/sql/index.zh.md b/docs/dev/table/sql/index.zh.md
index 8f8fa19..493c4b5 100644
--- a/docs/dev/table/sql/index.zh.md
+++ b/docs/dev/table/sql/index.zh.md
@@ -24,34 +24,34 @@ specific language governing permissions and limitations
under the License.
-->
-This page describes the SQL language supported in Flink, including Data
Definition Language (DDL), Data Manipulation Language (DML) and Query Language.
Flink’s SQL support is based on [Apache Calcite](https://calcite.apache.org/)
which implements the SQL standard.
+本页面描述了 Flink 所支持的 SQL 语言,包括数据定义语言(Data Definition Language,DDL)、数据操纵语言(Data
Manipulation Language,DML)以及查询语言。Flink 对 SQL 的支持基于实现了 SQL 标准的 [Apache
Calcite](https://calcite.apache.org/)。
-This page lists all the supported statements supported in Flink SQL for now:
+本页面列出了目前 Flink SQL 所支持的所有语句:
-- [SELECT (Queries)](queries.html)
-- [CREATE TABLE, VIEW, DATABASE, FUNCTION](create.html)
-- [DROP TABLE, VIEW, DATABASE, FUNCTION](drop.html)
-- [ALTER TABLE, DATABASE](alter.html)
+- [SELECT (查询)](queries.html)
+- [CREATE TABLE, DATABASE, FUNCTION](create.html)
+- [DROP TABLE, DATABASE, FUNCTION](drop.html)
+- [ALTER TABLE, DATABASE, FUNCTION](alter.html)
-## Data Types
+## 数据类型
-Please see the dedicated page about [data types]({{ site.baseurl
}}/dev/table/types.html).
+请参考专门描述该主题的页面 [数据类型]({{ site.baseurl }}/zh/dev/table/types.html)。
-Generic types and (nested) composite types (e.g., POJOs, tuples, rows, Scala
case classes) can be fields of a row as well.
+通用类型与(嵌套的)符合类型 (如:POJO、tuples、rows、Scala case 类) 都可以作为行的字段。
-Fields of composite types with arbitrary nesting can be accessed with [value
access functions]({{ site.baseurl
}}/dev/table/functions/systemFunctions.html#value-access-functions).
+符合类型的字段任意的嵌套可被 [值访问函数]({{ site.baseurl
}}/zh/dev/table/functions/systemFunctions.html#value-access-functions) 访问。
-Generic types are treated as a black box and can be passed on or processed by
[user-defined functions]({{ site.baseurl }}/dev/table/functions/udfs.html).
+通用类型将会被视为一个黑箱,且可以被 [用户自定义函数]({{ site.baseurl
}}/zh/dev/table/functions/udfs.html) 传递或引用。
-For DDLs, we support full data types defined in page [Data Types]({{
site.baseurl }}/dev/table/types.html).
+对于 DDL 语句而言,我们支持所有在 [数据类型]({{ site.baseurl }}/zh/dev/table/types.html)
页面中定义的数据类型。
-**Notes:** Some of the data types are not supported in SQL queries yet (i.e.
in cast expressions or literals). E.g. `STRING`, `BYTES`, `RAW`, `TIME(p)
WITHOUT TIME ZONE`, `TIME(p) WITH LOCAL TIME ZONE`, `TIMESTAMP(p) WITHOUT TIME
ZONE`, `TIMESTAMP(p) WITH LOCAL TIME ZONE`, `ARRAY`, `MULTISET`, `ROW`.
+**注意:** SQL查询不支持部分数据类型(cast 表达式或字符常量值)。如:`STRING`, `BYTES`, `RAW`, `TIME(p)
WITHOUT TIME ZONE`, `TIME(p) WITH LOCAL TIME ZONE`, `TIMESTAMP(p) WITHOUT TIME
ZONE`, `TIMESTAMP(p) WITH LOCAL TIME ZONE`, `ARRAY`, `MULTISET`, `ROW`.
{% top %}
-## Reserved Keywords
+## 保留关键字
-Although not every SQL feature is implemented yet, some string combinations
are already reserved as keywords for future use. If you want to use one of the
following strings as a field name, make sure to surround them with backticks
(e.g. `` `value` ``, `` `count` ``).
+虽然 SQL
的特性并未完全实现,但是一些字符串的组合却已经被预留为关键字以备未来使用。如果你希望使用以下字符串作为你的字段名,请在使用时使用反引号将该字段名包起来(如
`` `value` ``, `` `count` `` )。
{% highlight sql %}
diff --git a/docs/dev/table/sql/queries.zh.md b/docs/dev/table/sql/queries.zh.md
index de8d906..67407c0 100644
--- a/docs/dev/table/sql/queries.zh.md
+++ b/docs/dev/table/sql/queries.zh.md
@@ -1,5 +1,5 @@
---
-title: "Queries"
+title: "查询语句"
nav-parent_id: sql
nav-pos: 1
---
@@ -25,17 +25,17 @@ under the License.
* This will be replaced by the TOC
{:toc}
-SELECT queries are specified with the `sqlQuery()` method of the
`TableEnvironment`. The method returns the result of the SELECT query as a
`Table`. A `Table` can be used in [subsequent SQL and Table API queries]({{
site.baseurl }}/dev/table/common.html#mixing-table-api-and-sql), be [converted
into a DataSet or DataStream]({{ site.baseurl
}}/dev/table/common.html#integration-with-datastream-and-dataset-api), or
[written to a TableSink]({{ site.baseurl }}/dev/table/common.html#emit-a-tabl
[...]
+SELECT 查询需要使用 `TableEnvironment` 的 `sqlQuery()` 方法加以指定。这个方法会以 `Table` 的形式返回
SELECT 的查询结果。 `Table` 可以被用于 [随后的SQL 与 Table API 查询]({{ site.baseurl
}}/zh/dev/table/common.html#mixing-table-api-and-sql) 、 [转换为 DataSet 或
DataStream ]({{ site.baseurl
}}/zh/dev/table/common.html#integration-with-datastream-and-dataset-api)或 [输出到
TableSink ]({{ site.baseurl }}/dev/table/common.html#emit-a-table))。SQL 与 Table
API 的查询可以进行无缝融合、整体优化并翻译为单一的程序。
-In order to access a table in a SQL query, it must be [registered in the
TableEnvironment]({{ site.baseurl
}}/dev/table/common.html#register-tables-in-the-catalog). A table can be
registered from a [TableSource]({{ site.baseurl
}}/dev/table/common.html#register-a-tablesource), [Table]({{ site.baseurl
}}/dev/table/common.html#register-a-table), [CREATE TABLE
statement](#create-table), [DataStream, or DataSet]({{ site.baseurl
}}/dev/table/common.html#register-a-datastream-or-dataset-as-tab [...]
+为了可以在 SQL 查询中访问到表,你需要先 [在 TableEnvironment 中注册表 ]({{ site.baseurl
}}/zh/dev/table/common.html#register-tables-in-the-catalog)。表可以通过
[TableSource]({{ site.baseurl
}}/zh/dev/table/common.html#register-a-tablesource)、 [Table]({{ site.baseurl
}}/zh/dev/table/common.html#register-a-table)、[CREATE TABLE 语句](create.html)、
[DataStream 或 DataSet]({{ site.baseurl
}}/zh/dev/table/common.html#register-a-datastream-or-dataset-as-table) 注册。
用户也可以通过 [向 TableEnvironment 中注册 catalog ]({{ site.baseurl }}/ [...]
-For convenience, `Table.toString()` automatically registers the table under a
unique name in its `TableEnvironment` and returns the name. So, `Table` objects
can be directly inlined into SQL queries as shown in the examples below.
+为方便起见 `Table.toString()` 将会在其 `TableEnvironment` 中自动使用一个唯一的名字注册表并返回表名。 因此,
`Table` 对象可以如下文所示样例,直接内联到 SQL 查询中。
-**Note:** Queries that include unsupported SQL features cause a
`TableException`. The supported features of SQL on batch and streaming tables
are listed in the following sections.
+**注意:** 查询若包括了不支持的 SQL 特性,将会抛出 `TableException`。批处理和流处理所支持的 SQL 特性将会在下述章节中列出。
-## Specifying a Query
+## 指定查询
-The following examples show how to specify a SQL queries on registered and
inlined tables.
+以下示例显示如何在已注册和内联表上指定 SQL 查询。
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -43,23 +43,23 @@ The following examples show how to specify a SQL queries on
registered and inlin
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
-// ingest a DataStream from an external source
+// 从外部数据源读取 DataStream
DataStream<Tuple3<Long, String, Integer>> ds = env.addSource(...);
-// SQL query with an inlined (unregistered) table
+// 使用 SQL 查询内联的(未注册的)表
Table table = tableEnv.fromDataStream(ds, "user, product, amount");
Table result = tableEnv.sqlQuery(
"SELECT SUM(amount) FROM " + table + " WHERE product LIKE '%Rubber%'");
-// SQL query with a registered table
-// register the DataStream as view "Orders"
+// SQL 查询一个已经注册的表
+// 根据视图 "Orders" 创建一个 DataStream
tableEnv.createTemporaryView("Orders", ds, "user, product, amount");
-// run a SQL query on the Table and retrieve the result as a new Table
+// 在表上执行 SQL 查询并得到以新表返回的结果
Table result2 = tableEnv.sqlQuery(
"SELECT product, amount FROM Orders WHERE product LIKE '%Rubber%'");
-// SQL update with a registered table
-// create and register a TableSink
+// SQL 更新一个已经注册的表
+// 创建并注册一个 TableSink
final Schema schema = new Schema()
.field("product", DataTypes.STRING())
.field("amount", DataTypes.INT());
@@ -69,7 +69,7 @@ tableEnv.connect(new FileSystem("/path/to/file"))
.withSchema(schema)
.createTemporaryTable("RubberOrders");
-// run a SQL update query on the Table and emit the result to the TableSink
+// 在表上执行更新语句并把结果发出到 TableSink
tableEnv.sqlUpdate(
"INSERT INTO RubberOrders SELECT product, amount FROM Orders WHERE product
LIKE '%Rubber%'");
{% endhighlight %}
@@ -80,23 +80,23 @@ tableEnv.sqlUpdate(
val env = StreamExecutionEnvironment.getExecutionEnvironment
val tableEnv = StreamTableEnvironment.create(env)
-// read a DataStream from an external source
+// 从外部数据源读取 DataStream
val ds: DataStream[(Long, String, Integer)] = env.addSource(...)
-// SQL query with an inlined (unregistered) table
+// 使用 SQL 查询内联的(未注册的)表
val table = ds.toTable(tableEnv, 'user, 'product, 'amount)
val result = tableEnv.sqlQuery(
s"SELECT SUM(amount) FROM $table WHERE product LIKE '%Rubber%'")
-// SQL query with a registered table
-// register the DataStream under the name "Orders"
+// SQL 查询一个已经注册的表
+// 使用名称 "Orders" 注册一个 DataStream
tableEnv.createTemporaryView("Orders", ds, 'user, 'product, 'amount)
-// run a SQL query on the Table and retrieve the result as a new Table
+// 在表上执行 SQL 查询并得到以新表返回的结果
val result2 = tableEnv.sqlQuery(
"SELECT product, amount FROM Orders WHERE product LIKE '%Rubber%'")
-// SQL update with a registered table
-// create and register a TableSink
+// 使用 SQL 更新一个已经注册的表
+// 创建并注册一个 TableSink
val schema = new Schema()
.field("product", DataTypes.STRING())
.field("amount", DataTypes.INT())
@@ -106,7 +106,7 @@ tableEnv.connect(new FileSystem("/path/to/file"))
.withSchema(schema)
.createTemporaryTable("RubberOrders")
-// run a SQL update query on the Table and emit the result to the TableSink
+// 在表上执行 SQL 更新操作,并把结果发出到 TableSink
tableEnv.sqlUpdate(
"INSERT INTO RubberOrders SELECT product, amount FROM Orders WHERE product
LIKE '%Rubber%'")
{% endhighlight %}
@@ -117,14 +117,14 @@ tableEnv.sqlUpdate(
env = StreamExecutionEnvironment.get_execution_environment()
table_env = StreamTableEnvironment.create(env)
-# SQL query with an inlined (unregistered) table
-# elements data type: BIGINT, STRING, BIGINT
+# SQL 查询内联的(未注册的)表
+# 元素数据类型: BIGINT, STRING, BIGINT
table = table_env.from_elements(..., ['user', 'product', 'amount'])
result = table_env \
.sql_query("SELECT SUM(amount) FROM %s WHERE product LIKE '%%Rubber%%'" %
table)
-# SQL update with a registered table
-# create and register a TableSink
+# SQL 更新已经注册的表
+# 创建并注册 TableSink
t_env.connect(FileSystem().path("/path/to/file")))
.with_format(Csv()
.field_delimiter(',')
@@ -134,7 +134,7 @@ t_env.connect(FileSystem().path("/path/to/file")))
.field("amount", DataTypes.BIGINT()))
.create_temporary_table("RubberOrders")
-# run a SQL update query on the Table and emit the result to the TableSink
+# 在表上执行 SQL 更新操作,并把结果发出到 TableSink
table_env \
.sql_update("INSERT INTO RubberOrders SELECT product, amount FROM Orders
WHERE product LIKE '%Rubber%'")
{% endhighlight %}
@@ -143,11 +143,11 @@ table_env \
{% top %}
-## Supported Syntax
+## 支持的语法
-Flink parses SQL using [Apache
Calcite](https://calcite.apache.org/docs/reference.html), which supports
standard ANSI SQL.
+Flink 通过支持标准 ANSI SQL的 [Apache
Calcite](https://calcite.apache.org/docs/reference.html) 解析 SQL。
-The following BNF-grammar describes the superset of supported SQL features in
batch and streaming queries. The [Operations](#operations) section shows
examples for the supported features and indicates which features are only
supported for batch or streaming queries.
+以下 BNF-语法 描述了批处理和流处理查询中所支持的 SQL 特性的超集。其中 [操作符](#操作符)
章节展示了所支持的特性的样例,并指明了哪些特性仅适用于批处理或流处理。
{% highlight sql %}
query:
@@ -270,47 +270,47 @@ patternQuantifier:
{% endhighlight %}
-Flink SQL uses a lexical policy for identifier (table, attribute, function
names) similar to Java:
+Flink SQL 对于标识符(表、属性、函数名)有类似于 Java 的词法约定:
-- The case of identifiers is preserved whether or not they are quoted.
-- After which, identifiers are matched case-sensitively.
-- Unlike Java, back-ticks allow identifiers to contain non-alphanumeric
characters (e.g. <code>"SELECT a AS `my field` FROM t"</code>).
+- 不管是否引用标识符,都保留标识符的大小写。
+- 且标识符需区分大小写。
+- 与 Java 不一样的地方在于,通过反引号,可以允许标识符带有非字母的字符(如:<code>"SELECT a AS `my field` FROM
t"</code>)。
-String literals must be enclosed in single quotes (e.g., `SELECT 'Hello
World'`). Duplicate a single quote for escaping (e.g., `SELECT 'It''s me.'`).
Unicode characters are supported in string literals. If explicit unicode code
points are required, use the following syntax:
+字符串文本常量需要被单引号包起来(如 `SELECT 'Hello World'` )。两个单引号表示转移(如 `SELECT 'It''s
me.'`)。字符串文本常量支持 Unicode 字符,如需明确使用 Unicode 编码,请使用以下语法:
-- Use the backslash (`\`) as escaping character (default): `SELECT U&'\263A'`
-- Use a custom escaping character: `SELECT U&'#263A' UESCAPE '#'`
+- 使用反斜杠(`\`)作为转义字符(默认):`SELECT U&'\263A'`
+- 使用自定义的转义字符: `SELECT U&'#263A' UESCAPE '#'`
{% top %}
-## Operations
+## 操作符
-### Show and Use
+### Show 与 Use
<div markdown="1">
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 20%">Operation</th>
- <th class="text-center">Description</th>
+ <th class="text-left" style="width: 20%">操作符</th>
+ <th class="text-center">描述</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<strong>Show</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
- <p>Show all catalogs</p>
+ <p>显示所有 catalog</p>
{% highlight sql %}
SHOW CATALOGS;
{% endhighlight %}
- <p>Show all databases in the current catalog</p>
+ <p>显示当前 catalog 中所有的数据库</p>
{% highlight sql %}
SHOW DATABASES;
{% endhighlight %}
- <p>Show all tables in the current database in the current
catalog</p>
+ <p>显示当前数据库、Catalog中的所有表</p>
{% highlight sql %}
SHOW TABLES;
{% endhighlight %}
@@ -319,14 +319,14 @@ SHOW TABLES;
<tr>
<td>
<strong>Use</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
- <p>Set current catalog for the session </p>
+ <p>为本次会话设置 catalog </p>
{% highlight sql %}
USE CATALOG mycatalog;
{% endhighlight %}
- <p>Set current database of the current catalog for the session</p>
+ <p>为会话设置一个属于当前 catalog 的数据库</p>
{% highlight sql %}
USE mydatabase;
{% endhighlight %}
@@ -336,34 +336,34 @@ USE mydatabase;
</table>
</div>
-### Scan, Projection, and Filter
+### Scan、Projection 与 Filter
<div markdown="1">
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 20%">Operation</th>
- <th class="text-center">Description</th>
+ <th class="text-left" style="width: 20%">操作符</th>
+ <th class="text-center">描述</th>
</tr>
</thead>
<tbody>
- <tr>
- <td>
+ <tr>
+ <td>
<strong>Scan / Select / As</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
- <td>
+ <td>
{% highlight sql %}
SELECT * FROM Orders
SELECT a, c AS d FROM Orders
{% endhighlight %}
</td>
- </tr>
+ </tr>
<tr>
<td>
<strong>Where / Filter</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
{% highlight sql %}
@@ -375,11 +375,11 @@ SELECT * FROM Orders WHERE a % 2 = 0
</tr>
<tr>
<td>
- <strong>User-defined Scalar Functions (Scalar UDF)</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <strong>用户定义标量函数(Scalar UDF)</strong><br>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
- <p>UDFs must be registered in the TableEnvironment. See the <a href="{{
site.baseurl }}/dev/table/functions/udfs.html">UDF documentation</a> for
details on how to specify and register scalar UDFs.</p>
+ <p>自定义函数必须事先注册到 TableEnvironment 中。 可阅读 <a href="{{ site.baseurl
}}/zh/dev/table/functions/udfs.html">自定义函数文档</a> 以获得如何指定和注册自定义函数的详细信息。</p>
{% highlight sql %}
SELECT PRETTY_PRINT(user) FROM Orders
{% endhighlight %}
@@ -391,25 +391,25 @@ SELECT PRETTY_PRINT(user) FROM Orders
{% top %}
-### Aggregations
+### 聚合
<div markdown="1">
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 20%">Operation</th>
- <th class="text-center">Description</th>
+ <th class="text-left" style="width: 20%">操作符</th>
+ <th class="text-center">描述</th>
</tr>
</thead>
<tbody>
<tr>
<td>
- <strong>GroupBy Aggregation</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span><br>
- <span class="label label-info">Result Updating</span>
+ <strong>GroupBy 聚合</strong><br>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span><br>
+ <span class="label label-info">可自动更新结果</span>
</td>
<td>
- <p><b>Note:</b> GroupBy on a streaming table produces an updating
result. See the <a href="{{
site.baseurl}}/dev/table/streaming/dynamic_tables.html">Dynamic Tables
Streaming Concepts</a> page for details.
+ <p><b>注意:</b> GroupBy 在流处理表中会产生更新结果(updating result)。详情请阅读 <a href="{{
site.baseurl }}/zh/dev/table/streaming/dynamic_tables.html">动态表流概念</a> 。
</p>
{% highlight sql %}
SELECT a, SUM(b) as d
@@ -419,12 +419,12 @@ GROUP BY a
</td>
</tr>
<tr>
- <td>
- <strong>GroupBy Window Aggregation</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <td>
+ <strong>GroupBy 窗口聚合</strong><br>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
- <td>
- <p>Use a group window to compute a single result row per group. See <a
href="#group-windows">Group Windows</a> section for more details.</p>
+ <td>
+ <p>使用分组窗口对每个组进行计算并得到一个结果行。详情请阅读 <a href="#分组窗口">分组窗口</a> 章节</p>
{% highlight sql %}
SELECT user, SUM(amount)
FROM Orders
@@ -433,12 +433,12 @@ GROUP BY TUMBLE(rowtime, INTERVAL '1' DAY), user
</td>
</tr>
<tr>
- <td>
+ <td>
<strong>Over Window aggregation</strong><br>
- <span class="label label-primary">Streaming</span>
+ <span class="label label-primary">流处理</span>
</td>
- <td>
- <p><b>Note:</b> All aggregates must be defined over the same window,
i.e., same partitioning, sorting, and range. Currently, only windows with
PRECEDING (UNBOUNDED and bounded) to CURRENT ROW range are supported. Ranges
with FOLLOWING are not supported yet. ORDER BY must be specified on a single <a
href="{{ site.baseurl }}/dev/table/streaming/time_attributes.html">time
attribute</a></p>
+ <td>
+ <p><b>注意:</b> 所有的聚合必须定义到同一个窗口中,即相同的分区、排序和区间。当前仅支持 PRECEDING (无界或有界) 到
CURRENT ROW 范围内的窗口、FOLLOWING 所描述的区间并未支持,ORDER BY 必须指定于单个的<a href="{{
site.baseurl }}/zh/dev/table/streaming/time_attributes.html">时间属性</a>。</p>
{% highlight sql %}
SELECT COUNT(amount) OVER (
PARTITION BY user
@@ -447,31 +447,31 @@ SELECT COUNT(amount) OVER (
FROM Orders
SELECT COUNT(amount) OVER w, SUM(amount) OVER w
-FROM Orders
+FROM Orders
WINDOW w AS (
PARTITION BY user
ORDER BY proctime
- ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
+ ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
{% endhighlight %}
</td>
</tr>
<tr>
<td>
<strong>Distinct</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span> <br>
- <span class="label label-info">Result Updating</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span> <br>
+ <span class="label label-info">可自动更新结果</span>
</td>
<td>
{% highlight sql %}
SELECT DISTINCT users FROM Orders
{% endhighlight %}
- <p><b>Note:</b> For streaming queries the required state to compute the
query result might grow infinitely depending on the number of distinct fields.
Please provide a query configuration with valid retention interval to prevent
excessive state size. See <a href="{{ site.baseurl
}}/dev/table/streaming/query_configuration.html">Query Configuration</a> for
details.</p>
+ <p><b>注意:</b>
对于流处理查询,根据不同字段的数量,计算查询结果所需的状态可能会无限增长。请提供具有有效保留间隔的查询配置,以防止出现过多的状态。请阅读 <a
href="{{ site.baseurl
}}/zh/dev/table/streaming/query_configuration.html">查询配置</a> 以获取详细的信息</p>
</td>
</tr>
<tr>
<td>
<strong>Grouping sets, Rollup, Cube</strong><br>
- <span class="label label-primary">Batch</span>
+ <span class="label label-primary">批处理</span>
</td>
<td>
{% highlight sql %}
@@ -484,7 +484,7 @@ GROUP BY GROUPING SETS ((user), (product))
<tr>
<td>
<strong>Having</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
{% highlight sql %}
@@ -497,11 +497,11 @@ HAVING SUM(amount) > 50
</tr>
<tr>
<td>
- <strong>User-defined Aggregate Functions (UDAGG)</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <strong>用户自定义聚合函数 (UDAGG)</strong><br>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
- <p>UDAGGs must be registered in the TableEnvironment. See the <a
href="{{ site.baseurl }}/dev/table/functions/udfs.html">UDF documentation</a>
for details on how to specify and register UDAGGs.</p>
+ <p>UDAGG 必须注册到 TableEnvironment. 参考<a href="{{ site.baseurl
}}/zh/dev/table/functions/udfs.html">自定义函数文档</a> 以了解如何指定和注册 UDAGG 。</p>
{% highlight sql %}
SELECT MyAggregate(amount)
FROM Orders
@@ -521,35 +521,35 @@ GROUP BY users
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 20%">Operation</th>
- <th class="text-center">Description</th>
+ <th class="text-left" style="width: 20%">操作符</th>
+ <th class="text-center">描述</th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>Inner Equi-join</strong><br>
- <span class="label label-primary">Batch</span>
- <span class="label label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span>
+ <span class="label label-primary">流处理</span>
</td>
<td>
- <p>Currently, only equi-joins are supported, i.e., joins that have at
least one conjunctive condition with an equality predicate. Arbitrary cross or
theta joins are not supported.</p>
- <p><b>Note:</b> The order of joins is not optimized. Tables are joined
in the order in which they are specified in the FROM clause. Make sure to
specify tables in an order that does not yield a cross join (Cartesian product)
which are not supported and would cause a query to fail.</p>
+ <p>目前仅支持 equi-join ,即 join 的联合条件至少拥有一个相等谓词。不支持任何 cross join 和 theta
join。</p>
+ <p><b>注意:</b> Join 的顺序没有进行优化,join 会按照 FROM 中所定义的顺序依次执行。请确保 join
所指定的表在顺序执行中不会产生不支持的 cross join (笛卡儿积)以至查询失败。</p>
{% highlight sql %}
SELECT *
FROM Orders INNER JOIN Product ON Orders.productId = Product.id
{% endhighlight %}
- <p><b>Note:</b> For streaming queries the required state to compute
the query result might grow infinitely depending on the number of distinct
input rows. Please provide a query configuration with valid retention interval
to prevent excessive state size. See <a href="{{ site.baseurl
}}/dev/table/streaming/query_configuration.html">Query Configuration</a> for
details.</p>
+ <p><b>注意:</b>
流查询中可能会因为不同行的输入数量导致计算结果的状态无限增长。请提供具有有效保留间隔的查询配置,以防止出现过多的状态。详情请参考 <a href="{{
site.baseurl }}/zh/dev/table/streaming/query_configuration.html">查询配置</a>
页面.</p>
</td>
</tr>
<tr>
<td><strong>Outer Equi-join</strong><br>
- <span class="label label-primary">Batch</span>
- <span class="label label-primary">Streaming</span>
- <span class="label label-info">Result Updating</span>
+ <span class="label label-primary">批处理</span>
+ <span class="label label-primary">流处理</span>
+ <span class="label label-info">可自动更新结果</span>
</td>
<td>
- <p>Currently, only equi-joins are supported, i.e., joins that have at
least one conjunctive condition with an equality predicate. Arbitrary cross or
theta joins are not supported.</p>
- <p><b>Note:</b> The order of joins is not optimized. Tables are joined
in the order in which they are specified in the FROM clause. Make sure to
specify tables in an order that does not yield a cross join (Cartesian product)
which are not supported and would cause a query to fail.</p>
+ <p>目前仅支持 equi-join ,即 join 的联合条件至少拥有一个相等谓词。不支持任何 cross join 和 theta
join。</p>
+ <p><b>注意:</b> Join 的顺序没有进行优化,join 会按照 FROM 中所定义的顺序依次执行。请确保 join
所指定的表在顺序执行中不会产生不支持的 cross join (笛卡儿积)以至查询失败。</p>
{% highlight sql %}
SELECT *
FROM Orders LEFT JOIN Product ON Orders.productId = Product.id
@@ -560,19 +560,19 @@ FROM Orders RIGHT JOIN Product ON Orders.productId =
Product.id
SELECT *
FROM Orders FULL OUTER JOIN Product ON Orders.productId = Product.id
{% endhighlight %}
- <p><b>Note:</b> For streaming queries the required state to compute
the query result might grow infinitely depending on the number of distinct
input rows. Please provide a query configuration with valid retention interval
to prevent excessive state size. See <a href="{{ site.baseurl
}}/dev/table/streaming/query_configuration.html">Query Configuration</a> for
details.</p>
+ <p><b>注意:</b>
流查询中可能会因为不同行的输入数量导致计算结果的状态无限增长。请提供具有有效保留间隔的查询配置,以防止出现过多的状态。详情请参考 <a href="{{
site.baseurl }}zh//dev/table/streaming/query_configuration.html">查询配置</a>
页面.</p>
</td>
</tr>
<tr>
<td><strong>Time-windowed Join</strong><br>
- <span class="label label-primary">Batch</span>
- <span class="label label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span>
+ <span class="label label-primary">流处理</span>
</td>
<td>
- <p><b>Note:</b> Time-windowed joins are a subset of regular joins that
can be processed in a streaming fashion.</p>
+ <p><b>注意:</b> 时间窗口 join 是常规 join 的子集,可以使用流的方式进行处理。</p>
- <p>A time-windowed join requires at least one equi-join predicate and
a join condition that bounds the time on both sides. Such a condition can be
defined by two appropriate range predicates (<code><, <=, >=,
></code>), a <code>BETWEEN</code> predicate, or a single equality predicate
that compares <a href="{{ site.baseurl
}}/dev/table/streaming/time_attributes.html">time attributes</a> of the same
type (i.e., processing time or event time) of both input tables.</p>
- <p>For example, the following predicates are valid window join
conditions:</p>
+ <p>时间窗口join需要至少一个 equi-join 谓词和一个限制了双方时间的 join
条件。例如使用两个适当的范围谓词(<code><, <=, >=, ></code>),一个 <code>BETWEEN</code>
谓词或一个比较两个输入表中相同类型的 <a href="{{ site.baseurl
}}/zh/dev/table/streaming/time_attributes.html">时间属性</a> (即处理时间和事件时间)的相等谓词</p>
+ <p>比如,以下谓词是合法的窗口 join 条件:</p>
<ul>
<li><code>ltime = rtime</code></li>
@@ -587,16 +587,16 @@ WHERE o.id = s.orderId AND
o.ordertime BETWEEN s.shiptime - INTERVAL '4' HOUR AND s.shiptime
{% endhighlight %}
-The example above will join all orders with their corresponding shipments if
the order was shipped four hours after the order was received.
+以上示例中,所有在收到后四小时内发货的 order 会与他们相关的 shipment 进行 join。
</td>
</tr>
<tr>
- <td>
+ <td>
<strong>Expanding arrays into a relation</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
- <td>
- <p>Unnesting WITH ORDINALITY is not supported yet.</p>
+ <td>
+ <p>目前尚未支持非嵌套的 WITH ORDINALITY 。</p>
{% highlight sql %}
SELECT users, tag
FROM Orders CROSS JOIN UNNEST(tags) AS t (tag)
@@ -604,44 +604,44 @@ FROM Orders CROSS JOIN UNNEST(tags) AS t (tag)
</td>
</tr>
<tr>
- <td>
- <strong>Join with Table Function (UDTF)</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <td>
+ <strong>Join 表函数 (UDTF)</strong><br>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
- <td>
- <p>Joins a table with the results of a table function. Each row of the
left (outer) table is joined with all rows produced by the corresponding call
of the table function.</p>
- <p>User-defined table functions (UDTFs) must be registered before. See
the <a href="{{ site.baseurl }}/dev/table/functions/udfs.html">UDF
documentation</a> for details on how to specify and register UDTFs. </p>
+ <td>
+ <p>将表与表函数的结果进行 join 操作。左表(outer)中的每一行将会与调用表函数所产生的所有结果中相关联行进行 join 。</p>
+ <p>用户自定义表函数( User-defined table functions,UDTFs ) 在执行前必须先注册。请参考 <a
href="{{ site.baseurl }}/zh/dev/table/functions/udfs.html">UDF 文档</a>
以获取更多关于指定和注册UDF的信息 </p>
<p><b>Inner Join</b></p>
- <p>A row of the left (outer) table is dropped, if its table function
call returns an empty result.</p>
+ <p>若表函数返回了空结果,左表(outer)的行将会被删除。</p>
{% highlight sql %}
SELECT users, tag
FROM Orders, LATERAL TABLE(unnest_udtf(tags)) t AS tag
{% endhighlight %}
<p><b>Left Outer Join</b></p>
- <p>If a table function call returns an empty result, the corresponding
outer row is preserved and the result padded with null values.</p>
+ <p>若表函数返回了空结果,将会保留相对应的外部行并用空值填充结果。</p>
{% highlight sql %}
SELECT users, tag
FROM Orders LEFT JOIN LATERAL TABLE(unnest_udtf(tags)) t AS tag ON TRUE
{% endhighlight %}
- <p><b>Note:</b> Currently, only literal <code>TRUE</code> is supported
as predicate for a left outer join against a lateral table.</p>
+ <p><b>注意:</b> 当前仅支持文本常量 <code>TRUE</code> 作为针对横向表的左外部联接的谓词。</p>
</td>
</tr>
<tr>
<td>
- <strong>Join with Temporal Table Function</strong><br>
- <span class="label label-primary">Streaming</span>
+ <strong>Join Temporal Table Function</strong><br>
+ <span class="label label-primary">流处理</span>
</td>
<td>
- <p><a href="{{ site.baseurl
}}/dev/table/streaming/temporal_tables.html">Temporal tables</a> are tables
that track changes over time.</p>
- <p>A <a href="{{ site.baseurl
}}/dev/table/streaming/temporal_tables.html#temporal-table-functions">Temporal
table function</a> provides access to the state of a temporal table at a
specific point in time.
- The syntax to join a table with a temporal table function is the same
as in <i>Join with Table Function</i>.</p>
+ <p><a href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html">Temporal Tables</a>
是跟随时间变化而变化的表。</p>
+ <p><a href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html#temporal-table-functions">
Temporal Table Function</a> 提供访问 Temporal Tables 在某一时间点的状态的能力。
+ Join Temporal Table Function 的语法与 <i>Join Table Function</i> 一致。</p>
- <p><b>Note:</b> Currently only inner joins with temporal tables are
supported.</p>
+ <p><b>注意:</b> 目前仅支持在 Temporal Tables 上的 inner join 。</p>
- <p>Assuming <i>Rates</i> is a <a href="{{ site.baseurl
}}/dev/table/streaming/temporal_tables.html#temporal-table-functions">temporal
table function</a>, the join can be expressed in SQL as follows:</p>
+ <p>假如 <i>Rates</i> 是一个 <a href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html#temporal-table-functions">
Temporal Table Function</a>, join 可以使用 SQL 进行如下的表达:</p>
{% highlight sql %}
SELECT
o_amount, r_rate
@@ -651,20 +651,20 @@ FROM
WHERE
r_currency = o_currency
{% endhighlight %}
- <p>For more information please check the more detailed <a href="{{
site.baseurl }}/dev/table/streaming/temporal_tables.html">temporal tables
concept description</a>.</p>
+ <p>请查看 <a href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html"> Temporal Tables 概念描述</a>
以了解详细信息。</p>
</td>
</tr>
<tr>
<td>
- <strong>Join with Temporal Table</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <strong>Join Temporal Tables </strong><br>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
- <p><a href="{{ site.baseurl
}}/dev/table/streaming/temporal_tables.html">Temporal Tables</a> are tables
that track changes over time.
- A <a href="{{ site.baseurl
}}/dev/table/streaming/temporal_tables.html#temporal-table">Temporal Table</a>
provides access to the versions of a temporal table at a specific point in
time.</p>
+ <p><a href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html">Temporal Tables</a> 是随时间变化而变化的表。
+ <a href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html#temporal-table">Temporal
Table</a> 提供访问指定时间点的 temporal table 版本的功能。</p>
- <p>Only inner and left joins with processing-time temporal tables are
supported.</p>
- <p>The following example assumes that <strong>LatestRates</strong> is
a <a href="{{ site.baseurl
}}/dev/table/streaming/temporal_tables.html#temporal-table">Temporal Table</a>
which is materialized with the latest rate.</p>
+ <p>仅支持带有处理时间的 temporal tables 的 inner 和 left join。</p>
+ <p>下述示例中,假设 <strong>LatestRates</strong> 是一个根据最新的 rates 物化的 <a
href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html#temporal-table">Temporal
Table</a> 。</p>
{% highlight sql %}
SELECT
o.amout, o.currency, r.rate, o.amount * r.rate
@@ -673,8 +673,8 @@ FROM
JOIN LatestRates FOR SYSTEM_TIME AS OF o.proctime AS r
ON r.currency = o.currency
{% endhighlight %}
- <p>For more information please check the more detailed <a href="{{
site.baseurl }}/dev/table/streaming/temporal_tables.html">Temporal Tables</a>
concept description.</p>
- <p>Only supported in Blink planner.</p>
+ <p>请阅读 <a href="{{ site.baseurl
}}/zh/dev/table/streaming/temporal_tables.html">Temporal Tables</a>
概念描述以了解详细信息。</p>
+ <p>仅 Blink planner 支持。</p>
</td>
</tr>
@@ -684,21 +684,21 @@ FROM
{% top %}
-### Set Operations
+### 集合操作
<div markdown="1">
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 20%">Operation</th>
- <th class="text-center">Description</th>
+ <th class="text-left" style="width: 20%">操作符</th>
+ <th class="text-center">描述</th>
</tr>
</thead>
<tbody>
- <tr>
+ <tr>
<td>
<strong>Union</strong><br>
- <span class="label label-primary">Batch</span>
+ <span class="label label-primary">批处理</span>
</td>
<td>
{% highlight sql %}
@@ -714,7 +714,7 @@ FROM (
<tr>
<td>
<strong>UnionAll</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
{% highlight sql %}
@@ -731,7 +731,7 @@ FROM (
<tr>
<td>
<strong>Intersect / Except</strong><br>
- <span class="label label-primary">Batch</span>
+ <span class="label label-primary">批处理</span>
</td>
<td>
{% highlight sql %}
@@ -756,10 +756,10 @@ FROM (
<tr>
<td>
<strong>In</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
- <p>Returns true if an expression exists in a given table sub-query.
The sub-query table must consist of one column. This column must have the same
data type as the expression.</p>
+ <p>若表达式在给定的表子查询中存在,则返回 true 。子查询表必须由单个列构成,且该列的数据类型需与表达式保持一致。</p>
{% highlight sql %}
SELECT user, amount
FROM Orders
@@ -767,17 +767,17 @@ WHERE product IN (
SELECT product FROM NewProducts
)
{% endhighlight %}
- <p><b>Note:</b> For streaming queries the operation is rewritten in a
join and group operation. The required state to compute the query result might
grow infinitely depending on the number of distinct input rows. Please provide
a query configuration with valid retention interval to prevent excessive state
size. See <a href="{{ site.baseurl
}}/dev/table/streaming/query_configuration.html">Query Configuration</a> for
details.</p>
+ <p><b>注意:</b> 在流查询中,这一操作将会被重写为 join 和 group
操作。该查询所需要的状态可能会由于不同的输入行数而导致无限增长。请在查询配置中提合理的保留间隔以避免产生状态过大。请阅读 <a href="{{
site.baseurl }}/zh/dev/table/streaming/query_configuration.html">查询配置</a>
以了解详细信息</p>
</td>
</tr>
<tr>
<td>
<strong>Exists</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
- <p>Returns true if the sub-query returns at least one row. Only
supported if the operation can be rewritten in a join and group operation.</p>
+ <p>若子查询的结果多于一行,将返回 true 。仅支持可以被通过 join 和 group 重写的操作。</p>
{% highlight sql %}
SELECT user, amount
FROM Orders
@@ -785,7 +785,7 @@ WHERE product EXISTS (
SELECT product FROM NewProducts
)
{% endhighlight %}
- <p><b>Note:</b> For streaming queries the operation is rewritten in a
join and group operation. The required state to compute the query result might
grow infinitely depending on the number of distinct input rows. Please provide
a query configuration with valid retention interval to prevent excessive state
size. See <a href="{{ site.baseurl
}}/dev/table/streaming/query_configuration.html">Query Configuration</a> for
details.</p>
+ <p><b>注意:</b> 在流查询中,这一操作将会被重写为 join 和 group
操作。该查询所需要的状态可能会由于不同的输入行数而导致无限增长。请在查询配置中提合理的保留间隔以避免产生状态过大。请阅读 <a href="{{
site.baseurl }}/zh/dev/table/streaming/query_configuration.html">查询配置</a>
以了解详细信息</p>
</td>
</tr>
</tbody>
@@ -800,18 +800,18 @@ WHERE product EXISTS (
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 20%">Operation</th>
- <th class="text-center">Description</th>
+ <th class="text-left" style="width: 20%">操作符</th>
+ <th class="text-center">描述</th>
</tr>
</thead>
<tbody>
- <tr>
+ <tr>
<td>
<strong>Order By</strong><br>
- <span class="label label-primary">Batch</span> <span class="label
label-primary">Streaming</span>
+ <span class="label label-primary">批处理</span> <span class="label
label-primary">流处理</span>
</td>
<td>
-<b>Note:</b> The result of streaming queries must be primarily sorted on an
ascending <a href="{{ site.baseurl
}}/dev/table/streaming/time_attributes.html">time attribute</a>. Additional
sorting attributes are supported.
+<b>注意:</b> 流处理结果需主要根据 <a href="{{ site.baseurl
}}/zh/dev/table/streaming/time_attributes.html">时间属性</a> 按照升序进行排序。支持使用其他排序属性。
{% highlight sql %}
SELECT *
@@ -823,10 +823,10 @@ ORDER BY orderTime
<tr>
<td><strong>Limit</strong><br>
- <span class="label label-primary">Batch</span>
+ <span class="label label-primary">批处理</span>
</td>
<td>
-<b>Note:</b> The LIMIT clause requires an ORDER BY clause.
+<b>注意:</b> LIMIT 查询需要有一个 ORDER BY 字句。
{% highlight sql %}
SELECT *
FROM Orders
@@ -844,14 +844,12 @@ LIMIT 3
### Top-N
-<span class="label label-danger">Attention</span> Top-N is only supported in
Blink planner.
-
-Top-N queries ask for the N smallest or largest values ordered by columns.
Both smallest and largest values sets are considered Top-N queries. Top-N
queries are useful in cases where the need is to display only the N bottom-most
or the N top-
-most records from batch/streaming table on a condition. This result set can be
used for further analysis.
+<span class="label label-danger">注意</span> 目前仅 Blink 计划器支持 Top-N 。
-Flink uses the combination of a OVER window clause and a filter condition to
express a Top-N query. With the power of OVER window `PARTITION BY` clause,
Flink also supports per group Top-N. For example, the top five products per
category that have the maximum sales in realtime. Top-N queries are supported
for SQL on batch and streaming tables.
+Top-N 查询是根据列排序找到N个最大或最小的值。最大值集和最小值集都被视为是一种 Top-N 的查询。若在批处理或流处理的表中需要显示出满足条件的 N
个最底层记录或最顶层记录, Top-N 查询将会十分有用。得到的结果集将可以进行进一步的分析。
-The following shows the syntax of the TOP-N statement:
+Flink 使用 OVER 窗口条件和过滤条件相结合以进行 Top-N 查询。利用 OVER 窗口的 `PARTITION BY` 子句的功能,Flink
还支持逐组 Top-N 。 例如,每个类别中实时销量最高的前五种产品。批处理表和流处理表都支持基于SQL的 Top-N 查询。
+以下是 TOP-N 表达式的语法:
{% highlight sql %}
SELECT [column_list]
@@ -863,20 +861,20 @@ FROM (
WHERE rownum <= N [AND conditions]
{% endhighlight %}
-**Parameter Specification:**
+**参数说明:**
-- `ROW_NUMBER()`: Assigns an unique, sequential number to each row, starting
with one, according to the ordering of rows within the partition. Currently, we
only support `ROW_NUMBER` as the over window function. In the future, we will
support `RANK()` and `DENSE_RANK()`.
-- `PARTITION BY col1[, col2...]`: Specifies the partition columns. Each
partition will have a Top-N result.
-- `ORDER BY col1 [asc|desc][, col2 [asc|desc]...]`: Specifies the ordering
columns. The ordering directions can be different on different columns.
-- `WHERE rownum <= N`: The `rownum <= N` is required for Flink to recognize
this query is a Top-N query. The N represents the N smallest or largest records
will be retained.
-- `[AND conditions]`: It is free to add other conditions in the where clause,
but the other conditions can only be combined with `rownum <= N` using `AND`
conjunction.
+- `ROW_NUMBER()`: 根据当前分区内的各行的顺序从第一行开始,依次为每一行分配一个唯一且连续的号码。目前,我们只支持 `ROW_NUMBER`
在 over 窗口函数中使用。未来将会支持 `RANK()` 和 `DENSE_RANK()`函数。
+- `PARTITION BY col1[, col2...]`: 指定分区列,每个分区都将会有一个 Top-N 结果。
+- `ORDER BY col1 [asc|desc][, col2 [asc|desc]...]`: 指定排序列,不同列的排序方向可以不一样。
+- `WHERE rownum <= N`: Flink 需要 `rownum <= N` 才能识别一个查询是否为 Top-N 查询。 其中, N
代表最大或最小的 N 条记录会被保留。
+- `[AND conditions]`: 在 where 语句中,可以随意添加其他的查询条件,但其他条件只允许通过 `AND` 与 `rownum <=
N` 结合使用。
-<span class="label label-danger">Attention in Streaming Mode</span> The TopN
query is <span class="label label-info">Result Updating</span>. Flink SQL will
sort the input data stream according to the order key, so if the top N records
have been changed, the changed ones will be sent as retraction/update records
to downstream.
-It is recommended to use a storage which supports updating as the sink of
Top-N query. In addition, if the top N records need to be stored in external
storage, the result table should have the same unique key with the Top-N query.
+<span class="label label-danger">流处理模式需注意</span> TopN 查询 <span class="label
label-info">可自动更新结果</span>。 Flink SQL 会根据排序键对输入的流进行排序;若 top N
的记录发生了变化,变化的部分会以撤销、更新记录的形式发送到下游。
+推荐使用一个支持更新的存储作为 Top-N 查询的 sink 。另外,若 top N 记录需要存储到外部存储,则结果表需要拥有相同与 Top-N
查询相同的唯一键。
-The unique keys of Top-N query is the combination of partition columns and
rownum column. Top-N query can also derive the unique key of upstream. Take
following job as an example, say `product_id` is the unique key of the
`ShopSales`, then the unique keys of the Top-N query are [`category`, `rownum`]
and [`product_id`].
+Top-N 的唯一键是分区列和 rownum 列的结合,另外 Top-N 查询也可以获得上游的唯一键。以下面的任务为例,`product_id` 是
`ShopSales` 的唯一键,然后 Top-N 的唯一键是 [`category`, `rownum`] 和 [`product_id`] 。
-The following examples show how to specify SQL queries with Top-N on streaming
tables. This is an example to get "the top five products per category that have
the maximum sales in realtime" we mentioned above.
+下面的样例描述了如何指定带有 Top-N 的 SQL 查询。这个例子的作用是我们上面提到的“查询每个分类实时销量最大的五个产品”。
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -884,12 +882,12 @@ The following examples show how to specify SQL queries
with Top-N on streaming t
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
-// ingest a DataStream from an external source
+// 接收来自外部数据源的 DataStream
DataStream<Tuple3<String, String, String, Long>> ds = env.addSource(...);
-// register the DataStream as table "ShopSales"
+// 把 DataStream 注册为表,表名是 “ShopSales”
tableEnv.createTemporaryView("ShopSales", ds, "product_id, category,
product_name, sales");
-// select top-5 products per category which have the maximum sales.
+// 选择每个分类中销量前5的产品
Table result1 = tableEnv.sqlQuery(
"SELECT * " +
"FROM (" +
@@ -905,13 +903,13 @@ Table result1 = tableEnv.sqlQuery(
val env = StreamExecutionEnvironment.getExecutionEnvironment
val tableEnv = TableEnvironment.getTableEnvironment(env)
-// read a DataStream from an external source
+// 读取外部数据源的 DataStream
val ds: DataStream[(String, String, String, Long)] = env.addSource(...)
-// register the DataStream under the name "ShopSales"
+// 注册名为 “ShopSales” 的 DataStream
tableEnv.createTemporaryView("ShopSales", ds, 'product_id, 'category,
'product_name, 'sales)
-// select top-5 products per category which have the maximum sales.
+// 选择每个分类中销量前5的产品
val result1 = tableEnv.sqlQuery(
"""
|SELECT *
@@ -925,13 +923,13 @@ val result1 = tableEnv.sqlQuery(
</div>
</div>
-#### No Ranking Output Optimization
+#### 无排名输出优化
-As described above, the `rownum` field will be written into the result table
as one field of the unique key, which may lead to a lot of records being
written to the result table. For example, when the record (say `product-1001`)
of ranking 9 is updated and its rank is upgraded to 1, all the records from
ranking 1 ~ 9 will be output to the result table as update messages. If the
result table receives too many data, it will become the bottleneck of the SQL
job.
+如上文所描述,`rownum` 字段会作为唯一键的其中一个字段写到结果表里面,这会导致大量的结果写出到结果表。比如,当原始结果(名为
`product-1001` )从排序第九变化为排序第一时,排名 1-9 的所有结果都会以更新消息的形式发送到结果表。若结果表收到太多的数据,将会成为 SQL
任务的瓶颈。
-The optimization way is omitting rownum field in the outer SELECT clause of
the Top-N query. This is reasonable because the number of the top N records is
usually not large, thus the consumers can sort the records themselves quickly.
Without rownum field, in the example above, only the changed record
(`product-1001`) needs to be sent to downstream, which can reduce much IO to
the result table.
+优化方法是在 Top-N 查询的外部 SELECT 子句中省略 rownum
字段。由于前N条记录的数量通常不大,因此消费者可以自己对记录进行快速排序,因此这是合理的。去掉 rownum 字段后,上述的例子中,只有变化了的记录(
`product-1001` )需要发送到下游,从而可以节省大量的对结果表的 IO 操作。
-The following example shows how to optimize the above Top-N example in this
way:
+以下的例子描述了如何以这种方式优化上述的 Top-N 查询:
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -939,12 +937,12 @@ The following example shows how to optimize the above
Top-N example in this way:
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
-// ingest a DataStream from an external source
+// 从外部数据源读取 DataStream
DataStream<Tuple3<String, String, String, Long>> ds = env.addSource(...);
-// register the DataStream as table "ShopSales"
+// 把 DataStream 注册为表,表名是 “ShopSales”
tableEnv.createTemporaryView("ShopSales", ds, "product_id, category,
product_name, sales");
-// select top-5 products per category which have the maximum sales.
+// 选择每个分类中销量前5的产品
Table result1 = tableEnv.sqlQuery(
"SELECT product_id, category, product_name, sales " + // omit row_num field
in the output
"FROM (" +
@@ -960,13 +958,13 @@ Table result1 = tableEnv.sqlQuery(
val env = StreamExecutionEnvironment.getExecutionEnvironment
val tableEnv = TableEnvironment.getTableEnvironment(env)
-// read a DataStream from an external source
+// 从外部数据源读取 DataStream
val ds: DataStream[(String, String, String, Long)] = env.addSource(...)
-// register the DataStream under the name "ShopSales"
+// 注册名为 “ShopSales” 的数据源
tableEnv.createTemporaryView("ShopSales", ds, 'product_id, 'category,
'product_name, 'sales)
-// select top-5 products per category which have the maximum sales.
+// 选择每个分类中销量前5的产品
val result1 = tableEnv.sqlQuery(
"""
|SELECT product_id, category, product_name, sales -- omit row_num field
in the output
@@ -980,19 +978,18 @@ val result1 = tableEnv.sqlQuery(
</div>
</div>
-<span class="label label-danger">Attention in Streaming Mode</span> In order
to output the above query to an external storage and have a correct result, the
external storage must have the same unique key with the Top-N query. In the
above example query, if the `product_id` is the unique key of the query, then
the external table should also has `product_id` as the unique key.
-
-{% top %}
+<span class="label label-danger">使用流处理模式时需注意</span>
为了使上述查询输出可以输出到外部存储并且结果正确,外部存储需要拥有与 Top-N 查询一致的唯一键。在上述的查询例子中,若 `product_id`
是查询的唯一键,那么外部表必须要有 `product_id` 作为其唯一键。
-### Deduplication
+### 去重
-<span class="label label-danger">Attention</span> Deduplication is only
supported in Blink planner.
+<span class="label label-danger">注意</span> 仅 Blink planner 支持去重。
-Deduplication is removing rows that duplicate over a set of columns, keeping
only the first one or the last one. In some cases, the upstream ETL jobs are
not end-to-end exactly-once, this may result in there are duplicate records in
the sink in case of failover. However, the duplicate records will affect the
correctness of downstream analytical jobs (e.g. `SUM`, `COUNT`). So a
deduplication is needed before further analysis.
+去重是指对在列的集合内重复的行进行删除,只保留第一行或最后一行数据。 在某些情况下,上游的 ETL 作业不能实现精确一次的端到端,这将可能导致在故障恢复
+时,sink 中有重复的记录。 由于重复的记录将影响下游分析作业的正确性(例如,`SUM`、`COUNT`), 所以在进一步分析之前需要进行数据去重。
-Flink uses `ROW_NUMBER()` to remove duplicates just like the way of Top-N
query. In theory, deduplication is a special case of Top-N which the N is one
and order by the processing time or event time.
+与 Top-N 查询相似,Flink 使用 `ROW_NUMBER()` 去除重复的记录。理论上来说,去重是一个特殊的 Top-N 查询,其中 N 是 1
,记录则是以处理时间或事件事件进行排序的。
-The following shows the syntax of the Deduplication statement:
+以下代码展示了去重语句的语法:
{% highlight sql %}
SELECT [column_list]
@@ -1004,14 +1001,14 @@ FROM (
WHERE rownum = 1
{% endhighlight %}
-**Parameter Specification:**
+**参数说明:**
-- `ROW_NUMBER()`: Assigns an unique, sequential number to each row, starting
with one.
-- `PARTITION BY col1[, col2...]`: Specifies the partition columns, i.e. the
deduplicate key.
-- `ORDER BY time_attr [asc|desc]`: Specifies the ordering column, it must be a
[time attribute]({{ site.baseurl }}/dev/table/streaming/time_attributes.html).
Currently only support [proctime attribute]({{ site.baseurl
}}/dev/table/streaming/time_attributes.html#processing-time). [Rowtime
atttribute]({{ site.baseurl
}}/dev/table/streaming/time_attributes.html#event-time) will be supported in
the future. Ordering by ASC means keeping the first row, ordering by DESC means
keeping the last row.
-- `WHERE rownum = 1`: The `rownum = 1` is required for Flink to recognize this
query is deduplication.
+- `ROW_NUMBER()`: 从第一行开始,依次为每一行分配一个唯一且连续的号码。
+- `PARTITION BY col1[, col2...]`: 指定分区的列,例如去重的键。
+- `ORDER BY time_attr [asc|desc]`: 指定排序的列。所制定的列必须为
[时间属性](streaming/time_attributes.html)。目前仅支持 [proctime
attribute](streaming/time_attributes.html#processing-time),在未来版本中将会支持 [Rowtime
atttribute](streaming/time_attributes.html#event-time) 。升序( ASC
)排列指只保留第一行,而降序排列( DESC )则指保留最后一行。
+- `WHERE rownum = 1`: Flink 需要 `rownum = 1` 以确定该查询是否为去重查询。
-The following examples show how to specify SQL queries with Deduplication on
streaming tables.
+以下的例子描述了如何指定 SQL 查询以在一个流计算表中进行去重操作。
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -1019,13 +1016,12 @@ The following examples show how to specify SQL queries
with Deduplication on str
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
-// ingest a DataStream from an external source
+// 从外部数据源读取 DataStream
DataStream<Tuple3<String, String, String, Integer>> ds = env.addSource(...);
-// register the DataStream as table "Orders"
+// 注册名为 “Orders” 的 DataStream
tableEnv.createTemporaryView("Orders", ds, "order_id, user, product, number,
proctime.proctime");
-// remove duplicate rows on order_id and keep the first occurrence row,
-// because there shouldn't be two orders with the same order_id.
+// 由于不应该出现两个订单有同一个order_id,所以根据 order_id 去除重复的行,并保留第一行
Table result1 = tableEnv.sqlQuery(
"SELECT order_id, user, product, number " +
"FROM (" +
@@ -1041,13 +1037,12 @@ Table result1 = tableEnv.sqlQuery(
val env = StreamExecutionEnvironment.getExecutionEnvironment
val tableEnv = TableEnvironment.getTableEnvironment(env)
-// read a DataStream from an external source
+// 从外部数据源读取 DataStream
val ds: DataStream[(String, String, String, Int)] = env.addSource(...)
-// register the DataStream under the name "Orders"
+// 注册名为 “Orders” 的 DataStream
tableEnv.createTemporaryView("Orders", ds, 'order_id, 'user, 'product,
'number, 'proctime.proctime)
-// remove duplicate rows on order_id and keep the first occurrence row,
-// because there shouldn't be two orders with the same order_id.
+// 由于不应该出现两个订单有同一个order_id,所以根据 order_id 去除重复的行,并保留第一行
val result1 = tableEnv.sqlQuery(
"""
|SELECT order_id, user, product, number
@@ -1063,49 +1058,49 @@ val result1 = tableEnv.sqlQuery(
{% top %}
-### Group Windows
+### 分组窗口
-Group windows are defined in the `GROUP BY` clause of a SQL query. Just like
queries with regular `GROUP BY` clauses, queries with a `GROUP BY` clause that
includes a group window function compute a single result row per group. The
following group windows functions are supported for SQL on batch and streaming
tables.
+SQL 查询的分组窗口是通过 `GROUP BY` 子句定义的。类似于使用常规 `GROUP BY` 语句的查询,窗口分组语句的 `GROUP BY`
子句中带有一个窗口函数为每个分组计算出一个结果。以下是批处理表和流处理表支持的分组窗口函数:
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 30%">Group Window Function</th>
- <th class="text-left">Description</th>
+ <th class="text-left" style="width: 30%">分组窗口函数</th>
+ <th class="text-left">描述</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>TUMBLE(time_attr, interval)</code></td>
- <td>Defines a tumbling time window. A tumbling time window assigns rows
to non-overlapping, continuous windows with a fixed duration
(<code>interval</code>). For example, a tumbling window of 5 minutes groups
rows in 5 minutes intervals. Tumbling windows can be defined on event-time
(stream + batch) or processing-time (stream).</td>
+ <td>定义一个滚动窗口。滚动窗口把行分配到有固定持续时间( <code>interval</code> )的不重叠的连续窗口。比如,5
分钟的滚动窗口以 5 分钟为间隔对行进行分组。滚动窗口可以定义在事件时间(批处理、流处理)或处理时间(流处理)上。</td>
</tr>
<tr>
<td><code>HOP(time_attr, interval, interval)</code></td>
- <td>Defines a hopping time window (called sliding window in the Table
API). A hopping time window has a fixed duration (second <code>interval</code>
parameter) and hops by a specified hop interval (first <code>interval</code>
parameter). If the hop interval is smaller than the window size, hopping
windows are overlapping. Thus, rows can be assigned to multiple windows. For
example, a hopping window of 15 minutes size and 5 minute hop interval assigns
each row to 3 different windows [...]
+ <td>定义一个跳跃的时间窗口(在 Table API 中称为滑动窗口)。滑动窗口有一个固定的持续时间( 第二个
<code>interval</code> 参数 )以及一个滑动的间隔(第一个 <code>interval</code> 参数
)。若滑动间隔小于窗口的持续时间,滑动窗口则会出现重叠;因此,行将会被分配到多个窗口中。比如,一个大小为 15 分组的滑动窗口,其滑动间隔为 5
分钟,将会把每一行数据分配到 3 个 15 分钟的窗口中。滑动窗口可以定义在事件时间(批处理、流处理)或处理时间(流处理)上。</td>
</tr>
<tr>
<td><code>SESSION(time_attr, interval)</code></td>
- <td>Defines a session time window. Session time windows do not have a
fixed duration but their bounds are defined by a time <code>interval</code> of
inactivity, i.e., a session window is closed if no event appears for a defined
gap period. For example a session window with a 30 minute gap starts when a row
is observed after 30 minutes inactivity (otherwise the row would be added to an
existing window) and is closed if no row is added within 30 minutes. Session
windows can work on e [...]
+ <td>定义一个会话时间窗口。会话时间窗口没有一个固定的持续时间,但是它们的边界会根据 <code>interval</code>
所定义的不活跃时间所确定;即一个会话时间窗口在定义的间隔时间内没有时间出现,该窗口会被关闭。例如时间窗口的间隔时间是 30
分钟,当其不活跃的时间达到30分钟后,若观测到新的记录,则会启动一个新的会话时间窗口(否则该行数据会被添加到当前的窗口),且若在 30
分钟内没有观测到新纪录,这个窗口将会被关闭。会话时间窗口可以使用事件时间(批处理、流处理)或处理时间(流处理)。</td>
</tr>
</tbody>
</table>
-#### Time Attributes
+#### 时间属性
-For SQL queries on streaming tables, the `time_attr` argument of the group
window function must refer to a valid time attribute that specifies the
processing time or event time of rows. See the [documentation of time
attributes]({{ site.baseurl }}/dev/table/streaming/time_attributes.html) to
learn how to define time attributes.
+在流处理表中的 SQL 查询中,分组窗口函数的 `time_attr` 参数必须引用一个合法的时间属性,且该属性需要指定行的处理时间或事件时间。可参考
[时间属性文档](streaming/time_attributes.html) 以了解如何定义时间属性。
-For SQL on batch tables, the `time_attr` argument of the group window function
must be an attribute of type `TIMESTAMP`.
+对于批处理的 SQL 查询,分组窗口函数的 `time_attr` 参数必须是一个 `TIMESTAMP` 类型的属性。
-#### Selecting Group Window Start and End Timestamps
+#### 选择分组窗口的开始和结束时间戳
-The start and end timestamps of group windows as well as time attributes can
be selected with the following auxiliary functions:
+可以使用以下辅助函数选择组窗口的开始和结束时间戳以及时间属性:
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 40%">Auxiliary Function</th>
- <th class="text-left">Description</th>
+ <th class="text-left" style="width: 40%">辅助函数</th>
+ <th class="text-left">描述</th>
</tr>
</thead>
@@ -1116,7 +1111,7 @@ The start and end timestamps of group windows as well as
time attributes can be
<code>HOP_START(time_attr, interval, interval)</code><br/>
<code>SESSION_START(time_attr, interval)</code><br/>
</td>
- <td><p>Returns the timestamp of the inclusive lower bound of the
corresponding tumbling, hopping, or session window.</p></td>
+ <td><p>返回相对应的滚动、滑动和会话窗口范围内的下界时间戳。</p></td>
</tr>
<tr>
<td>
@@ -1124,8 +1119,8 @@ The start and end timestamps of group windows as well as
time attributes can be
<code>HOP_END(time_attr, interval, interval)</code><br/>
<code>SESSION_END(time_attr, interval)</code><br/>
</td>
- <td><p>Returns the timestamp of the <i>exclusive</i> upper bound of the
corresponding tumbling, hopping, or session window.</p>
- <p><b>Note:</b> The exclusive upper bound timestamp <i>cannot</i> be
used as a <a href="{{ site.baseurl
}}/dev/table/streaming/time_attributes.html">rowtime attribute</a> in
subsequent time-based operations, such as <a href="#joins">time-windowed
joins</a> and <a href="#aggregations">group window or over window
aggregations</a>.</p></td>
+ <td><p>返回相对应的滚动、滑动和会话窗口<i>范围以外</i>的上界时间戳。</p>
+ <p><b>注意:</b> 范围以外的上界时间戳<i>不可以</i> 在随后基于时间的操作中,作为 <a href="{{
site.baseurl }}/zh/dev/table/streaming/time_attributes.html">行时间属性</a> 使用,比如 <a
href="#joins">基于时间窗口的 join </a> 以及 <a
href="#aggregations">分组窗口或分组窗口上的聚合</a>。</p></td>
</tr>
<tr>
<td>
@@ -1133,8 +1128,8 @@ The start and end timestamps of group windows as well as
time attributes can be
<code>HOP_ROWTIME(time_attr, interval, interval)</code><br/>
<code>SESSION_ROWTIME(time_attr, interval)</code><br/>
</td>
- <td><p>Returns the timestamp of the <i>inclusive</i> upper bound of the
corresponding tumbling, hopping, or session window.</p>
- <p>The resulting attribute is a <a href="{{ site.baseurl
}}/dev/table/streaming/time_attributes.html">rowtime attribute</a> that can be
used in subsequent time-based operations such as <a href="#joins">time-windowed
joins</a> and <a href="#aggregations">group window or over window
aggregations</a>.</p></td>
+ <td><p>返回相对应的滚动、滑动和会话窗口<i>范围以内</i>的上界时间戳。</p>
+ <p>返回的是一个可用于后续需要基于时间的操作的<a href="{{ site.baseurl
}}/zh/dev/table/streaming/time_attributes.html">时间属性(rowtime
attribute)</a>,比如<a href="#joins">基于时间窗口的 join </a> 以及 <a
href="#aggregations">分组窗口或分组窗口上的聚合</a>。</p></td>
</tr>
<tr>
<td>
@@ -1142,14 +1137,14 @@ The start and end timestamps of group windows as well
as time attributes can be
<code>HOP_PROCTIME(time_attr, interval, interval)</code><br/>
<code>SESSION_PROCTIME(time_attr, interval)</code><br/>
</td>
- <td><p>Returns a <a href="{{ site.baseurl
}}/dev/table/streaming/time_attributes.html#processing-time">proctime
attribute</a> that can be used in subsequent time-based operations such as <a
href="#joins">time-windowed joins</a> and <a href="#aggregations">group window
or over window aggregations</a>.</p></td>
+ <td><p>返回一个可用于后续需要基于时间的操作的 <a href="{{ site.baseurl
}}/zh/dev/table/streaming/time_attributes.html#processing-time">处理时间参数</a>,比如<a
href="#joins">基于时间窗口的 join </a> 以及 <a
href="#aggregations">分组窗口或分组窗口上的聚合</a>.</p></td>
</tr>
</tbody>
</table>
-*Note:* Auxiliary functions must be called with exactly same arguments as the
group window function in the `GROUP BY` clause.
+*注意:* 辅助函数必须使用与 `GROUP BY` 子句中的分组窗口函数完全相同的参数来调用.
-The following examples show how to specify SQL queries with group windows on
streaming tables.
+以下的例子展示了如何在流处理表中指定使用分组窗口函数的 SQL 查询。
<div class="codetabs" markdown="1">
<div data-lang="java" markdown="1">
@@ -1157,27 +1152,27 @@ The following examples show how to specify SQL queries
with group windows on str
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
-// ingest a DataStream from an external source
+// 从外部数据源读取 DataSource
DataStream<Tuple3<Long, String, Integer>> ds = env.addSource(...);
-// register the DataStream as table "Orders"
+// 使用“Orders”作为表名把 DataStream 注册为表
tableEnv.createTemporaryView("Orders", ds, "user, product, amount,
proctime.proctime, rowtime.rowtime");
-// compute SUM(amount) per day (in event-time)
+// 计算每日的 SUM(amount)(使用事件时间)
Table result1 = tableEnv.sqlQuery(
"SELECT user, " +
" TUMBLE_START(rowtime, INTERVAL '1' DAY) as wStart, " +
" SUM(amount) FROM Orders " +
"GROUP BY TUMBLE(rowtime, INTERVAL '1' DAY), user");
-// compute SUM(amount) per day (in processing-time)
+// 计算每日的 SUM(amount)(使用处理时间)
Table result2 = tableEnv.sqlQuery(
"SELECT user, SUM(amount) FROM Orders GROUP BY TUMBLE(proctime, INTERVAL '1'
DAY), user");
-// compute every hour the SUM(amount) of the last 24 hours in event-time
+// 使用事件时间计算过去24小时中每小时的 SUM(amount)
Table result3 = tableEnv.sqlQuery(
"SELECT product, SUM(amount) FROM Orders GROUP BY HOP(rowtime, INTERVAL '1'
HOUR, INTERVAL '1' DAY), product");
-// compute SUM(amount) per session with 12 hour inactivity gap (in event-time)
+// 计算每个以12小时(事件时间)作为不活动时间的会话的 SUM(amount)
Table result4 = tableEnv.sqlQuery(
"SELECT user, " +
" SESSION_START(rowtime, INTERVAL '12' HOUR) AS sStart, " +
@@ -1194,12 +1189,12 @@ Table result4 = tableEnv.sqlQuery(
val env = StreamExecutionEnvironment.getExecutionEnvironment
val tableEnv = StreamTableEnvironment.create(env)
-// read a DataStream from an external source
+// 从外部数据源读取 DataSource
val ds: DataStream[(Long, String, Int)] = env.addSource(...)
-// register the DataStream under the name "Orders"
+// 计算每日(使用处理时间)的 SUM(amount)
tableEnv.createTemporaryView("Orders", ds, 'user, 'product, 'amount,
'proctime.proctime, 'rowtime.rowtime)
-// compute SUM(amount) per day (in event-time)
+// 计算每日的 SUM(amount) (使用事件时间)
val result1 = tableEnv.sqlQuery(
"""
|SELECT
@@ -1210,15 +1205,15 @@ val result1 = tableEnv.sqlQuery(
| GROUP BY TUMBLE(rowtime, INTERVAL '1' DAY), user
""".stripMargin)
-// compute SUM(amount) per day (in processing-time)
+// 计算每日的 SUM(amount) (使用处理时间)
val result2 = tableEnv.sqlQuery(
"SELECT user, SUM(amount) FROM Orders GROUP BY TUMBLE(proctime, INTERVAL '1'
DAY), user")
-// compute every hour the SUM(amount) of the last 24 hours in event-time
+// 使用事件时间计算过去24小时中每小时的 SUM(amount)
val result3 = tableEnv.sqlQuery(
"SELECT product, SUM(amount) FROM Orders GROUP BY HOP(rowtime, INTERVAL '1'
HOUR, INTERVAL '1' DAY), product")
-// compute SUM(amount) per session with 12 hour inactivity gap (in event-time)
+// 计算每个以12小时(事件时间)作为不活动时间的会话的 SUM(amount)
val result4 = tableEnv.sqlQuery(
"""
|SELECT
@@ -1236,25 +1231,25 @@ val result4 = tableEnv.sqlQuery(
{% top %}
-### Pattern Recognition
+### 模式匹配
<div markdown="1">
<table class="table table-bordered">
<thead>
<tr>
- <th class="text-left" style="width: 20%">Operation</th>
- <th class="text-center">Description</th>
+ <th class="text-left" style="width: 20%">操作符</th>
+ <th class="text-center">描述</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<strong>MATCH_RECOGNIZE</strong><br>
- <span class="label label-primary">Streaming</span>
+ <span class="label label-primary">流处理</span>
</td>
<td>
- <p>Searches for a given pattern in a streaming table according to the
<code>MATCH_RECOGNIZE</code> <a
href="https://standards.iso.org/ittf/PubliclyAvailableStandards/c065143_ISO_IEC_TR_19075-5_2016.zip">ISO
standard</a>. This makes it possible to express complex event processing (CEP)
logic in SQL queries.</p>
- <p>For a more detailed description, see the dedicated page for <a
href="{{ site.baseurl }}/dev/table/streaming/match_recognize.html">detecting
patterns in tables</a>.</p>
+ <p>根据 <code>MATCH_RECOGNIZE</code> <a
href="https://standards.iso.org/ittf/PubliclyAvailableStandards/c065143_ISO_IEC_TR_19075-5_2016.zip">ISO
标准</a>在流处理表中搜索给定的模式。 这样就可以在SQL查询中描述复杂的事件处理(CEP)逻辑。</p>
+ <p>更多详情请参考 <a href="{{ site.baseurl
}}/zh/dev/table/streaming/match_recognize.html">检测表中的模式</a>.</p>
{% highlight sql %}
SELECT T.aid, T.bid, T.cid