This is an automated email from the ASF dual-hosted git repository.
dianfu pushed a commit to branch release-1.15
in repository https://gitbox.apache.org/repos/asf/flink.git
The following commit(s) were added to refs/heads/release-1.15 by this push:
new c956e822ccb [hotfix][python][docs] Sync the documentation of a few
classes with the corresponding Java classes
c956e822ccb is described below
commit c956e822ccbab326f3b7638eda6dd0ee848715ed
Author: Dian Fu <[email protected]>
AuthorDate: Mon Apr 11 19:34:06 2022 +0800
[hotfix][python][docs] Sync the documentation of a few classes with the
corresponding Java classes
---
flink-python/pyflink/table/environment_settings.py | 4 +-
flink-python/pyflink/table/statement_set.py | 14 ++---
flink-python/pyflink/table/table.py | 60 +++++++++++++++-------
flink-python/pyflink/table/table_config.py | 28 ++++++++++
4 files changed, 77 insertions(+), 29 deletions(-)
diff --git a/flink-python/pyflink/table/environment_settings.py
b/flink-python/pyflink/table/environment_settings.py
index 1fbf0907681..bc615824961 100644
--- a/flink-python/pyflink/table/environment_settings.py
+++ b/flink-python/pyflink/table/environment_settings.py
@@ -39,13 +39,13 @@ class EnvironmentSettings(object):
... .with_built_in_database_name("my_database") \\
... .build()
- :func:`EnvironmentSettings.in_streaming_mode` or
:func:`EnvironmentSettings.in_batch_mode`
+ :func:`~EnvironmentSettings.in_streaming_mode` or
:func:`~EnvironmentSettings.in_batch_mode`
might be convenient as shortcuts.
"""
class Builder(object):
"""
- A builder for :class:`EnvironmentSettings`.
+ A builder for :class:`~EnvironmentSettings`.
"""
def __init__(self):
diff --git a/flink-python/pyflink/table/statement_set.py
b/flink-python/pyflink/table/statement_set.py
index c87fbea375c..4c3bb7a4625 100644
--- a/flink-python/pyflink/table/statement_set.py
+++ b/flink-python/pyflink/table/statement_set.py
@@ -27,14 +27,10 @@ __all__ = ['StatementSet']
class StatementSet(object):
"""
- A StatementSet accepts DML statements or Tables,
- the planner can optimize all added statements and Tables together
- and then submit as one job.
+ A :class:`~StatementSet` accepts pipelines defined by DML statements or
:class:`~Table` objects.
+ The planner can optimize all added statements together and then submit
them as one job.
- .. note::
-
- The added statements and Tables will be cleared
- when calling the `execute` method.
+ The added statements will be cleared when calling the
:func:`~StatementSet.execute` method.
.. versionadded:: 1.11.0
"""
@@ -93,9 +89,9 @@ class StatementSet(object):
>>> stmt_set = table_env.create_statement_set()
>>> source_table = table_env.from_path("SourceTable")
- >>> sink_descriptor =
TableDescriptor.for_connector("blackhole")
+ >>> sink_descriptor =
TableDescriptor.for_connector("blackhole") \\
... .schema(Schema.new_builder()
- ... .build())
+ ... .build()) \\
... .build()
>>> stmt_set.add_insert(sink_descriptor, source_table)
diff --git a/flink-python/pyflink/table/table.py
b/flink-python/pyflink/table/table.py
index b7d8ae60673..200cafa2342 100644
--- a/flink-python/pyflink/table/table.py
+++ b/flink-python/pyflink/table/table.py
@@ -40,32 +40,56 @@ __all__ = ['Table', 'GroupedTable', 'GroupWindowedTable',
'OverWindowedTable', '
class Table(object):
-
"""
- A :class:`~pyflink.table.Table` is the core component of the Table API.
+ A :class:`~pyflink.table.Table` object is the core abstraction of the
Table API.
Similar to how the DataStream API has DataStream,
the Table API is built around :class:`~pyflink.table.Table`.
- Use the methods of :class:`~pyflink.table.Table` to transform data.
+ A :class:`~pyflink.table.Table` object describes a pipeline of data
transformations. It does not
+ contain the data itself in any way. Instead, it describes how to read data
from a table source,
+ and how to eventually write data to a table sink. The declared pipeline
can be
+ printed, optimized, and eventually executed in a cluster. The pipeline can
work with bounded or
+ unbounded streams which enables both streaming and batch scenarios.
+
+ By the definition above, a :class:`~pyflink.table.Table` object can
actually be considered as
+ a view in SQL terms.
+
+ The initial :class:`~pyflink.table.Table` object is constructed by a
+ :class:`~pyflink.table.TableEnvironment`. For example,
+ :func:`~pyflink.table.TableEnvironment.from_path` obtains a table from a
catalog.
+ Every :class:`~pyflink.table.Table` object has a schema that is available
through
+ :func:`~pyflink.table.Table.get_schema`. A :class:`~pyflink.table.Table`
object is
+ always associated with its original table environment during programming.
+
+ Every transformation (i.e. :func:`~pyflink.table.Table.select` or
+ :func:`~pyflink.table.Table.filter` on a :class:`~pyflink.table.Table`
object leads to a new
+ :class:`~pyflink.table.Table` object.
+
+ Use :func:`~pyflink.table.Table.execute` to execute the pipeline and
retrieve the transformed
+ data locally during development. Otherwise, use
:func:`~pyflink.table.Table.execute_insert` to
+ write the data into a table sink.
+
+ Many methods of this class take one or more
:class:`~pyflink.table.Expression` as parameters.
+ For fluent definition of expressions and easier readability, we recommend
to add a star import:
+
+ Example:
+ ::
+
+ >>> from pyflink.table.expressions import *
+
+ Check the documentation for more programming language specific APIs.
+
+ The following example shows how to work with a
:class:`~pyflink.table.Table` object.
Example:
::
- >>> env = StreamExecutionEnvironment.get_execution_environment()
- >>> env.set_parallelism(1)
- >>> t_env = StreamTableEnvironment.create(env)
- >>> ...
- >>> t_env.register_table_source("source", ...)
- >>> t = t_env.from_path("source")
- >>> t.select(...)
- >>> ...
- >>> t_env.register_table_sink("result", ...)
- >>> t.execute_insert("result")
-
- Operations such as :func:`~pyflink.table.Table.join`,
:func:`~pyflink.table.Table.select`,
- :func:`~pyflink.table.Table.where` and
:func:`~pyflink.table.Table.group_by`
- take arguments in an expression string. Please refer to the documentation
for
- the expression syntax.
+ >>> from pyflink.table import TableEnvironment
+ >>> from pyflink.table.expressions import *
+ >>> env_settings = EnvironmentSettings.in_streaming_mode()
+ >>> t_env = TableEnvironment.create(env_settings)
+ >>> table = t_env.from_path("my_table").select(col("colA").trim(),
col("colB") + 12)
+ >>> table.execute().print()
"""
def __init__(self, j_table, t_env):
diff --git a/flink-python/pyflink/table/table_config.py
b/flink-python/pyflink/table/table_config.py
index 6d0899ee9af..5fa9896a566 100644
--- a/flink-python/pyflink/table/table_config.py
+++ b/flink-python/pyflink/table/table_config.py
@@ -35,12 +35,40 @@ class TableConfig(object):
Configuration for the current :class:`TableEnvironment` session to adjust
Table & SQL API
programs.
+ This class is a pure API class that abstracts configuration from various
sources. Currently,
+ configuration can be set in any of the following layers (in the given
order):
+
+ - flink-conf.yaml
+ - CLI parameters
+ - :class:`~pyflink.datastream.StreamExecutionEnvironment` when bridging to
DataStream API
+ - :func:`~EnvironmentSettings.Builder.with_configuration`
+ - :func:`~TableConfig.set`
+
+ The latter two represent the application-specific part of the
configuration. They initialize
+ and directly modify :func:`~TableConfig.get_configuration`. Other layers
represent the
+ configuration of the execution context and are immutable.
+
+ The getter :func:`~TableConfig.get` gives read-only access to the full
configuration. However,
+ application-specific configuration has precedence. Configuration of outer
layers is used for
+ defaults and fallbacks. The setter :func:`~TableConfig.set` will only
affect
+ application-specific configuration.
+
For common or important configuration options, this class provides getters
and setters methods
with detailed inline documentation.
For more advanced configuration, users can directly access the underlying
key-value map via
:func:`~pyflink.table.TableConfig.get_configuration`.
+ Example:
+ ::
+
+ >>> table_config = t_env.get_config()
+ >>> config = Configuration()
+ >>> config.set_string("parallelism.default", "128") \\
+ ... .set_string("pipeline.auto-watermark-interval", "800ms") \\
+ ... .set_string("execution.checkpointing.interval", "30s")
+ >>> table_config.add_configuration(config)
+
.. note::
Because options are read at different point in time when performing
operations, it is