This is an automated email from the ASF dual-hosted git repository.
pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new dd71c9a Update doc/examples: BigQuerySource to ReadFromBigQuery
(#13239)
dd71c9a is described below
commit dd71c9ac472b62ff3598d3db47b0f26985bbc82a
Author: Xinbin Huang <[email protected]>
AuthorDate: Mon Jan 11 13:27:13 2021 -0800
Update doc/examples: BigQuerySource to ReadFromBigQuery (#13239)
* Update doc: BigQuerySource to ReadFromBigQuery
* Resolve comment
* Update ReadFromBigQuery in examples
* Add deprecation notice
* Use kwarg `table` to `ReadFromBigQuery`
* fixup! Use kwarg `table` to `ReadFromBigQuery`
---
.../apache_beam/examples/cookbook/bigquery_side_input.py | 7 +++----
sdks/python/apache_beam/examples/cookbook/filters.py | 2 +-
sdks/python/apache_beam/examples/snippets/snippets.py | 10 +++++-----
sdks/python/apache_beam/io/gcp/bigquery.py | 2 +-
.../content/en/documentation/io/built-in/google-bigquery.md | 11 ++++++++---
5 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
index 1b909a3..28ab7d9 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
@@ -99,10 +99,9 @@ def run(argv=None):
ignore_corpus = known_args.ignore_corpus
ignore_word = known_args.ignore_word
- pcoll_corpus = p | 'read corpus' >> beam.io.Read(
- beam.io.BigQuerySource(query=query_corpus))
- pcoll_word = p | 'read_words' >> beam.io.Read(
- beam.io.BigQuerySource(query=query_word))
+ pcoll_corpus = p | 'read corpus' >> beam.io.ReadFromBigQuery(
+ query=query_corpus)
+ pcoll_word = p | 'read_words' >> beam.io.ReadFromBigQuery(query=query_word)
pcoll_ignore_corpus = p | 'create_ignore_corpus' >> beam.Create(
[ignore_corpus])
pcoll_ignore_word = p | 'create_ignore_word' >> beam.Create([ignore_word])
diff --git a/sdks/python/apache_beam/examples/cookbook/filters.py
b/sdks/python/apache_beam/examples/cookbook/filters.py
index 95f32f9..9381234 100644
--- a/sdks/python/apache_beam/examples/cookbook/filters.py
+++ b/sdks/python/apache_beam/examples/cookbook/filters.py
@@ -90,7 +90,7 @@ def run(argv=None):
with beam.Pipeline(argv=pipeline_args) as p:
- input_data = p | beam.io.Read(beam.io.BigQuerySource(known_args.input))
+ input_data = p | beam.io.ReadFromBigQuery(table=known_args.input)
# pylint: disable=expression-not-assigned
(
diff --git a/sdks/python/apache_beam/examples/snippets/snippets.py
b/sdks/python/apache_beam/examples/snippets/snippets.py
index 42e5886..5155ee4 100644
--- a/sdks/python/apache_beam/examples/snippets/snippets.py
+++ b/sdks/python/apache_beam/examples/snippets/snippets.py
@@ -1117,7 +1117,7 @@ def model_bigqueryio(p, write_project='',
write_dataset='', write_table=''):
# [START model_bigqueryio_read_table]
max_temperatures = (
p
- | 'ReadTable' >> beam.io.Read(beam.io.BigQuerySource(table_spec))
+ | 'ReadTable' >> beam.io.ReadFromBigQuery(table=table_spec)
# Each row is a dictionary where the keys are the BigQuery columns
| beam.Map(lambda elem: elem['max_temperature']))
# [END model_bigqueryio_read_table]
@@ -1125,9 +1125,9 @@ def model_bigqueryio(p, write_project='',
write_dataset='', write_table=''):
# [START model_bigqueryio_read_query]
max_temperatures = (
p
- | 'QueryTable' >> beam.io.Read(beam.io.BigQuerySource(
+ | 'QueryTable' >> beam.io.ReadFromBigQuery(
query='SELECT max_temperature FROM '\
- '[clouddataflow-readonly:samples.weather_stations]'))
+ '[clouddataflow-readonly:samples.weather_stations]')
# Each row is a dictionary where the keys are the BigQuery columns
| beam.Map(lambda elem: elem['max_temperature']))
# [END model_bigqueryio_read_query]
@@ -1135,10 +1135,10 @@ def model_bigqueryio(p, write_project='',
write_dataset='', write_table=''):
# [START model_bigqueryio_read_query_std_sql]
max_temperatures = (
p
- | 'QueryTableStdSQL' >> beam.io.Read(beam.io.BigQuerySource(
+ | 'QueryTableStdSQL' >> beam.io.ReadFromBigQuery(
query='SELECT max_temperature FROM '\
'`clouddataflow-readonly.samples.weather_stations`',
- use_standard_sql=True))
+ use_standard_sql=True)
# Each row is a dictionary where the keys are the BigQuery columns
| beam.Map(lambda elem: elem['max_temperature']))
# [END model_bigqueryio_read_query_std_sql]
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py
b/sdks/python/apache_beam/io/gcp/bigquery.py
index a5ceb5b..2858b98 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -45,7 +45,7 @@ call *one* row of the main table and *all* rows of the side
table. The runner
may use some caching techniques to share the side inputs between calls in order
to avoid excessive reading:::
- main_table = pipeline | 'VeryBig' >> beam.io.ReadFroBigQuery(...)
+ main_table = pipeline | 'VeryBig' >> beam.io.ReadFromBigQuery(...)
side_table = pipeline | 'NotBig' >> beam.io.ReadFromBigQuery(...)
results = (
main_table
diff --git
a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md
b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md
index 79f1ef7..9c87ec3d 100644
--- a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md
+++ b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md
@@ -252,13 +252,18 @@ them into JSON `TableRow` objects.
<!-- Python specific -->
{{< paragraph class="language-py" >}}
-To read from a BigQuery table using the Beam SDK for Python, apply a `Read`
-transform on a `BigQuerySource`. Read returns a `PCollection` of dictionaries,
+To read from a BigQuery table using the Beam SDK for Python, apply a
`ReadFromBigQuery`
+transform. `ReadFromBigQuery` returns a `PCollection` of dictionaries,
where each element in the `PCollection` represents a single row in the table.
Integer values in the `TableRow` objects are encoded as strings to match
BigQuery's exported JSON format.
{{< /paragraph >}}
+{{< paragraph class="language-py" >}}
+***Note:*** `BigQuerySource()` is deprecated as of Beam SDK 2.25.0. Before
2.25.0, to read from
+a BigQuery table using the Beam SDK, you would apply a `Read` transform on a
`BigQuerySource`. For example,
+`beam.io.Read(beam.io.BigQuerySource(table_spec))`.
+{{< /paragraph >}}
### Reading from a table
@@ -293,7 +298,7 @@ the `fromQuery` method.
{{< paragraph class="language-py" >}}
If you don't want to read an entire table, you can supply a query string to
-`BigQuerySource` by specifying the `query` parameter.
+`ReadFromBigQuery` by specifying the `query` parameter.
{{< /paragraph >}}
{{< paragraph class="language-py" >}}