This is an automated email from the ASF dual-hosted git repository. turbaszek pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/master by this push: new 4799af3 Extend BigQuery example with include clause (#9572) 4799af3 is described below commit 4799af30ee02c596647d1538854769124f9f4961 Author: Tomek Urbaszek <turbas...@gmail.com> AuthorDate: Tue Jun 30 14:02:23 2020 +0200 Extend BigQuery example with include clause (#9572) --- .../cloud/example_dags/example_bigquery_queries.py | 18 ++++++++++++++++-- .../cloud/example_dags/example_bigquery_query.sql | 20 ++++++++++++++++++++ docs/howto/operator/gcp/bigquery.rst | 13 ++++++++++++- 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/airflow/providers/google/cloud/example_dags/example_bigquery_queries.py b/airflow/providers/google/cloud/example_dags/example_bigquery_queries.py index e3fcda6..e17a53d 100644 --- a/airflow/providers/google/cloud/example_dags/example_bigquery_queries.py +++ b/airflow/providers/google/cloud/example_dags/example_bigquery_queries.py @@ -64,6 +64,7 @@ for location in [None, LOCATION]: default_args=default_args, schedule_interval=None, # Override to match your needs tags=["example"], + user_defined_macros={"DATASET": DATASET_NAME, "TABLE": TABLE_1} ) as dag_with_locations: create_dataset = BigQueryCreateEmptyDatasetOperator( task_id="create-dataset", dataset_id=DATASET_NAME, location=location, @@ -97,13 +98,26 @@ for location in [None, LOCATION]: configuration={ "query": { "query": INSERT_ROWS_QUERY, - "useLegacySql": False, + "useLegacySql": "False", } }, location=location, ) # [END howto_operator_bigquery_insert_job] + # [START howto_operator_bigquery_select_job] + select_query_job = BigQueryInsertJobOperator( + task_id="select_query_job", + configuration={ + "query": { + "query": "{% include 'example_bigquery_query.sql' %}", + "useLegacySql": False, + } + }, + location=location, + ) + # [END howto_operator_bigquery_select_job] + execute_insert_query = BigQueryExecuteQueryOperator( task_id="execute_insert_query", sql=INSERT_ROWS_QUERY, use_legacy_sql=False, location=location ) @@ -172,7 +186,7 @@ for location in [None, LOCATION]: ) # [END howto_operator_bigquery_interval_check] - [create_table_1, create_table_2] >> insert_query_job + [create_table_1, create_table_2] >> insert_query_job >> select_query_job insert_query_job >> execute_insert_query execute_insert_query >> get_data >> get_data_result >> delete_dataset diff --git a/airflow/providers/google/cloud/example_dags/example_bigquery_query.sql b/airflow/providers/google/cloud/example_dags/example_bigquery_query.sql new file mode 100644 index 0000000..b629f27 --- /dev/null +++ b/airflow/providers/google/cloud/example_dags/example_bigquery_query.sql @@ -0,0 +1,20 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +SELECT * FROM {{ DATASET }}.{{ TABLE }} diff --git a/docs/howto/operator/gcp/bigquery.rst b/docs/howto/operator/gcp/bigquery.rst index 6fa2d91..ee35f41 100644 --- a/docs/howto/operator/gcp/bigquery.rst +++ b/docs/howto/operator/gcp/bigquery.rst @@ -256,7 +256,7 @@ Let's say you would like to execute the following query. To execute the SQL query in a specific BigQuery database you can use :class:`~airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator` with -proper query job configuration. +proper query job configuration that can be Jinja templated. .. exampleinclude:: ../../../../airflow/providers/google/cloud/example_dags/example_bigquery_queries.py :language: python @@ -267,6 +267,17 @@ proper query job configuration. For more information on types of BigQuery job please check `documentation <https://cloud.google.com/bigquery/docs/reference/v2/jobs>`__. +If you want to include some files in your configuration you can use ``include`` clause of Jinja template +language as follow: + +.. exampleinclude:: ../../../../airflow/providers/google/cloud/example_dags/example_bigquery_queries.py + :language: python + :dedent: 8 + :start-after: [START howto_operator_bigquery_select_job] + :end-before: [END howto_operator_bigquery_select_job] + +The included file can also use Jinaj templates which can be useful in case of ``.sql`` files. + Additionally you can use ``job_id`` parameter of :class:`~airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator` to improve idempotency. If this parameter is not passed then uuid will be used as ``job_id``. If provided then