yohei1126 commented on a change in pull request #4324: [AIRFLOW-3327] Add support for location in BigQueryHook
URL: https://github.com/apache/incubator-airflow/pull/4324#discussion_r244277306
 
 

 ##########
 File path: airflow/contrib/hooks/bigquery_hook.py
 ##########
 @@ -580,11 +587,18 @@ def run_query(self,
             by one or more columns. This is only available in combination with
             time_partitioning. The order of columns given determines the sort order.
         :type cluster_fields: list of str
+        :param location: The geographic location of the job. Required except for
+            US and EU. See details at
 
 Review comment:
   This operator has `destination_dataset_table`, so I tested the following four patterns. I believe it works as expected.
   
   1. (OK) source dataset in US, destination_dataset in US, no location specified
   2. (OK) source dataset in Tokyo, destination_dataset in Tokyo, location specified as `asia-northeast1`
   3. (Fail) source dataset in Tokyo, destination_dataset in US, location specified as `asia-northeast1`
   4. (Fail) source dataset in US, destination_dataset in Tokyo, no location specified
   
   
   ```
   # -*- coding: utf-8 -*-
   from airflow import DAG
   from airflow.contrib.operators.bigquery_operator import BigQueryOperator
   from airflow.utils.dates import days_ago
   
   ARGS = {
       'owner': 'airflow',
       'depends_on_past': False,
       'retries': 0,
       'start_date': days_ago(1)
   }
   
   COMMON_PARAMS = {}
   
   with DAG(
       dag_id='test_bq',
       default_args=ARGS,
       params=COMMON_PARAMS,
       schedule_interval='@once') as dag:
   
       # both datasets are in US
       # this should be OK
       t1 = BigQueryOperator(
           task_id='us_to_us',
           sql='SELECT * FROM test_us_ds.test_table',
           bigquery_conn_id='google_cloud_default',
           destination_dataset_table='fr-stg-datalake:test_us_ds.dest_table_us_to_us'
       )
   
       # both source dataset and dest dataset are in Tokyo
       # this should be OK
       t2 = BigQueryOperator(
           task_id='tky_to_tky',
           sql='SELECT * FROM test_tokyo_ds.test_table',
           bigquery_conn_id='google_cloud_default',
           location='asia-northeast1',
           destination_dataset_table='fr-stg-datalake:test_tokyo_ds.dest_table_tky_to_tky'
       )
   
       # source dataset is in Tokyo but dest table is in US
       # this should fail
       t3 = BigQueryOperator(
           task_id='tky_to_us',
           sql='SELECT * FROM test_tokyo_ds.test_table',
           bigquery_conn_id='google_cloud_default',
           location='asia-northeast1',
           destination_dataset_table='fr-stg-datalake:test_us_ds.dest_table_tky_to_us'
       )
   
       # source dataset is in US but dest table is in Tokyo
       # this should fail
       t4 = BigQueryOperator(
           task_id='us_to_tky',
           sql='SELECT * FROM test_us_ds.test_table',
           bigquery_conn_id='google_cloud_default',
           destination_dataset_table='fr-stg-datalake:test_tokyo_ds.dest_table_us_to_tky'
       )
   
       t1 >> t2
       t1 >> t3
       t1 >> t4
       globals()[dag.dag_id] = dag
   ```
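   
   For reference, the same behaviour can also be exercised at the hook level rather than through the operator. Below is a minimal sketch, assuming the `location` argument this PR adds to `run_query`, with placeholder project, dataset, and table names:
   
   ```
   # -*- coding: utf-8 -*-
   from airflow.contrib.hooks.bigquery_hook import BigQueryHook
   
   # Placeholder connection and table names; `location` is the new
   # argument under review in this PR.
   hook = BigQueryHook(bigquery_conn_id='google_cloud_default')
   cursor = hook.get_conn().cursor()
   
   # Query a Tokyo dataset and write to a Tokyo destination table.
   # For datasets outside the US and EU, the job location must be set
   # explicitly (this mirrors pattern 2 above).
   cursor.run_query(
       sql='SELECT * FROM test_tokyo_ds.test_table',
       destination_dataset_table='my-project:test_tokyo_ds.dest_table',
       write_disposition='WRITE_TRUNCATE',
       location='asia-northeast1'
   )
   ```
   
   If the destination table lived in a different region (patterns 3 and 4 above), BigQuery should reject the job regardless of the `location` value.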
