[ 
https://issues.apache.org/jira/browse/AIRFLOW-1874?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16576091#comment-16576091
 ] 

ASF GitHub Bot commented on AIRFLOW-1874:
-----------------------------------------

kaxil closed pull request #3717: [AIRFLOW-1874] use_legacy_sql added to 
BigQueryCheck operators
URL: https://github.com/apache/incubator-airflow/pull/3717
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/airflow/contrib/operators/bigquery_check_operator.py 
b/airflow/contrib/operators/bigquery_check_operator.py
index 59ef5d377d..a9c493f4fd 100644
--- a/airflow/contrib/operators/bigquery_check_operator.py
+++ b/airflow/contrib/operators/bigquery_check_operator.py
@@ -55,21 +55,25 @@ class BigQueryCheckOperator(CheckOperator):
     :type sql: string
     :param bigquery_conn_id: reference to the BigQuery database
     :type bigquery_conn_id: string
+    :param use_legacy_sql: Whether to use legacy SQL (true)
+    or standard SQL (false).
+    :type use_legacy_sql: boolean
     """
 
     @apply_defaults
-    def __init__(
-            self,
-            sql,
-            bigquery_conn_id='bigquery_default',
-            *args,
-            **kwargs):
+    def __init__(self,
+                 sql,
+                 bigquery_conn_id='bigquery_default',
+                 use_legacy_sql=True,
+                 *args, **kwargs):
         super(BigQueryCheckOperator, self).__init__(sql=sql, *args, **kwargs)
         self.bigquery_conn_id = bigquery_conn_id
         self.sql = sql
+        self.use_legacy_sql = use_legacy_sql
 
     def get_db_hook(self):
-        return BigQueryHook(bigquery_conn_id=self.bigquery_conn_id)
+        return BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
+                            use_legacy_sql=self.use_legacy_sql)
 
 
 class BigQueryValueCheckOperator(ValueCheckOperator):
@@ -78,20 +82,27 @@ class BigQueryValueCheckOperator(ValueCheckOperator):
 
     :param sql: the sql to be executed
     :type sql: string
+    :param use_legacy_sql: Whether to use legacy SQL (true)
+    or standard SQL (false).
+    :type use_legacy_sql: boolean
     """
 
     @apply_defaults
-    def __init__(
-            self, sql, pass_value, tolerance=None,
-            bigquery_conn_id='bigquery_default',
-            *args, **kwargs):
+    def __init__(self, sql,
+                 pass_value,
+                 tolerance=None,
+                 bigquery_conn_id='bigquery_default',
+                 use_legacy_sql=True,
+                 *args, **kwargs):
         super(BigQueryValueCheckOperator, self).__init__(
             sql=sql, pass_value=pass_value, tolerance=tolerance,
             *args, **kwargs)
         self.bigquery_conn_id = bigquery_conn_id
+        self.use_legacy_sql = use_legacy_sql
 
     def get_db_hook(self):
-        return BigQueryHook(bigquery_conn_id=self.bigquery_conn_id)
+        return BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
+                            use_legacy_sql=self.use_legacy_sql)
 
 
 class BigQueryIntervalCheckOperator(IntervalCheckOperator):
@@ -113,19 +124,22 @@ class 
BigQueryIntervalCheckOperator(IntervalCheckOperator):
         example 'COUNT(*)': 1.5 would require a 50 percent or less difference
         between the current day, and the prior days_back.
     :type metrics_threshold: dict
+    :param use_legacy_sql: Whether to use legacy SQL (true)
+    or standard SQL (false).
+    :type use_legacy_sql: boolean
     """
 
     @apply_defaults
-    def __init__(
-            self, table, metrics_thresholds,
-            date_filter_column='ds', days_back=-7,
-            bigquery_conn_id='bigquery_default',
-            *args, **kwargs):
+    def __init__(self, table, metrics_thresholds, date_filter_column='ds',
+                 days_back=-7, bigquery_conn_id='bigquery_default',
+                 use_legacy_sql=True, *args, **kwargs):
         super(BigQueryIntervalCheckOperator, self).__init__(
             table=table, metrics_thresholds=metrics_thresholds,
             date_filter_column=date_filter_column, days_back=days_back,
             *args, **kwargs)
         self.bigquery_conn_id = bigquery_conn_id
+        self.use_legacy_sql = use_legacy_sql
 
     def get_db_hook(self):
-        return BigQueryHook(bigquery_conn_id=self.bigquery_conn_id)
+        return BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
+                            use_legacy_sql=self.use_legacy_sql)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Support standard SQL in Check, ValueCheck and IntervalCheck BigQuery operators
> ------------------------------------------------------------------------------
>
>                 Key: AIRFLOW-1874
>                 URL: https://issues.apache.org/jira/browse/AIRFLOW-1874
>             Project: Apache Airflow
>          Issue Type: Bug
>          Components: contrib, gcp, operators
>            Reporter: Guillermo Rodríguez Cano
>            Assignee: Iuliia Volkova
>            Priority: Major
>             Fix For: 2.0.0
>
>
> BigQueryCheckOperator, BigQueryValueCheckOperator and 
> BigQueryIntervalCheckOperator do not support disabling use of default legacy 
> SQL in BigQuery.
> This is a major blocker to a correct migration to standard SQL when 
> queries are complicated. For example, a query written in legacy SQL may be 
> unable to use any view defined in standard SQL, because a query and the 
> views it uses are bound to either standard or legacy SQL, but not a 
> mix.
> These operators inherit from the Airflow base operators of the same name 
> (without the BigQuery prefix), which may make the process more complicated: 
> the flag to use standard SQL needs to be set explicitly, because the 
> underlying BigQueryHook has the corresponding parameter, use_legacy_sql, 
> set to True by default when running a query. But it is not possible to pass 
> parameters all the way to it via the aforementioned operators.
> The workaround of including #standardSQL and a new line before the query 
> doesn't work either, as there is a mismatch. In fact, BigQuery reports the 
> following: "Query text specifies use_legacy_sql:false, while API options 
> specify:true"
> A workaround for queries on views using standard SQL is to persist the result 
> of the query in a temporary table, then run the check operation and 
> thereafter delete the temporary table. 



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to