kennknowles commented on a change in pull request #8553:
URL: https://github.com/apache/airflow/pull/8553#discussion_r416136954



##########
File path: airflow/providers/google/cloud/hooks/dataflow.py
##########
@@ -93,15 +94,30 @@ def inner_wrapper(self: "DataflowHook", *args, **kwargs) -> RT:
 class DataflowJobStatus:
     """
     Helper class with Dataflow job statuses.
+    Reference: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState
     """
-    JOB_STATE_DONE = "JOB_STATE_DONE"
+    JOB_STATE_UNKNOWN = "JOB_STATE_UNKNOWN"
+    JOB_STATE_STOPPED = "JOB_STATE_STOPPED"
     JOB_STATE_RUNNING = "JOB_STATE_RUNNING"
+    JOB_STATE_DONE = "JOB_STATE_DONE"
     JOB_STATE_FAILED = "JOB_STATE_FAILED"
     JOB_STATE_CANCELLED = "JOB_STATE_CANCELLED"
+    JOB_STATE_UPDATED = "JOB_STATE_UPDATED"
+    JOB_STATE_DRAINING = "JOB_STATE_DRAINING"
+    JOB_STATE_DRAINED = "JOB_STATE_DRAINED"
     JOB_STATE_PENDING = "JOB_STATE_PENDING"
-    FAILED_END_STATES = {JOB_STATE_FAILED, JOB_STATE_CANCELLED}
-    SUCCEEDED_END_STATES = {JOB_STATE_DONE}
-    END_STATES = SUCCEEDED_END_STATES | FAILED_END_STATES
+    JOB_STATE_CANCELLING = "JOB_STATE_CANCELLING"
+    JOB_STATE_QUEUED = "JOB_STATE_QUEUED"
+    FAILED_END_STATES = {JOB_STATE_FAILED, JOB_STATE_CANCELLED, JOB_STATE_STOPPED}
+    SUCCEEDED_END_STATES = {JOB_STATE_DONE, JOB_STATE_UPDATED, JOB_STATE_DRAINED}
+    TERMINAL_STATES = SUCCEEDED_END_STATES | FAILED_END_STATES

Review comment:
       Are these lists used anywhere else? I think that the success/fail 
distinction is artificial. You cannot really say whether CANCELLED is a failure 
or not. The same probably goes for DRAINED and UPDATED. Whatever is looking at 
the job status probably wants the full details.

##########
File path: airflow/providers/google/cloud/operators/dataflow.py
##########
@@ -406,6 +406,88 @@ def on_kill(self) -> None:
             self.hook.cancel_job(job_id=self.job_id, project_id=self.project_id)
 
 
+class DataflowStartSqlJobOperator(BaseOperator):
+    """
+    Starts Dataflow SQL query.

Review comment:
       @ibzib would be a good reviewer here

##########
File path: airflow/providers/google/cloud/hooks/dataflow.py
##########
@@ -783,6 +794,77 @@ def cancel_job(
             name=job_name,
             job_id=job_id,
             location=location,
-            poll_sleep=self.poll_sleep
+            poll_sleep=self.poll_sleep,
+            num_retries=self.num_retries,
         )
         jobs_controller.cancel()
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def start_sql_job(
+        self,
+        job_name: str,
+        query: str,
+        options: Dict[str, Any],
+        project_id: str,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        on_new_job_id_callback: Optional[Callable[[str], None]] = None
+    ):
+        """
+        Starts Dataflow SQL query.
+
+        :param job_name: The unique name to assign to the Cloud Dataflow job.
+        :type job_name: str
+        :param query: The SQL query to execute.
+        :type query: str
+        :param options: Job parameters to be executed.
+            For more information, look at:
+            `https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query
+            <gcloud beta dataflow sql query>`__
+            command reference
+        :param location: The location of the Dataflow job (for example europe-west1)
+        :type location: str
+        :param project_id: The ID of the GCP project that owns the job.
+            If set to ``None`` or missing, the default project_id from the GCP connection is used.
+        :type project_id: Optional[str]
+        :param on_new_job_id_callback: Callback called when the job ID is known.
+        :type on_new_job_id_callback: callable
+        :return: the new job object
+        """
+        cmd = [
+            'gcloud',
+            'beta',

Review comment:
       The `beta` component is not required here.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to