dstandish commented on a change in pull request #14492:
URL: https://github.com/apache/airflow/pull/14492#discussion_r587737276



##########
File path: airflow/providers/airbyte/example_dags/example_airbyte_trigger_job.py
##########
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Example DAG demonstrating the usage of the BashOperator."""
+
+from datetime import timedelta
+
+from airflow import DAG
+from airflow.providers.airbyte.operators.airbyte import AirbyteTriggerSyncOperator
+from airflow.providers.airbyte.sensors.airbyte import AirbyteJobSensor
+from airflow.utils.dates import days_ago
+
+args = {
+    'owner': 'airflow',
+}
+
+with DAG(
+    dag_id='example_airbyte_operator',
+    default_args=args,
+    schedule_interval=None,
+    start_date=days_ago(1),
+    dagrun_timeout=timedelta(minutes=60),
+    tags=['example'],
+) as dag:
+
+    # [START howto_operator_airbyte_synchronous]
+    sync_source_destination = AirbyteTriggerSyncOperator(
+        task_id='airbyte_sync_source_dest_example',
+        airbyte_conn_id='airbyte_default',
+        connection_id='15bc3800-82e4-48c3-a32d-620661273f28',
+    )
+    # [END howto_operator_airbyte_synchronous]
+
+    # [START howto_operator_airbyte_asynchronous]
+    async_source_destination = AirbyteTriggerSyncOperator(
+        task_id='airbyte_async_source_dest_example',
+        airbyte_conn_id='airbyte_default',
+        connection_id='15bc3800-82e4-48c3-a32d-620661273f28',
+        asynchronous=True,
+    )
+
+    airbyte_sensor = AirbyteJobSensor(
+        task_id='airbyte_sensor_source_dest_example',
+        airbyte_job_id=async_source_destination.output,
+        airbyte_conn_id='airbyte_default',
+    )
+    # [END howto_operator_airbyte_asynchronous]
+
+    sync_source_destination

Review comment:
       the bare `sync_source_destination` reference has no effect
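   
   a minimal, illustrative sketch (not part of the PR): tasks register with the DAG inside the `with DAG(...)` block, so the bare name can simply be deleted; an explicit dependency would only be needed if an ordering between the examples were actually intended:
   
   ```python
   # the bare `sync_source_destination` line can be removed outright, since
   # instantiating a task inside the `with DAG(...)` block already registers it.
   # if an ordering were intended, it would need an explicit dependency:
   sync_source_destination >> async_source_destination
   ```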

##########
File path: airflow/providers/airbyte/operators/airbyte.py
##########
@@ -0,0 +1,78 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from typing import Optional
+
+from airflow.models import BaseOperator
+from airflow.providers.airbyte.hooks.airbyte import AirbyteHook
+from airflow.utils.decorators import apply_defaults
+
+
+class AirbyteTriggerSyncOperator(BaseOperator):
+    """
+    This operator allows you to submit a job to an Airbyte server to run an integration
+    process between your source and destination.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:AirbyteTriggerSyncOperator`
+
+    :param airbyte_conn_id: Required. The name of the Airflow connection to get connection
+        information for Airbyte.
+    :type airbyte_conn_id: str
+    :param connection_id: Required. The Airbyte ConnectionId UUID between a source and destination.
+    :type connection_id: str
+    :param asynchronous: Optional. Flag to get job_id after submitting the job to the Airbyte API.
+    :type asynchronous: bool
+    :param api_version: Optional. Airbyte API version.
+    :type api_version: str
+    :param timeout: Optional. The amount of time, in seconds, to wait for the request to complete.
+    :type timeout: float
+    """
+
+    template_fields = ('connection_id',)
+
+    @apply_defaults
+    def __init__(
+        self,
+        connection_id: str,
+        airbyte_conn_id: str = "airbyte_default",
+        asynchronous: Optional[bool] = False,
+        api_version: Optional[str] = "v1",
+        timeout: Optional[float] = 3600,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.airbyte_conn_id = airbyte_conn_id
+        self.connection_id = connection_id
+        self.timeout = timeout
+        self.api_version = api_version
+        self.asynchronous = asynchronous
+
+    def execute(self, context) -> None:
+        """Create Airbyte Job and wait to finish"""
+        hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id, api_version=self.api_version)
+        job_object = hook.submit_sync_connection(connection_id=self.connection_id)

Review comment:
       super mega nit ... mightn't this be better described as a `response` object? `job_object` makes it sound like it's an instance of a Job class, but this is a `requests.Response`, and i think calling it `response` might be more conventional.
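   
   a sketch of the suggested rename (illustrative only; the `["job"]["id"]` shape is an assumption about Airbyte's response body, not something shown in this hunk):
   
   ```python
   # the hook returns a requests.Response, so name it accordingly
   response = hook.submit_sync_connection(connection_id=self.connection_id)
   # extracting the job id assumes Airbyte's job schema in the response body
   self.job_id = response.json()["job"]["id"]
   ```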

##########
File path: airflow/providers/airbyte/hooks/airbyte.py
##########
@@ -0,0 +1,99 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import time
+from typing import Any, Optional
+
+from airflow.exceptions import AirflowException
+from airflow.providers.http.hooks.http import HttpHook
+
+
+class AirbyteHook(HttpHook):
+    """
+    Hook for Airbyte API
+
+    :param airbyte_conn_id: Required. The name of the Airflow connection to get
+        connection information for Airbyte.
+    :type airbyte_conn_id: str
+    :param api_version: Required. Airbyte API version.
+    :type api_version: str
+    """
+
+    RUNNING = "running"
+    SUCCEEDED = "succeeded"
+    CANCELLED = "cancelled"
+    PENDING = "pending"
+    FAILED = "failed"
+    ERROR = "error"
+
+    def __init__(self, airbyte_conn_id: str = "airbyte_default", api_version: str = "v1") -> None:
+        super().__init__(http_conn_id=airbyte_conn_id)
+        self.api_version: str = api_version
+
+    def wait_for_job(self, job_id: str, wait_seconds: int = 3, timeout: Optional[float] = None) -> None:
+        """
+        Helper method which polls a job to check if it finishes.
+
+        :param job_id: Id of the Airbyte job
+        :type job_id: str
+        :param wait_seconds: Number of seconds between checks
+        :type wait_seconds: int
+        :param timeout: How many seconds to wait for the job to be ready. Used only if ``asynchronous`` is False
+        :type timeout: float
+        """
+        state = None
+        start = time.monotonic()
+        while state not in (self.ERROR, self.SUCCEEDED, self.CANCELLED):
+            if timeout and start + timeout < time.monotonic():
+                raise AirflowException(f"Timeout: Airbyte job {job_id} is not ready after {timeout}s")
+            time.sleep(wait_seconds)
+            try:
+                job = self.get_job(job_id=job_id)
+                state = job.json()["job"]["status"]
+            except AirflowException as err:
+                self.log.info("Retrying. Airbyte API returned server error when waiting for job: %s", err)

Review comment:
       one thing that sticks out about this is: how do you know it's an error that should be retried?
   
   for example, what if the requested job_id doesn't exist?
   
   maybe that's just what you have to accept when using the `HttpHook`?
   
   there is actually a `run_with_advanced_retry` which might be better and give you finer-grained control if desired. but i don't think that needs to block this from being merged if you want to leave it.
   
   perhaps @turbaszek has a thought here?
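   
   for reference, a hypothetical sketch of what the polling call inside `wait_for_job` could look like with `run_with_advanced_retry` (the endpoint path, payload, and retry policy here are assumptions for illustration, not code from this PR):
   
   ```python
   import requests
   import tenacity
   
   # retry only transient connection-level failures; a non-2xx response
   # (e.g. for a nonexistent job_id) is raised by HttpHook.check_response
   # as AirflowException and therefore fails fast instead of being retried
   response = self.run_with_advanced_retry(
       endpoint=f"api/{self.api_version}/jobs/get",
       json={"id": job_id},
       headers={"accept": "application/json"},
       _retry_args=dict(
           wait=tenacity.wait_exponential(multiplier=1, max=10),
           stop=tenacity.stop_after_attempt(5),
           retry=tenacity.retry_if_exception_type(requests.exceptions.ConnectionError),
       ),
   )
   state = response.json()["job"]["status"]
   ```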
   
   



