ferruzzi commented on code in PR #24057:
URL: https://github.com/apache/airflow/pull/24057#discussion_r886138033


##########
airflow/providers/amazon/aws/hooks/appflow.py:
##########
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import TYPE_CHECKING
+
+from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+
+if TYPE_CHECKING:
+    from mypy_boto3_appflow.client import AppflowClient
+
+

Review Comment:
   Nice clean hook, thanks for that!  We are going to be cleaning up some of 
the existing hooks that have "just pass the values through" methods. :+1: 



##########
airflow/providers/amazon/aws/operators/appflow.py:
##########
@@ -0,0 +1,562 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import copy
+import json
+import sys
+from datetime import datetime, timedelta, timezone
+from time import sleep
+from typing import TYPE_CHECKING, List, Optional, cast
+
+if sys.version_info >= (3, 8):
+    from functools import cached_property
+else:
+    from cached_property import cached_property
+
+from airflow.models import BaseOperator
+from airflow.operators.python import ShortCircuitOperator
+from airflow.providers.amazon.aws.hooks.appflow import AppflowHook
+
+if TYPE_CHECKING:
+    from mypy_boto3_appflow.client import AppflowClient
+    from mypy_boto3_appflow.type_defs import (
+        DescribeFlowExecutionRecordsResponseTypeDef,
+        ExecutionRecordTypeDef,
+        TaskTypeDef,
+    )
+
+    from airflow.utils.context import Context
+
+EVENTUAL_CONSISTENCY_OFFSET: int = 15  # seconds
+EVENTUAL_CONSISTENCY_POLLING: int = 10  # seconds
+SUPPORTED_SOURCES = {"salesforce", "zendesk"}
+
+
+class AppflowOperatorException(Exception):
+    """Alias for Exception."""
+
+
+class AppflowOperatorBase(BaseOperator):
+    """Amazon Appflow Base Operator class (not supposed to be used directly in 
DAGs)."""
+
+    BLUE = "#2bccbd"
+    ui_color = BLUE
+
+    def __init__(
+        self,
+        source: str,
+        name: str,
+        flow_update: bool,
+        source_field: Optional[str] = None,
+        dt: Optional[str] = None,
+        poll_interval: int = 20,
+        aws_conn_id: Optional[str] = "aws_default",
+        region: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        if source not in SUPPORTED_SOURCES:
+            raise AppflowOperatorException(
+                f"{source} is not a supported source (options: 
{SUPPORTED_SOURCES})!"
+            )
+        self.dt = dt
+        self._name = name
+        self._source = source
+        self._source_field = source_field
+        self._poll_interval = poll_interval
+        self._aws_conn_id = aws_conn_id
+        self._region = region
+        self._flow_update = flow_update
+
+    @cached_property
+    def hook(self) -> AppflowHook:
+        """Create and return an AppflowHook."""
+        return AppflowHook(aws_conn_id=self.aws_conn_id, 
region_name=self._region)
+
+    @staticmethod
+    def _dt_to_epoch_str(dt: datetime) -> str:
+        text = str(int(dt.timestamp() * 1000))
+        return text

Review Comment:
   Do you think this may be useful to others?  Maybe consider moving it to the 
utils module.  I'm fine with it either way.



##########
airflow/providers/amazon/aws/operators/appflow.py:
##########
@@ -0,0 +1,562 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import copy
+import json
+import sys
+from datetime import datetime, timedelta, timezone
+from time import sleep
+from typing import TYPE_CHECKING, List, Optional, cast
+
+if sys.version_info >= (3, 8):
+    from functools import cached_property
+else:
+    from cached_property import cached_property
+
+from airflow.models import BaseOperator
+from airflow.operators.python import ShortCircuitOperator
+from airflow.providers.amazon.aws.hooks.appflow import AppflowHook
+
+if TYPE_CHECKING:
+    from mypy_boto3_appflow.client import AppflowClient
+    from mypy_boto3_appflow.type_defs import (
+        DescribeFlowExecutionRecordsResponseTypeDef,
+        ExecutionRecordTypeDef,
+        TaskTypeDef,
+    )
+
+    from airflow.utils.context import Context
+
+EVENTUAL_CONSISTENCY_OFFSET: int = 15  # seconds
+EVENTUAL_CONSISTENCY_POLLING: int = 10  # seconds
+SUPPORTED_SOURCES = {"salesforce", "zendesk"}
+
+
+class AppflowOperatorException(Exception):
+    """Alias for Exception."""
+
+
+class AppflowOperatorBase(BaseOperator):
+    """Amazon Appflow Base Operator class (not supposed to be used directly in 
DAGs)."""
+
+    BLUE = "#2bccbd"
+    ui_color = BLUE
+
+    def __init__(
+        self,
+        source: str,
+        name: str,
+        flow_update: bool,
+        source_field: Optional[str] = None,
+        dt: Optional[str] = None,
+        poll_interval: int = 20,
+        aws_conn_id: Optional[str] = "aws_default",
+        region: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        if source not in SUPPORTED_SOURCES:
+            raise AppflowOperatorException(
+                f"{source} is not a supported source (options: 
{SUPPORTED_SOURCES})!"
+            )
+        self.dt = dt
+        self._name = name
+        self._source = source
+        self._source_field = source_field
+        self._poll_interval = poll_interval
+        self._aws_conn_id = aws_conn_id
+        self._region = region
+        self._flow_update = flow_update
+
+    @cached_property
+    def hook(self) -> AppflowHook:
+        """Create and return an AppflowHook."""
+        return AppflowHook(aws_conn_id=self.aws_conn_id, 
region_name=self._region)
+
+    @staticmethod
+    def _dt_to_epoch_str(dt: datetime) -> str:
+        text = str(int(dt.timestamp() * 1000))
+        return text
+
+    def _get_connector_type(self) -> str:
+        connector_type = self._response["sourceFlowConfig"]["connectorType"]
+        if (self.source == "salesforce" and connector_type != "Salesforce") or 
(
+            self.source == "zendesk" and connector_type != "Zendesk"
+        ):
+            raise AppflowOperatorException(
+                f"Incompatible source ({self.source} and connector type 
({connector_type})!"
+            )
+        return connector_type
+
+    def execute(self, context: "Context") -> None:
+        self._af_client: "AppflowClient" = self.hook.conn
+        self._dt_parsed: Optional[datetime] = datetime.fromisoformat(self.dt) 
if self.dt else None
+        if self._flow_update:
+            self._update_flow()
+        self._run_flow(context)
+
+    def _update_flow(self) -> None:
+        self._response = self._af_client.describe_flow(flowName=self.name)
+        self._connector_type = self._get_connector_type()

Review Comment:
   Related to my comment on `self._af_client` in `execute`, that change would turn these two lines into:
   
   ```
   client = self.hook.conn
   response = client.describe_flow(flowName=self.name)
   connector_type = self._get_connector_type()
   ```
   
   which is much easier to read.



##########
airflow/providers/amazon/aws/operators/appflow.py:
##########
@@ -0,0 +1,562 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import copy
+import json
+import sys
+from datetime import datetime, timedelta, timezone
+from time import sleep
+from typing import TYPE_CHECKING, List, Optional, cast
+
+if sys.version_info >= (3, 8):
+    from functools import cached_property
+else:
+    from cached_property import cached_property
+
+from airflow.models import BaseOperator
+from airflow.operators.python import ShortCircuitOperator
+from airflow.providers.amazon.aws.hooks.appflow import AppflowHook
+
+if TYPE_CHECKING:
+    from mypy_boto3_appflow.client import AppflowClient
+    from mypy_boto3_appflow.type_defs import (
+        DescribeFlowExecutionRecordsResponseTypeDef,
+        ExecutionRecordTypeDef,
+        TaskTypeDef,
+    )
+
+    from airflow.utils.context import Context
+
+EVENTUAL_CONSISTENCY_OFFSET: int = 15  # seconds
+EVENTUAL_CONSISTENCY_POLLING: int = 10  # seconds
+SUPPORTED_SOURCES = {"salesforce", "zendesk"}
+
+
+class AppflowOperatorException(Exception):
+    """Alias for Exception."""
+
+
+class AppflowOperatorBase(BaseOperator):
+    """Amazon Appflow Base Operator class (not supposed to be used directly in 
DAGs)."""
+
+    BLUE = "#2bccbd"
+    ui_color = BLUE
+
+    def __init__(
+        self,
+        source: str,
+        name: str,
+        flow_update: bool,
+        source_field: Optional[str] = None,
+        dt: Optional[str] = None,
+        poll_interval: int = 20,
+        aws_conn_id: Optional[str] = "aws_default",
+        region: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        if source not in SUPPORTED_SOURCES:
+            raise AppflowOperatorException(
+                f"{source} is not a supported source (options: 
{SUPPORTED_SOURCES})!"
+            )
+        self.dt = dt
+        self._name = name
+        self._source = source
+        self._source_field = source_field
+        self._poll_interval = poll_interval
+        self._aws_conn_id = aws_conn_id
+        self._region = region
+        self._flow_update = flow_update
+
+    @cached_property
+    def hook(self) -> AppflowHook:
+        """Create and return an AppflowHook."""
+        return AppflowHook(aws_conn_id=self.aws_conn_id, 
region_name=self._region)
+
+    @staticmethod
+    def _dt_to_epoch_str(dt: datetime) -> str:
+        text = str(int(dt.timestamp() * 1000))
+        return text
+
+    def _get_connector_type(self) -> str:
+        connector_type = self._response["sourceFlowConfig"]["connectorType"]
+        if (self.source == "salesforce" and connector_type != "Salesforce") or 
(
+            self.source == "zendesk" and connector_type != "Zendesk"
+        ):

Review Comment:
   Consider making this more future-proof with `if self.source != 
connector_type.lower():`.  If you are also specifically trying to enforce that 
it's supported, then perhaps `if self.source in SUPPORTED_SOURCES and 
self.source != connector_type.lower():`, but I don't think you need the `in` 
check here since you already catch that in `__init__`.



##########
airflow/providers/amazon/aws/operators/appflow.py:
##########
@@ -0,0 +1,562 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import copy
+import json
+import sys
+from datetime import datetime, timedelta, timezone
+from time import sleep
+from typing import TYPE_CHECKING, List, Optional, cast
+
+if sys.version_info >= (3, 8):
+    from functools import cached_property
+else:
+    from cached_property import cached_property
+
+from airflow.models import BaseOperator
+from airflow.operators.python import ShortCircuitOperator
+from airflow.providers.amazon.aws.hooks.appflow import AppflowHook
+
+if TYPE_CHECKING:
+    from mypy_boto3_appflow.client import AppflowClient
+    from mypy_boto3_appflow.type_defs import (
+        DescribeFlowExecutionRecordsResponseTypeDef,
+        ExecutionRecordTypeDef,
+        TaskTypeDef,
+    )
+
+    from airflow.utils.context import Context
+
+EVENTUAL_CONSISTENCY_OFFSET: int = 15  # seconds
+EVENTUAL_CONSISTENCY_POLLING: int = 10  # seconds
+SUPPORTED_SOURCES = {"salesforce", "zendesk"}
+
+
+class AppflowOperatorException(Exception):
+    """Alias for Exception."""
+
+
+class AppflowOperatorBase(BaseOperator):
+    """Amazon Appflow Base Operator class (not supposed to be used directly in 
DAGs)."""
+
+    BLUE = "#2bccbd"
+    ui_color = BLUE
+
+    def __init__(
+        self,
+        source: str,
+        name: str,
+        flow_update: bool,
+        source_field: Optional[str] = None,
+        dt: Optional[str] = None,
+        poll_interval: int = 20,
+        aws_conn_id: Optional[str] = "aws_default",
+        region: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        if source not in SUPPORTED_SOURCES:
+            raise AppflowOperatorException(
+                f"{source} is not a supported source (options: 
{SUPPORTED_SOURCES})!"
+            )
+        self.dt = dt
+        self._name = name
+        self._source = source
+        self._source_field = source_field
+        self._poll_interval = poll_interval
+        self._aws_conn_id = aws_conn_id
+        self._region = region
+        self._flow_update = flow_update
+
+    @cached_property
+    def hook(self) -> AppflowHook:
+        """Create and return an AppflowHook."""
+        return AppflowHook(aws_conn_id=self.aws_conn_id, 
region_name=self._region)
+
+    @staticmethod
+    def _dt_to_epoch_str(dt: datetime) -> str:
+        text = str(int(dt.timestamp() * 1000))
+        return text
+
+    def _get_connector_type(self) -> str:
+        connector_type = self._response["sourceFlowConfig"]["connectorType"]
+        if (self.source == "salesforce" and connector_type != "Salesforce") or 
(
+            self.source == "zendesk" and connector_type != "Zendesk"
+        ):
+            raise AppflowOperatorException(
+                f"Incompatible source ({self.source} and connector type 
({connector_type})!"
+            )
+        return connector_type
+
+    def execute(self, context: "Context") -> None:
+        self._af_client: "AppflowClient" = self.hook.conn
+        self._dt_parsed: Optional[datetime] = datetime.fromisoformat(self.dt) 
if self.dt else None
+        if self._flow_update:

Review Comment:
   I get what you are thinking, but in general we don't bother with the 
"private" variable naming convention in class fields; i.e., `self._af_client` 
can just be `self.af_client`.  I'd personally suggest going one step further: 
drop the `self.` and just use `client = self.hook.conn` as needed, unless there 
is a specific reason not to.



##########
airflow/providers/amazon/aws/example_dags/example_appflow.py:
##########
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import datetime
+
+from airflow import DAG
+from airflow.operators.bash import BashOperator
+from airflow.providers.amazon.aws.operators.appflow import (
+    AppflowRecordsShortCircuit,
+    AppflowRunAfterOperator,
+    AppflowRunBeforeOperator,
+    AppflowRunDailyOperator,
+    AppflowRunFullOperator,
+    AppflowRunOperator,
+)
+
+SOURCE_NAME = "salesforce"
+FLOW_NAME = "salesforce-campaign"
+
+with DAG(
+    "example_appflow",
+    schedule_interval=None,
+    start_date=datetime(2022, 1, 1),
+    catchup=False,
+    tags=["example"],
+) as dag:
+
+    # [START howto_appflow_run]

Review Comment:
   To be fair, we just finished cleaning that up.  But yes, we should 
definitely push a doc containing all that standardizing.



##########
airflow/providers/amazon/aws/example_dags/example_appflow.py:
##########
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import datetime
+
+from airflow import DAG
+from airflow.operators.bash import BashOperator
+from airflow.providers.amazon.aws.operators.appflow import (
+    AppflowRecordsShortCircuit,
+    AppflowRunAfterOperator,
+    AppflowRunBeforeOperator,
+    AppflowRunDailyOperator,
+    AppflowRunFullOperator,
+    AppflowRunOperator,
+)
+
+SOURCE_NAME = "salesforce"
+FLOW_NAME = "salesforce-campaign"
+
+with DAG(
+    "example_appflow",
+    schedule_interval=None,
+    start_date=datetime(2022, 1, 1),
+    catchup=False,
+    tags=["example"],
+) as dag:
+
+    # [START howto_appflow_run]
+    run = AppflowRunOperator(
+        task_id="campaign-dump",

Review Comment:
   For all tasks in the example DAGs, the value of `task_id` should match the 
name of the variable the task is assigned to.  For example, please use either:
   ```
   run = AppflowRunOperator(
           task_id='run'
   ```
   
   or
   ```
   campaign_dump = AppflowRunOperator(
           task_id='campaign_dump'
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to