igorborgest commented on code in PR #24057: URL: https://github.com/apache/airflow/pull/24057#discussion_r898461818
########## airflow/providers/amazon/aws/operators/appflow.py: ########## @@ -0,0 +1,550 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import copy +import json +import sys +from datetime import datetime, timedelta, timezone +from time import sleep +from typing import TYPE_CHECKING, List, Optional, cast + +if sys.version_info >= (3, 8): + from functools import cached_property +else: + from cached_property import cached_property + +from airflow.exceptions import AirflowException +from airflow.models import BaseOperator +from airflow.operators.python import ShortCircuitOperator +from airflow.providers.amazon.aws.hooks.appflow import AppflowHook +from airflow.providers.amazon.aws.utils import datetime_to_epoch_ms, get_airflow_version + +if TYPE_CHECKING: + from mypy_boto3_appflow.type_defs import ( + DescribeFlowExecutionRecordsResponseTypeDef, + DescribeFlowResponseTypeDef, + ExecutionRecordTypeDef, + TaskTypeDef, + ) + + from airflow.utils.context import Context + +EVENTUAL_CONSISTENCY_OFFSET: int = 15 # seconds +EVENTUAL_CONSISTENCY_POLLING: int = 10 # seconds +SUPPORTED_SOURCES = {"salesforce", "zendesk"} +MANDATORY_FILTER_DATE_MSG = "The filter_date argument is mandatory for {entity}!" 
+NOT_SUPPORTED_SOURCE_MSG = "Source {source} is not supported for {entity}!" + + +class AppflowBaseOperator(BaseOperator): + """ + Amazon Appflow Base Operator class (not supposed to be used directly in DAGs). + + :param source: The source name (Supported: salesforce, zendesk) + :param name: The flow name + :param flow_update: A boolean to enable/disable the a flow update before the run + :param source_field: The field name to apply filters + :param filter_date: The date value (or template) to be used in filters. + :param poll_interval: how often in seconds to check the query status + :param aws_conn_id: aws connection to use + :param region: aws region to use + """ + + BLUE = "#2bccbd" + ui_color = BLUE + + def __init__( + self, + source: str, + name: str, + flow_update: bool, + source_field: Optional[str] = None, + filter_date: Optional[str] = None, + poll_interval: int = 20, + aws_conn_id: str = "aws_default", + region: Optional[str] = None, + **kwargs, + ) -> None: + super().__init__(**kwargs) + if source not in SUPPORTED_SOURCES: + raise ValueError(f"{source} is not a supported source (options: {SUPPORTED_SOURCES})!") + self.filter_date = filter_date + self.name = name + self.source = source + self.source_field = source_field + self.poll_interval = poll_interval + self.aws_conn_id = aws_conn_id + self.region = region + self.flow_update = flow_update + + @cached_property + def hook(self) -> AppflowHook: + """Create and return an AppflowHook.""" + return AppflowHook(aws_conn_id=self.aws_conn_id, region_name=self.region) + + def execute(self, context: "Context") -> None: + self.filter_date_parsed: Optional[datetime] = ( + datetime.fromisoformat(self.filter_date) if self.filter_date else None + ) + if self.flow_update: + self._update_flow() + self._run_flow(context) + + def _get_connector_type(self, response: "DescribeFlowResponseTypeDef") -> str: + connector_type = response["sourceFlowConfig"]["connectorType"] + if self.source != connector_type.lower(): + raise 
ValueError(f"Incompatible source ({self.source} and connector type ({connector_type})!") + return connector_type + + def _update_flow(self) -> None: + response = self.hook.conn.describe_flow(flowName=self.name) + connector_type = self._get_connector_type(response) + + # cleanup + tasks: List["TaskTypeDef"] = [] + for task in response["tasks"]: + if ( + task["taskType"] == "Filter" + and task.get("connectorOperator", {}).get(connector_type) != "PROJECTION" + ): + self.log.info("Removing task: %s", task) + else: + tasks.append(task) # List of non-filter tasks + + self._add_filter(connector_type, tasks) + + # Clean up to force on-demand trigger + trigger_config = copy.deepcopy(response["triggerConfig"]) + del trigger_config["triggerProperties"] + + self.hook.conn.update_flow( + flowName=response["flowName"], + destinationFlowConfigList=response["destinationFlowConfigList"], + sourceFlowConfig=response["sourceFlowConfig"], + triggerConfig=trigger_config, + description=response.get("description", "Flow description."), + tasks=tasks, + ) + + def _add_filter(self, connector_type: str, tasks: List["TaskTypeDef"]) -> None: # Interface + pass + + def _run_flow(self, context) -> str: + ts_before: datetime = datetime.now(timezone.utc) Review Comment: I can't use `time.monotonic()` in that case because it is not an elapsed-time measurement situation. I really need to get the UTC time to compare to the UTC time returned later by Amazon AppFlow. Is it OK? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
