kazanzhy commented on a change in pull request #20907: URL: https://github.com/apache/airflow/pull/20907#discussion_r808553527
########## File path: airflow/providers/amazon/aws/operators/rds.py ########## @@ -0,0 +1,605 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import time +from typing import TYPE_CHECKING, List, Optional, Sequence + +from mypy_boto3_rds.type_defs import TagTypeDef + +from airflow.exceptions import AirflowException +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.rds import RdsHook +from airflow.providers.amazon.aws.utils.rds import RdsDbType + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class BaseRdsOperator(BaseOperator): + """Base operator that implements common functions for all operators""" + + ui_color = "#eeaa88" + ui_fgcolor = "#ffffff" + + def __init__(self, *args, aws_conn_id: str = "aws_conn_id", hook_params: Optional[dict] = None, **kwargs): + hook_params = hook_params or {} + self.hook = RdsHook(aws_conn_id=aws_conn_id, **hook_params) + super().__init__(*args, **kwargs) + + self._wait_interval = 60 # seconds + + def _describe_item(self, **kwargs) -> list: + """Returns information about target item: snapshot, task or event""" + raise NotImplementedError + + def _await_termination( + self, + wait_statuses: Optional[List[str]] = None, + 
ok_statuses: Optional[List[str]] = None, + error_statuses: Optional[List[str]] = None, + **kwargs, + ) -> list: + """ + Continuously gets item description from `_describe_item()` and waits until: + - status is in `wait_statuses` + - status not in `ok_statuses` and `error_statuses` + - `_describe_item()` returns non-empty list + """ + while True: + items = self._describe_item(**kwargs) + + if len(items) == 0: + break + elif len(items) > 1: + raise AirflowException(f"There is more than one item with the same identifier: {items}") + + if wait_statuses and items[0]['Status'] in wait_statuses: + continue + elif ok_statuses and items[0]['Status'] in ok_statuses: + break + elif error_statuses and items[0]['Status'] in error_statuses: + raise AirflowException(f"All items have error statuses: {items}") + + time.sleep(self._wait_interval) + + return items + + def execute(self, context: 'Context') -> str: + """Different implementations for snapshots, tasks and events""" + raise NotImplementedError + + def on_kill(self) -> None: + """Different implementations for snapshots, tasks and events""" + raise NotImplementedError + + +class RdsCreateDbSnapshotOperator(BaseRdsOperator): + """ + Creates a snapshot of a DB instance or DB cluster. + The source DB instance or cluster must be in the available or storage-optimization state. + + .. 
seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:RdsCreateDbSnapshotOperator` + + :param db_type: Type of the DB - either "instance" or "cluster" + :type db_type: RDSDbType + :param db_identifier: The identifier of the instance or cluster that you want to create the snapshot of + :type db_identifier: str + :param db_snapshot_identifier: The identifier for the DB snapshot + :type db_snapshot_identifier: str + :param tags: A list of tags in format `[{"Key": "something", "Value": "something"},] + `USER Tagging <https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_Tagging.html>`__ + :type tags: Sequence[TagTypeDef] or None + """ + + template_fields = ("db_snapshot_identifier", "db_instance_identifier", "tags") + + def __init__( + self, + *, + db_type: str, + db_identifier: str, + db_snapshot_identifier: str, + tags: Optional[Sequence[TagTypeDef]] = None, + aws_conn_id: str = "aws_conn_id", + **kwargs, + ): + super().__init__(aws_conn_id=aws_conn_id, **kwargs) + self.db_type = RdsDbType(db_type) + self.db_identifier = db_identifier + self.db_snapshot_identifier = db_snapshot_identifier + self.tags = tags or [] + + def _describe_item(self, **kwargs) -> list: + """Returns snapshot info""" + if self.db_type.value == "instance": + db_snapshots = self.hook.conn.describe_db_snapshots( + DBInstanceIdentifier=self.db_identifier, + DBSnapshotIdentifier=self.db_snapshot_identifier, + **kwargs, + ) + return db_snapshots['DBSnapshots'] + else: + db_cluster_snapshots = self.hook.conn.describe_db_cluster_snapshots( + DBClusterIdentifier=self.db_identifier, + DBClusterSnapshotIdentifier=self.db_snapshot_identifier, + **kwargs, + ) + return db_cluster_snapshots['DBClusterSnapshots'] + + def execute(self, context: 'Context') -> str: + self.log.info( + "Starting to create snapshot of RDS %s '%s': %s", + self.db_type, + self.db_identifier, + self.db_snapshot_identifier, + ) + + if self.db_type.value == "instance": + 
create_db_snap = self.hook.conn.create_db_snapshot( + DBInstanceIdentifier=self.db_identifier, + DBSnapshotIdentifier=self.db_snapshot_identifier, + Tags=self.tags, + ) + create_response = json.dumps(create_db_snap, default=str) + else: + create_db_cluster_snap = self.hook.conn.create_db_cluster_snapshot( + DBClusterIdentifier=self.db_identifier, + DBClusterSnapshotIdentifier=self.db_snapshot_identifier, + Tags=self.tags, + ) + create_response = json.dumps(create_db_cluster_snap, default=str) + + self._await_termination(wait_statuses=['creating'], ok_statuses=['available']) + + return create_response + + +class RdsCopyDbSnapshotOperator(BaseRdsOperator): + """ + Copies the specified DB instance or DB cluster snapshot + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:RdsCopyDbSnapshotOperator` + + :param db_type: Type of the DB - either "instance" or "cluster" + :type db_type: RDSDbType + :param source_db_snapshot_identifier: The identifier of the source snapshot + :type source_db_snapshot_identifier: str + :param target_db_snapshot_identifier: The identifier of the target snapshot + :type target_db_snapshot_identifier: str + :param kms_key_id: The AWS KMS key identifier for an encrypted DB snapshot + :type kms_key_id: str or None + :param tags: A list of tags in format `[{"Key": "something", "Value": "something"},] + `USER Tagging <https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_Tagging.html>`__ + :type tags: Sequence[TagTypeDef] or None + :param copy_tags: Whether to copy all tags from the source snapshot to the target snapshot (default False) + :type copy_tags: bool or None + :param pre_signed_url: The URL that contains a Signature Version 4 signed request + :type pre_signed_url: str or None + :param option_group_name: The name of an option group to associate with the copy of the snapshot + Only when db_type='instance' + :type option_group_name: str or None + :param 
target_custom_availability_zone: The external custom Availability Zone identifier for the target + Only when db_type='instance' + :type target_custom_availability_zone: str or None + :param source_region: The ID of the region that contains the snapshot to be copied + :type source_region: str or None + """ + + template_fields = ( + "source_db_snapshot_identifier", + "target_db_snapshot_identifier", + "tags", + "pre_signed_url", + "option_group_name", + ) + + def __init__( + self, + *, + db_type: str, + source_db_snapshot_identifier: str, + target_db_snapshot_identifier: str, + kms_key_id: str = "", + tags: Optional[Sequence[TagTypeDef]] = None, + copy_tags: bool = False, + pre_signed_url: str = "", + option_group_name: str = "", + target_custom_availability_zone: str = "", + source_region: str = "", + aws_conn_id: str = "aws_default", + **kwargs, + ): + super().__init__(aws_conn_id=aws_conn_id, **kwargs) + + self.db_type = RdsDbType(db_type) + self.source_db_snapshot_identifier = source_db_snapshot_identifier + self.target_db_snapshot_identifier = target_db_snapshot_identifier + self.kms_key_id = kms_key_id + self.tags = tags or [] + self.copy_tags = copy_tags + self.pre_signed_url = pre_signed_url + self.option_group_name = option_group_name + self.target_custom_availability_zone = target_custom_availability_zone + self.source_region = source_region + + def _describe_item(self, **kwargs) -> list: + """Returns existing snapshots""" + if self.db_type.value == "instance": + db_snapshots = self.hook.conn.describe_db_snapshots( + DBSnapshotIdentifier=self.target_db_snapshot_identifier, **kwargs + ) + return db_snapshots['DBSnapshots'] + else: + db_cluster_snapshots = self.hook.conn.describe_db_cluster_snapshots( + DBClusterSnapshotIdentifier=self.target_db_snapshot_identifier, **kwargs + ) + return db_cluster_snapshots['DBClusterSnapshots'] + + def execute(self, context: 'Context') -> str: + self.log.info( + "Starting to copy snapshot '%s' as '%s'", + 
self.source_db_snapshot_identifier, + self.target_db_snapshot_identifier, + ) + + if self.db_type.value == "instance": Review comment: I implemented one solution. It seems to have reduced some of the duplication. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
