Taragolis commented on code in PR #29168: URL: https://github.com/apache/airflow/pull/29168#discussion_r1087077136
########## airflow/providers/amazon/aws/operators/neptune.py: ########## @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.neptune import NeptuneHook +from airflow.providers.amazon.aws.utils.neptune import NeptuneDbType + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class NeptuneStartDbOperator(BaseOperator): + """ + Starts a Neptune DB cluster + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:NeptuneStartDbOperator` + + :param db_identifier: The AWS identifier of the DB to start + :param db_type: Type of the DB - either "instance" or "cluster" (default: "cluster") + :param aws_conn_id: The Airflow connection used for AWS credentials. (default: "aws_default") + :param wait_for_completion: If True, waits for DB to start. (default: True) + + Note: In boto3 supports starting db operator only for cluster and not for instance db_type. 
+ So, default is maintained as Cluster, however it can be extended once instance db_type is available, + similar to RDS database implementation + """ + + template_fields = ("db_identifier", "db_type") + STATES_FOR_STARTING = ["available", "starting"] + + def __init__( + self, + *, + db_identifier: str, + db_type: NeptuneDbType | str = NeptuneDbType.CLUSTER, + aws_conn_id: str = "aws_default", + region_name: str = "us-east-1", Review Comment: We should define default region_name ```suggestion region_name: str | None = None, ``` ########## airflow/providers/amazon/aws/operators/neptune.py: ########## @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.neptune import NeptuneHook +from airflow.providers.amazon.aws.utils.neptune import NeptuneDbType + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class NeptuneStartDbOperator(BaseOperator): + """ + Starts a Neptune DB cluster + + .. 
seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:NeptuneStartDbOperator` + + :param db_identifier: The AWS identifier of the DB to start + :param db_type: Type of the DB - either "instance" or "cluster" (default: "cluster") + :param aws_conn_id: The Airflow connection used for AWS credentials. (default: "aws_default") + :param wait_for_completion: If True, waits for DB to start. (default: True) + + Note: In boto3 supports starting db operator only for cluster and not for instance db_type. + So, default is maintained as Cluster, however it can be extended once instance db_type is available, + similar to RDS database implementation + """ + + template_fields = ("db_identifier", "db_type") + STATES_FOR_STARTING = ["available", "starting"] + + def __init__( + self, + *, + db_identifier: str, + db_type: NeptuneDbType | str = NeptuneDbType.CLUSTER, + aws_conn_id: str = "aws_default", + region_name: str = "us-east-1", + wait_for_completion: bool = True, + **kwargs, + ): + super().__init__(**kwargs) + self.db_identifier = db_identifier + self.hook = NeptuneHook(aws_conn_id=aws_conn_id, region_name=region_name) + self.db_identifier = db_identifier + self.db_type = db_type + self.aws_conn_id = aws_conn_id + self.wait_for_completion = wait_for_completion + + def execute(self, context: Context) -> str: + self.db_type = NeptuneDbType(self.db_type) + start_db_response = None + if ( + self.hook.get_db_cluster_state(self.db_identifier) + not in NeptuneStartDbOperator.STATES_FOR_STARTING + ): + self._start_db() + + if self.wait_for_completion: + self._wait_until_db_available() + return json.dumps(start_db_response, default=str) + + def _start_db(self): + self.log.info("Starting DB %s '%s'", self.db_type.value, self.db_identifier) + self.hook.conn.start_db_cluster(DBClusterIdentifier=self.db_identifier) + + def _wait_until_db_available(self): + self.log.info("Waiting for DB %s to reach 'available' state", 
self.db_type.value) + self.hook.wait_for_db_cluster_state(self.db_identifier, target_state="available") + + +class NeptuneStopDbOperator(BaseOperator): + """ + Stops a Neptune DB cluster + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:NeptuneStopDbOperator` + + :param db_identifier: The AWS identifier of the DB to start + :param db_type: Type of the DB - either "instance" or "cluster" (default: "cluster") + :param aws_conn_id: The Airflow connection used for AWS credentials. (default: "aws_default") + :param wait_for_completion: If True, waits for DB to start. (default: True) + + Note: In boto3 supports starting db operator only for cluster and not for instance db_type. + So, default is maintained as Cluster, however it can be extended once instance db_type is available, + similar to RDS database implementation + """ + + template_fields = ("db_identifier", "db_type") + STATES_FOR_STOPPING = ["stopped", "stopping"] + + def __init__( + self, + *, + db_identifier: str, + db_type: NeptuneDbType | str = NeptuneDbType.INSTANCE, + aws_conn_id: str = "aws_default", + region_name: str = "us-east-1", Review Comment: Same as above ########## airflow/providers/amazon/aws/operators/neptune.py: ########## @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.neptune import NeptuneHook +from airflow.providers.amazon.aws.utils.neptune import NeptuneDbType + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class NeptuneStartDbOperator(BaseOperator): + """ + Starts a Neptune DB cluster + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:NeptuneStartDbOperator` + + :param db_identifier: The AWS identifier of the DB to start + :param db_type: Type of the DB - either "instance" or "cluster" (default: "cluster") + :param aws_conn_id: The Airflow connection used for AWS credentials. (default: "aws_default") + :param wait_for_completion: If True, waits for DB to start. (default: True) + + Note: In boto3 supports starting db operator only for cluster and not for instance db_type. 
+ So, default is maintained as Cluster, however it can be extended once instance db_type is available, + similar to RDS database implementation + """ + + template_fields = ("db_identifier", "db_type") + STATES_FOR_STARTING = ["available", "starting"] + + def __init__( + self, + *, + db_identifier: str, + db_type: NeptuneDbType | str = NeptuneDbType.CLUSTER, + aws_conn_id: str = "aws_default", + region_name: str = "us-east-1", + wait_for_completion: bool = True, + **kwargs, + ): + super().__init__(**kwargs) + self.db_identifier = db_identifier + self.hook = NeptuneHook(aws_conn_id=aws_conn_id, region_name=region_name) Review Comment: You should move hook definition to `@cached_property`, e.g.: https://github.com/apache/airflow/blob/b314db9880ca9936cbe82e5527bf80d34d3d7861/airflow/providers/amazon/aws/operators/athena.py#L102-L107 ########## airflow/providers/amazon/aws/hooks/neptune.py: ########## @@ -0,0 +1,113 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Interact with AWS Neptune.""" +from __future__ import annotations + +import time +from typing import Callable + +from airflow.exceptions import AirflowException, AirflowNotFoundException +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook + + +class NeptuneHook(AwsBaseHook): + """ + Interact with AWS Neptune using proper client from the boto3 library. + + Hook attribute `conn` has all methods that listed in documentation + + .. seealso:: + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/neptune.html + - https://docs.aws.amazon.com/neptune/index.html + + Additional arguments (such as ``aws_conn_id`` or ``region_name``) may be specified and + are passed down to the underlying AwsBaseHook. + + .. seealso:: + :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsGenericHook` + + :param aws_conn_id: The Airflow connection used for AWS credentials. + """ + + def __init__(self, *args, **kwargs) -> None: + kwargs["client_type"] = "neptune" + super().__init__(*args, **kwargs) + + def get_db_cluster_state(self, db_cluster_id: str) -> str: + """ + Get the current state of a DB cluster. + + :param db_cluster_id: The ID of the target DB cluster. + :return: Returns the status of the DB cluster as a string (eg. "available") + :rtype: str + :raises AirflowNotFoundException: If the DB cluster does not exist. + """ + try: + response = self.conn.describe_db_clusters(DBClusterIdentifier=db_cluster_id) + except self.conn.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "DBClusterNotFoundFault": + raise AirflowNotFoundException(e) + raise e + return response["DBClusters"][0]["Status"].lower() + + def wait_for_db_cluster_state( + self, db_cluster_id: str, target_state: str, check_interval: int = 30, max_attempts: int = 40 + ) -> None: + """ + Polls until the target state is reached. + An error is raised after a max number of attempts. + + :param db_cluster_id: The ID of the target DB cluster. 
+ :param target_state: Wait until this state is reached + :param check_interval: The amount of time in seconds to wait between attempts + :param max_attempts: The maximum number of attempts to be made + + """ + + def poke(): + return self.get_db_cluster_state(db_cluster_id) + + target_state = target_state.lower() + self._wait_for_state(poke, target_state, check_interval, max_attempts) + self.log.info("DB cluster snapshot '%s' reached the '%s' state", db_cluster_id, target_state) Review Comment: I think currently we have a different method for waiting operations in new hooks? @vincbeck @ferruzzi @vandonr-amz Am I right? ########## airflow/providers/amazon/aws/operators/neptune.py: ########## @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.neptune import NeptuneHook +from airflow.providers.amazon.aws.utils.neptune import NeptuneDbType + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class NeptuneStartDbOperator(BaseOperator): + """ + Starts a Neptune DB cluster + + ..
seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:NeptuneStartDbOperator` + + :param db_identifier: The AWS identifier of the DB to start + :param db_type: Type of the DB - either "instance" or "cluster" (default: "cluster") + :param aws_conn_id: The Airflow connection used for AWS credentials. (default: "aws_default") + :param wait_for_completion: If True, waits for DB to start. (default: True) + + Note: In boto3 supports starting db operator only for cluster and not for instance db_type. + So, default is maintained as Cluster, however it can be extended once instance db_type is available, + similar to RDS database implementation + """ + + template_fields = ("db_identifier", "db_type") + STATES_FOR_STARTING = ["available", "starting"] + + def __init__( + self, + *, + db_identifier: str, + db_type: NeptuneDbType | str = NeptuneDbType.CLUSTER, + aws_conn_id: str = "aws_default", + region_name: str = "us-east-1", + wait_for_completion: bool = True, + **kwargs, + ): + super().__init__(**kwargs) + self.db_identifier = db_identifier + self.hook = NeptuneHook(aws_conn_id=aws_conn_id, region_name=region_name) + self.db_identifier = db_identifier + self.db_type = db_type + self.aws_conn_id = aws_conn_id + self.wait_for_completion = wait_for_completion + + def execute(self, context: Context) -> str: + self.db_type = NeptuneDbType(self.db_type) + start_db_response = None + if ( + self.hook.get_db_cluster_state(self.db_identifier) + not in NeptuneStartDbOperator.STATES_FOR_STARTING + ): + self._start_db() + + if self.wait_for_completion: + self._wait_until_db_available() + return json.dumps(start_db_response, default=str) + + def _start_db(self): + self.log.info("Starting DB %s '%s'", self.db_type.value, self.db_identifier) + self.hook.conn.start_db_cluster(DBClusterIdentifier=self.db_identifier) + + def _wait_until_db_available(self): + self.log.info("Waiting for DB %s to reach 'available' state", 
self.db_type.value) + self.hook.wait_for_db_cluster_state(self.db_identifier, target_state="available") + + +class NeptuneStopDbOperator(BaseOperator): + """ + Stops a Neptune DB cluster + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:NeptuneStopDbOperator` + + :param db_identifier: The AWS identifier of the DB to start + :param db_type: Type of the DB - either "instance" or "cluster" (default: "cluster") + :param aws_conn_id: The Airflow connection used for AWS credentials. (default: "aws_default") + :param wait_for_completion: If True, waits for DB to start. (default: True) + + Note: In boto3 supports starting db operator only for cluster and not for instance db_type. + So, default is maintained as Cluster, however it can be extended once instance db_type is available, + similar to RDS database implementation + """ + + template_fields = ("db_identifier", "db_type") + STATES_FOR_STOPPING = ["stopped", "stopping"] + + def __init__( + self, + *, + db_identifier: str, + db_type: NeptuneDbType | str = NeptuneDbType.INSTANCE, + aws_conn_id: str = "aws_default", + region_name: str = "us-east-1", + wait_for_completion: bool = True, + **kwargs, + ): + super().__init__(**kwargs) + self.hook = NeptuneHook(aws_conn_id=aws_conn_id, region_name=region_name) + self.db_identifier = db_identifier + self.db_type = db_type + self.aws_conn_id = aws_conn_id + self.wait_for_completion = wait_for_completion + + def execute(self, context: Context) -> str: + self.db_type = NeptuneDbType(self.db_type) + stop_db_response = None + if ( + self.hook.get_db_cluster_state(self.db_identifier) + not in NeptuneStopDbOperator.STATES_FOR_STOPPING + ): + stop_db_response = self._stop_db() + if self.wait_for_completion: + self._wait_until_db_stopped() + return json.dumps(stop_db_response, default=str) + + def _stop_db(self): + self.log.info("Stopping DB %s '%s'", self.db_type.value, self.db_identifier) + response = 
self.hook.conn.stop_db_cluster(DBClusterIdentifier=self.db_identifier) + return response + + def _wait_until_db_stopped(self): + self.log.info("Waiting for DB %s to reach 'stopped' state", self.db_type.value) + self.hook.wait_for_db_cluster_state(self.db_identifier, target_state="stopped") + + +__all__ = ["NeptuneStartDbOperator", "NeptuneStopDbOperator"] Review Comment: If you want to include `__all__` include it at the top of the module ########## airflow/providers/amazon/aws/operators/neptune.py: ########## @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.neptune import NeptuneHook +from airflow.providers.amazon.aws.utils.neptune import NeptuneDbType + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class NeptuneStartDbOperator(BaseOperator): + """ + Starts a Neptune DB cluster + + ..
seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:NeptuneStartDbOperator` + + :param db_identifier: The AWS identifier of the DB to start + :param db_type: Type of the DB - either "instance" or "cluster" (default: "cluster") + :param aws_conn_id: The Airflow connection used for AWS credentials. (default: "aws_default") + :param wait_for_completion: If True, waits for DB to start. (default: True) + + Note: In boto3 supports starting db operator only for cluster and not for instance db_type. + So, default is maintained as Cluster, however it can be extended once instance db_type is available, + similar to RDS database implementation + """ + + template_fields = ("db_identifier", "db_type") + STATES_FOR_STARTING = ["available", "starting"] + + def __init__( + self, + *, + db_identifier: str, + db_type: NeptuneDbType | str = NeptuneDbType.CLUSTER, + aws_conn_id: str = "aws_default", Review Comment: `aws_conn_id` could be None; in this case the default boto3 strategy would be used. ```suggestion aws_conn_id: str | None = "aws_default", ``` ########## airflow/providers/amazon/aws/hooks/neptune.py: ########## @@ -0,0 +1,113 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +"""Interact with AWS Neptune.""" +from __future__ import annotations + +import time +from typing import Callable + +from airflow.exceptions import AirflowException, AirflowNotFoundException +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook + + +class NeptuneHook(AwsBaseHook): + """ + Interact with AWS Neptune using proper client from the boto3 library. + + Hook attribute `conn` has all methods that listed in documentation + + .. seealso:: + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/neptune.html + - https://docs.aws.amazon.com/neptune/index.html + + Additional arguments (such as ``aws_conn_id`` or ``region_name``) may be specified and + are passed down to the underlying AwsBaseHook. + + .. seealso:: + :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsGenericHook` + + :param aws_conn_id: The Airflow connection used for AWS credentials. + """ Review Comment: Please define Hook docsting by the same way it is implemented in other boto3-hooks. See example https://github.com/apache/airflow/blob/b314db9880ca9936cbe82e5527bf80d34d3d7861/airflow/providers/amazon/aws/hooks/ecs.py#L89-L101 You could check in [`main` branch documentation](http://apache-airflow-docs.s3-website.eu-central-1.amazonaws.com/docs/apache-airflow-providers-amazon/latest/_api/airflow/providers/amazon/aws/hooks/ecs/index.html#airflow.providers.amazon.aws.hooks.ecs.EcsHook) how it looks like. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
