phanikumv commented on code in PR #31787:
URL: https://github.com/apache/airflow/pull/31787#discussion_r1233754194
##########
airflow/providers/alibaba/cloud/hooks/analyticdb_spark.py:
##########
@@ -0,0 +1,377 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+from enum import Enum
+from typing import Any, Sequence
+
+from airflow.exceptions import AirflowException
+from airflow.hooks.base import BaseHook
+from airflow.utils.log.logging_mixin import LoggingMixin
+
+from alibabacloud_adb20211201.client import Client
+from alibabacloud_adb20211201.models import SubmitSparkAppRequest, SubmitSparkAppResponse, GetSparkAppStateRequest, \
+    GetSparkAppLogRequest, KillSparkAppRequest, GetSparkAppWebUiAddressRequest
+from alibabacloud_tea_openapi.models import Config
+
+
+class AppState(Enum):
+    """AnalyticDB Spark application states"""
+
+    SUBMITTED = "SUBMITTED"
+    STARTING = "STARTING"
+    RUNNING = "RUNNING"
+    FAILING = "FAILING"
+    FAILED = "FAILED"
+    KILLING = "KILLING"
+    KILLED = "KILLED"
+    SUCCEEDING = "SUCCEEDING"
+    COMPLETED = "COMPLETED"
+    FATAL = "FATAL"
+    UNKNOWN = "UNKNOWN"
+
+
+class AnalyticDBSparkHook(BaseHook, LoggingMixin):
+    """
+    Hook for AnalyticDB MySQL Spark through the REST API.
+
+    :param adb_spark_conn_id: The Airflow connection used for AnalyticDB MySQL Spark credentials.
+    :param region: AnalyticDB MySQL region you want to submit spark application.
+    """
+
+    TERMINAL_STATES = {
+        AppState.COMPLETED,
+        AppState.FAILED,
+        AppState.FATAL,
+        AppState.KILLED
+    }
+
+    conn_name_attr = "alibabacloud_conn_id"
+    default_conn_name = "adb_spark_default"
+    conn_type = "adb_spark"
+    hook_name = "AnalyticDB Spark"
+
+    def __init__(
+        self,
+        adb_spark_conn_id: str = "adb_spark_default",
+        region: str | None = None,
+        *args,
+        **kwargs
+    ) -> None:
+        self.adb_spark_conn_id = adb_spark_conn_id
+        self.adb_spark_conn = self.get_connection(adb_spark_conn_id)
+        self.region = self.get_default_region() if region is None else region
+        super().__init__(*args, **kwargs)
+
+    def submit_spark_app(
+        self,
+        cluster_id: str,
+        rg_name: str,
+        *args: Any,
+        **kwargs: Any
+    ) -> SubmitSparkAppResponse:
+        """
+        Perform request to submit spark application
+
+        :param cluster_id: The cluster ID of AnalyticDB MySQL 3.0 Data Lakehouse.
+        :param rg_name: The name of resource group in AnalyticDB MySQL 3.0 Data Lakehouse cluster.
+        """
+        self.log.info("Submitting application")
+        try:
+            request = SubmitSparkAppRequest(
+                dbcluster_id=cluster_id,
+                resource_group_name=rg_name,
+                data=json.dumps(self.build_submit_app_data(*args, **kwargs)),
+                app_type="BATCH"
+            )
+            return self.get_adb_spark_client().submit_spark_app(request)
+        except Exception as e:
+            self.log.error(e)
+            raise AirflowException("Errors when submit spark application") from e
+
+    def submit_spark_sql(
+        self,
+        cluster_id: str,
+        rg_name: str,
+        *args: Any,
+        **kwargs: Any
+    ) -> SubmitSparkAppResponse:
+        """
+        Perform request to submit spark sql
+
+        :param cluster_id: The cluster ID of AnalyticDB MySQL 3.0 Data Lakehouse.
+        :param rg_name: The name of resource group in AnalyticDB MySQL 3.0 Data Lakehouse cluster.
+        """
+        self.log.info("Submitting Spark SQL")
+        request = SubmitSparkAppRequest(
+            dbcluster_id=cluster_id,
+            resource_group_name=rg_name,
+            data=self.build_submit_sql_data(*args, **kwargs),
+            app_type="SQL"
+        )
+        try:
+            return self.get_adb_spark_client().submit_spark_app(request)
+        except Exception as e:
+            self.log.error(e)
+            raise AirflowException("Errors when submit spark sql") from e
+
+    def get_spark_state(self, app_id: str) -> str:
+        """
+        Fetch the state of the specified spark application
+
+        :param app_id: identifier of the spark application
+
+        :return: application state
+        """
+        self.log.debug("Fetching state for spark application %s", app_id)
+        try:
+            return self.get_adb_spark_client().get_spark_app_state(
+                GetSparkAppStateRequest(app_id=app_id)
+            ).body.data.state
+        except Exception as e:
+            self.log.error(e)
+            raise AirflowException(f"Errors when fetching state for spark application: {app_id}") from e
+
+    def get_spark_web_ui_address(self, app_id: str) -> str:
+        """
+        Fetch the web ui address of the specified spark application
+
+        :param app_id: identifier of the spark application
+
+        :return: web ui address for application

Review Comment:
   ```suggestion
   ```
##########
airflow/providers/alibaba/cloud/hooks/analyticdb_spark.py:
##########
@@ -0,0 +1,377 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+from enum import Enum
+from typing import Any, Sequence
+
+from airflow.exceptions import AirflowException
+from airflow.hooks.base import BaseHook
+from airflow.utils.log.logging_mixin import LoggingMixin
+
+from alibabacloud_adb20211201.client import Client
+from alibabacloud_adb20211201.models import SubmitSparkAppRequest, SubmitSparkAppResponse, GetSparkAppStateRequest, \
+    GetSparkAppLogRequest, KillSparkAppRequest, GetSparkAppWebUiAddressRequest
+from alibabacloud_tea_openapi.models import Config
+
+
+class AppState(Enum):
+    """AnalyticDB Spark application states"""
+
+    SUBMITTED = "SUBMITTED"
+    STARTING = "STARTING"
+    RUNNING = "RUNNING"
+    FAILING = "FAILING"
+    FAILED = "FAILED"
+    KILLING = "KILLING"
+    KILLED = "KILLED"
+    SUCCEEDING = "SUCCEEDING"
+    COMPLETED = "COMPLETED"
+    FATAL = "FATAL"
+    UNKNOWN = "UNKNOWN"
+
+
+class AnalyticDBSparkHook(BaseHook, LoggingMixin):
+    """
+    Hook for AnalyticDB MySQL Spark through the REST API.
+
+    :param adb_spark_conn_id: The Airflow connection used for AnalyticDB MySQL Spark credentials.
+    :param region: AnalyticDB MySQL region you want to submit spark application.
+    """
+
+    TERMINAL_STATES = {
+        AppState.COMPLETED,
+        AppState.FAILED,
+        AppState.FATAL,
+        AppState.KILLED
+    }
+
+    conn_name_attr = "alibabacloud_conn_id"
+    default_conn_name = "adb_spark_default"
+    conn_type = "adb_spark"
+    hook_name = "AnalyticDB Spark"
+
+    def __init__(
+        self,
+        adb_spark_conn_id: str = "adb_spark_default",
+        region: str | None = None,
+        *args,
+        **kwargs
+    ) -> None:
+        self.adb_spark_conn_id = adb_spark_conn_id
+        self.adb_spark_conn = self.get_connection(adb_spark_conn_id)
+        self.region = self.get_default_region() if region is None else region
+        super().__init__(*args, **kwargs)
+
+    def submit_spark_app(
+        self,
+        cluster_id: str,
+        rg_name: str,
+        *args: Any,
+        **kwargs: Any
+    ) -> SubmitSparkAppResponse:
+        """
+        Perform request to submit spark application
+
+        :param cluster_id: The cluster ID of AnalyticDB MySQL 3.0 Data Lakehouse.
+        :param rg_name: The name of resource group in AnalyticDB MySQL 3.0 Data Lakehouse cluster.
+        """
+        self.log.info("Submitting application")
+        try:
+            request = SubmitSparkAppRequest(
+                dbcluster_id=cluster_id,
+                resource_group_name=rg_name,
+                data=json.dumps(self.build_submit_app_data(*args, **kwargs)),
+                app_type="BATCH"
+            )
+            return self.get_adb_spark_client().submit_spark_app(request)
+        except Exception as e:
+            self.log.error(e)
+            raise AirflowException("Errors when submit spark application") from e
+
+    def submit_spark_sql(
+        self,
+        cluster_id: str,
+        rg_name: str,
+        *args: Any,
+        **kwargs: Any
+    ) -> SubmitSparkAppResponse:
+        """
+        Perform request to submit spark sql
+
+        :param cluster_id: The cluster ID of AnalyticDB MySQL 3.0 Data Lakehouse.
+        :param rg_name: The name of resource group in AnalyticDB MySQL 3.0 Data Lakehouse cluster.
+        """
+        self.log.info("Submitting Spark SQL")
+        request = SubmitSparkAppRequest(
+            dbcluster_id=cluster_id,
+            resource_group_name=rg_name,
+            data=self.build_submit_sql_data(*args, **kwargs),
+            app_type="SQL"
+        )
+        try:
+            return self.get_adb_spark_client().submit_spark_app(request)
+        except Exception as e:
+            self.log.error(e)
+            raise AirflowException("Errors when submit spark sql") from e
+
+    def get_spark_state(self, app_id: str) -> str:
+        """
+        Fetch the state of the specified spark application
+
+        :param app_id: identifier of the spark application
+
+        :return: application state

Review Comment:
   ```suggestion
   ```

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
