turbaszek commented on a change in pull request #9593:
URL: https://github.com/apache/airflow/pull/9593#discussion_r449415934
##########
File path: airflow/providers/google/cloud/operators/dataproc.py
##########
@@ -502,32 +506,79 @@ def __init__(self,
self.timeout = timeout
self.metadata = metadata
self.gcp_conn_id = gcp_conn_id
+ self.delete_on_error = delete_on_error
+
+ def _create_cluster(self, hook):
+ operation = hook.create_cluster(
+ project_id=self.project_id,
+ region=self.region,
+ cluster=self.cluster,
+ request_id=self.request_id,
+ retry=self.retry,
+ timeout=self.timeout,
+ metadata=self.metadata,
+ )
+ cluster = operation.result()
+ self.log.info("Cluster created.")
+ return cluster
+
+ def _delete_cluster(self, hook):
+ self.log.info("Deleting the cluster")
+ hook.delete_cluster(
+ region=self.region,
+ cluster_name=self.cluster_name,
+ project_id=self.project_id,
+ )
+ self.log.info("Cluster %s deleted", self.cluster_name)
+
+ def _get_cluster(self, hook):
+ return hook.get_cluster(
+ project_id=self.project_id,
+ region=self.region,
+ cluster_name=self.cluster_name,
+ retry=self.retry,
+ timeout=self.timeout,
+ metadata=self.metadata,
+ )
+
+ def _handle_error_state(self, hook):
+ self.log.info("Cluster is in ERROR state")
+ gcs_uri = hook.diagnose_cluster(
+ region=self.region,
+ cluster_name=self.cluster_name,
+ project_id=self.project_id,
+ )
+ self.log.info(
+ 'Diagnostic information for cluster %s available at: %s',
+ self.cluster_name, gcs_uri
+ )
+ if self.delete_on_error:
+ self._delete_cluster(hook)
def execute(self, context):
self.log.info('Creating cluster: %s', self.cluster_name)
hook = DataprocHook(gcp_conn_id=self.gcp_conn_id)
try:
- operation = hook.create_cluster(
- project_id=self.project_id,
- region=self.region,
- cluster=self.cluster,
- request_id=self.request_id,
- retry=self.retry,
- timeout=self.timeout,
- metadata=self.metadata,
- )
- cluster = operation.result()
- self.log.info("Cluster created.")
+ cluster = self._create_cluster(hook)
except AlreadyExists:
- cluster = hook.get_cluster(
- project_id=self.project_id,
- region=self.region,
- cluster_name=self.cluster_name,
- retry=self.retry,
- timeout=self.timeout,
- metadata=self.metadata,
- )
self.log.info("Cluster already exists.")
+ cluster = self._get_cluster(hook)
+
+ if cluster.status.state == cluster.status.ERROR:
+ self._handle_error_state(hook)
+ elif cluster.status.state == cluster.status.DELETING:
+ # Wait for cluster to delete
+ for time_to_sleep in exponential_sleep_generator(initial=10,
maximum=120):
Review comment:
I've added a 5-minute timeout. @dossett, do you think that should be OK
in most cases?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]