pankajkoti commented on code in PR #39130:
URL: https://github.com/apache/airflow/pull/39130#discussion_r1577878165


##########
airflow/providers/google/cloud/triggers/dataproc.py:
##########
@@ -140,24 +153,75 @@ def serialize(self) -> tuple[str, dict[str, Any]]:
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "polling_interval_seconds": self.polling_interval_seconds,
+                "delete_on_error": self.delete_on_error,
             },
         )
 
     async def run(self) -> AsyncIterator[TriggerEvent]:
-        while True:
-            cluster = await self.get_async_hook().get_cluster(
-                project_id=self.project_id, region=self.region, 
cluster_name=self.cluster_name
+        """Run the trigger."""

Review Comment:
   ```suggestion
   ```



##########
airflow/providers/google/cloud/triggers/dataproc.py:
##########
@@ -140,24 +153,75 @@ def serialize(self) -> tuple[str, dict[str, Any]]:
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "polling_interval_seconds": self.polling_interval_seconds,
+                "delete_on_error": self.delete_on_error,
             },
         )
 
     async def run(self) -> AsyncIterator[TriggerEvent]:
-        while True:
-            cluster = await self.get_async_hook().get_cluster(
-                project_id=self.project_id, region=self.region, 
cluster_name=self.cluster_name
+        """Run the trigger."""
+        try:
+            while True:
+                cluster = await self.fetch_cluster()
+                state = cluster.status.state
+                if state == ClusterStatus.State.ERROR:
+                    await self.delete_when_error_occurred(cluster)
+                    yield TriggerEvent(
+                        {
+                            "cluster_name": self.cluster_name,
+                            "cluster_state": state.ERROR,

Review Comment:
   Since `state == ClusterStatus.State.ERROR` after entering the `if` block, I 
don't think `state.ERROR`, which would translate to 
`ClusterStatus.State.ERROR.ERROR` or `ClusterStatus.State.ERROR.DELETING`, is 
the right usage. Can you check the type of the `state` variable and whether 
`state.DELETING` or `state.ERROR` is available on that type? 
   
   I think it could be, but I suggest checking the type to ensure that we have 
the right usage and that the right trigger event value is yielded in the worker.
   ```suggestion
                               "cluster_state": ClusterStatus.State.DELETING,
   ```



##########
airflow/providers/google/cloud/triggers/dataproc.py:
##########
@@ -140,24 +153,75 @@ def serialize(self) -> tuple[str, dict[str, Any]]:
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "polling_interval_seconds": self.polling_interval_seconds,
+                "delete_on_error": self.delete_on_error,
             },
         )
 
     async def run(self) -> AsyncIterator[TriggerEvent]:
-        while True:
-            cluster = await self.get_async_hook().get_cluster(
-                project_id=self.project_id, region=self.region, 
cluster_name=self.cluster_name
+        """Run the trigger."""

Review Comment:
   I don't think that docstring adds much value unless we need it for some 
pre-commit check. What do you think?



##########
tests/providers/google/cloud/triggers/test_dataproc.py:
##########
@@ -215,9 +228,48 @@ async def test_cluster_run_loop_is_still_running(
         await asyncio.sleep(0.5)
 
         assert not task.done()
-        assert f"Current state is: {ClusterStatus.State.CREATING}"
+        assert f"Current state is: {ClusterStatus.State.CREATING}."
         assert f"Sleeping for {TEST_POLL_INTERVAL} seconds."
 
+    @pytest.mark.asyncio
+    
@mock.patch("airflow.providers.google.cloud.hooks.dataproc.DataprocAsyncHook.get_cluster")
+    async def test_fetch_cluster_status(self, mock_get_cluster, 
cluster_trigger, async_get_cluster):
+        mock_get_cluster.return_value = async_get_cluster(
+            status=ClusterStatus(state=ClusterStatus.State.RUNNING)
+        )
+        cluster = await cluster_trigger.fetch_cluster()
+
+        assert cluster.status.state == ClusterStatus.State.RUNNING, "The 
cluster state should be RUNNING"

Review Comment:
   What does "The cluster state should be RUNNING" mean here in the assert statement?



##########
tests/providers/google/cloud/triggers/test_dataproc.py:
##########
@@ -215,9 +228,48 @@ async def test_cluster_run_loop_is_still_running(
         await asyncio.sleep(0.5)
 
         assert not task.done()
-        assert f"Current state is: {ClusterStatus.State.CREATING}"
+        assert f"Current state is: {ClusterStatus.State.CREATING}."
         assert f"Sleeping for {TEST_POLL_INTERVAL} seconds."
 
+    @pytest.mark.asyncio
+    
@mock.patch("airflow.providers.google.cloud.hooks.dataproc.DataprocAsyncHook.get_cluster")
+    async def test_fetch_cluster_status(self, mock_get_cluster, 
cluster_trigger, async_get_cluster):
+        mock_get_cluster.return_value = async_get_cluster(
+            status=ClusterStatus(state=ClusterStatus.State.RUNNING)
+        )
+        cluster = await cluster_trigger.fetch_cluster()
+
+        assert cluster.status.state == ClusterStatus.State.RUNNING, "The 
cluster state should be RUNNING"
+
+    @pytest.mark.asyncio
+    
@mock.patch("airflow.providers.google.cloud.hooks.dataproc.DataprocAsyncHook.delete_cluster")
+    async def test_delete_when_error_occurred(self, mock_delete_cluster, 
cluster_trigger):
+        mock_cluster = mock.MagicMock(spec=Cluster)
+        type(mock_cluster).status = mock.PropertyMock(
+            return_value=mock.MagicMock(state=ClusterStatus.State.ERROR)
+        )
+
+        mock_delete_future = asyncio.Future()
+        mock_delete_future.set_result(None)
+        mock_delete_cluster.return_value = mock_delete_future
+
+        cluster_trigger.delete_on_error = True
+
+        await cluster_trigger.delete_when_error_occurred(mock_cluster)
+
+        mock_delete_cluster.assert_called_once_with(
+            region=cluster_trigger.region,
+            cluster_name=cluster_trigger.cluster_name,
+            project_id=cluster_trigger.project_id,
+        )
+
+        mock_delete_cluster.reset_mock()
+        cluster_trigger.delete_on_error = False
+
+        await cluster_trigger.delete_when_error_occurred(mock_cluster)
+
+        mock_delete_cluster.assert_not_called()
+

Review Comment:
   Can we also add a test case for the `CancelledError`, as mentioned earlier?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to