dstandish commented on code in PR #30618:
URL: https://github.com/apache/airflow/pull/30618#discussion_r1169174297


##########
airflow/providers/google/cloud/sensors/gcs.py:
##########
@@ -307,10 +310,37 @@ def poke(self, context: Context) -> bool:
         self._matches = hook.list(self.bucket, prefix=self.prefix)
         return bool(self._matches)
 
-    def execute(self, context: Context) -> list[str]:
+    def execute(self, context: Context):
         """Overridden to allow matches to be passed"""
-        super().execute(context)
-        return self._matches
+        if not self.deferrable:
+            super().execute(context)
+            return self._matches
+        else:
+            self.defer(
+                timeout=timedelta(seconds=self.timeout),
+                trigger=GCSPrefixBlobTrigger(
+                    bucket=self.bucket,
+                    prefix=self.prefix,
+                    poke_interval=self.poke_interval,
+                    google_cloud_conn_id=self.google_cloud_conn_id,
+                    hook_params={
+                        "delegate_to": self.delegate_to,
+                        "impersonation_chain": self.impersonation_chain,
+                    },
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: dict[str, Any], event: dict[str, str | 
list[str]]) -> str | list[str]:
+        """
+        Callback for when the trigger fires - returns immediately.
+        Relies on trigger to throw an exception, otherwise it assumes 
execution was
+        successful.
+        """
+        self.log.info("Sensor checks existence of objects: %s, %s", 
self.bucket, self.prefix)

Review Comment:
   ```suggestion
           self.log.info("Checking for existence of object: %s, %s", 
self.bucket, self.prefix)
   ```
   
   maybe tiny bit better?



##########
airflow/providers/google/cloud/triggers/gcs.py:
##########
@@ -97,3 +97,76 @@ async def _object_exists(self, hook: GCSAsyncHook, 
bucket_name: str, object_name
             if object_response:
                 return "success"
             return "pending"
+
+
+class GCSPrefixBlobTrigger(GCSBlobTrigger):
+    """
+    A trigger that fires and it looks for all the objects in the given bucket
+    which matches the given prefix if not found sleep for certain interval and 
checks again.
+
+    :param bucket: the bucket in the google cloud storage where the objects 
are residing.
+    :param prefix: The prefix of the blob_names to match in the Google cloud 
storage bucket
+    :param google_cloud_conn_id: reference to the Google Connection
+    :param poke_interval: polling period in seconds to check
+    """
+
+    def __init__(
+        self,
+        bucket: str,
+        prefix: str,
+        poke_interval: float,
+        google_cloud_conn_id: str,
+        hook_params: dict[str, Any],
+    ):
+        super().__init__(
+            bucket=bucket,
+            object_name=prefix,
+            poke_interval=poke_interval,
+            google_cloud_conn_id=google_cloud_conn_id,
+            hook_params=hook_params,
+        )
+        self.prefix = prefix
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serializes GCSPrefixBlobTrigger arguments and classpath."""
+        return (
+            "airflow.providers.google.cloud.triggers.gcs.GCSPrefixBlobTrigger",
+            {
+                "bucket": self.bucket,
+                "prefix": self.prefix,
+                "poke_interval": self.poke_interval,
+                "google_cloud_conn_id": self.google_cloud_conn_id,
+                "hook_params": self.hook_params,
+            },
+        )
+
+    async def run(self) -> AsyncIterator["TriggerEvent"]:
+        """Simple loop until the matches are found for the given prefix on the 
bucket."""
+        try:
+            hook = self._get_async_hook()
+            while True:
+                res = await self._list_blobs_with_prefix(
+                    hook=hook, bucket_name=self.bucket, prefix=self.prefix
+                )
+                if len(res) > 0:
+                    yield TriggerEvent(
+                        {"status": "success", "message": "Successfully 
completed", "matches": res}
+                    )
+                await asyncio.sleep(self.poke_interval)
+        except Exception as e:
+            yield TriggerEvent({"status": "error", "message": str(e)})
+            return
+
+    async def _list_blobs_with_prefix(self, hook: GCSAsyncHook, bucket_name: 
str, prefix: str) -> list[str]:
+        """
+        Returns list of blobs which matches the given prefix for the given 
bucket.

Review Comment:
   ```suggestion
           Returns names of blobs which matches the given prefix for the given 
bucket.
   ```



##########
airflow/providers/google/cloud/sensors/gcs.py:
##########
@@ -307,10 +310,37 @@ def poke(self, context: Context) -> bool:
         self._matches = hook.list(self.bucket, prefix=self.prefix)
         return bool(self._matches)
 
-    def execute(self, context: Context) -> list[str]:
+    def execute(self, context: Context):
         """Overridden to allow matches to be passed"""
-        super().execute(context)
-        return self._matches
+        if not self.deferrable:
+            super().execute(context)
+            return self._matches
+        else:
+            self.defer(
+                timeout=timedelta(seconds=self.timeout),
+                trigger=GCSPrefixBlobTrigger(
+                    bucket=self.bucket,
+                    prefix=self.prefix,
+                    poke_interval=self.poke_interval,
+                    google_cloud_conn_id=self.google_cloud_conn_id,
+                    hook_params={
+                        "delegate_to": self.delegate_to,
+                        "impersonation_chain": self.impersonation_chain,
+                    },
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: dict[str, Any], event: dict[str, str | 
list[str]]) -> str | list[str]:
+        """
+        Callback for when the trigger fires - returns immediately.

Review Comment:
   ```suggestion
           Callback for when the trigger fires; returns immediately.
   ```
   
   saw same in https://github.com/apache/airflow/pull/30579



##########
airflow/providers/google/cloud/sensors/gcs.py:
##########
@@ -307,10 +310,37 @@ def poke(self, context: Context) -> bool:
         self._matches = hook.list(self.bucket, prefix=self.prefix)
         return bool(self._matches)
 
-    def execute(self, context: Context) -> list[str]:
+    def execute(self, context: Context):
         """Overridden to allow matches to be passed"""
-        super().execute(context)
-        return self._matches
+        if not self.deferrable:
+            super().execute(context)
+            return self._matches
+        else:
+            self.defer(
+                timeout=timedelta(seconds=self.timeout),
+                trigger=GCSPrefixBlobTrigger(
+                    bucket=self.bucket,
+                    prefix=self.prefix,
+                    poke_interval=self.poke_interval,
+                    google_cloud_conn_id=self.google_cloud_conn_id,
+                    hook_params={
+                        "delegate_to": self.delegate_to,
+                        "impersonation_chain": self.impersonation_chain,
+                    },
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: dict[str, Any], event: dict[str, str | 
list[str]]) -> str | list[str]:
+        """
+        Callback for when the trigger fires - returns immediately.
+        Relies on trigger to throw an exception, otherwise it assumes 
execution was

Review Comment:
   Is this true anymore?  Looks like it checks now whether success.  Saw same 
in https://github.com/apache/airflow/pull/30579
   
   



##########
docs/apache-airflow-providers-google/operators/cloud/gcs.rst:
##########
@@ -199,8 +199,20 @@ Use the 
:class:`~airflow.providers.google.cloud.sensors.gcs.GCSObjectsWithPrefix
     :start-after: [START howto_sensor_object_with_prefix_exists_task]
     :end-before: [END howto_sensor_object_with_prefix_exists_task]
 
+You can set the ``deferrable`` param to True if you want this sensor to run 
asynchronously - leading to efficient
+utilization of resources in your Airflow deployment. However the triggerer 
component needs to be up and running

Review Comment:
   ```suggestion
   utilization of resources in your Airflow deployment. However the triggerer 
component needs to be enabled
   ```



##########
airflow/providers/google/cloud/triggers/gcs.py:
##########
@@ -97,3 +97,76 @@ async def _object_exists(self, hook: GCSAsyncHook, 
bucket_name: str, object_name
             if object_response:
                 return "success"
             return "pending"
+
+
+class GCSPrefixBlobTrigger(GCSBlobTrigger):
+    """
+    A trigger that fires and it looks for all the objects in the given bucket
+    which matches the given prefix if not found sleep for certain interval and 
checks again.

Review Comment:
   ```suggestion
       Looks for objects in bucket matching the prefix.  
       
       If none found, sleep for interval and check again.  Otherwise, return 
matches.
   ```



##########
docs/apache-airflow-providers-google/operators/cloud/gcs.rst:
##########
@@ -199,8 +199,20 @@ Use the 
:class:`~airflow.providers.google.cloud.sensors.gcs.GCSObjectsWithPrefix
     :start-after: [START howto_sensor_object_with_prefix_exists_task]
     :end-before: [END howto_sensor_object_with_prefix_exists_task]
 
+You can set the ``deferrable`` param to True if you want this sensor to run 
asynchronously - leading to efficient

Review Comment:
   ```suggestion
   You can set the ``deferrable`` param to True if you want this sensor to run 
asynchronously, leading to more efficient
   ```



##########
docs/apache-airflow-providers-google/operators/cloud/gcs.rst:
##########
@@ -199,8 +199,20 @@ Use the 
:class:`~airflow.providers.google.cloud.sensors.gcs.GCSObjectsWithPrefix
     :start-after: [START howto_sensor_object_with_prefix_exists_task]
     :end-before: [END howto_sensor_object_with_prefix_exists_task]
 
+You can set the ``deferrable`` param to True if you want this sensor to run 
asynchronously - leading to efficient
+utilization of resources in your Airflow deployment. However the triggerer 
component needs to be up and running

Review Comment:
   above two comments also apply to https://github.com/apache/airflow/pull/30579



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to