dstandish commented on code in PR #30618:
URL: https://github.com/apache/airflow/pull/30618#discussion_r1178249248


##########
airflow/providers/google/cloud/sensors/gcs.py:
##########
@@ -307,10 +314,35 @@ def poke(self, context: Context) -> bool:
         self._matches = hook.list(self.bucket, prefix=self.prefix)
         return bool(self._matches)
 
-    def execute(self, context: Context) -> list[str]:
+    def execute(self, context: Context):
         """Overridden to allow matches to be passed"""
-        super().execute(context)
-        return self._matches
+        if not self.deferrable:
+            super().execute(context)
+            return self._matches
+        else:
+            self.defer(
+                timeout=timedelta(seconds=self.timeout),
+                trigger=GCSPrefixBlobTrigger(
+                    bucket=self.bucket,
+                    prefix=self.prefix,
+                    poke_interval=self.poke_interval,
+                    google_cloud_conn_id=self.google_cloud_conn_id,
+                    hook_params={
+                        "impersonation_chain": self.impersonation_chain,
+                    },
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: dict[str, Any], event: dict[str, str | list[str]]) -> str | list[str]:
+        """
+        Callback for when the trigger fires; returns immediately.
+        Relies on trigger to throw a success event
+        """
+        self.log.info("Checking for existence of object: %s, %s", self.bucket, self.prefix)

Review Comment:
   ```suggestion
           self.log.info("Checking for existence of object: %s, %s", self.bucket, self.prefix)
   ```
   Sorry to nitpick this, but here you are not checking -- the checking has already happened (in the trigger); you're resuming from the trigger and checking its status.
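   A sketch of what a reworded callback along those lines could look like. The log message and the error handling are illustrative assumptions, not the PR's code; it assumes the event dict carries the "status", "message", and "matches" keys yielded by the trigger, and that AirflowException is imported from airflow.exceptions.
   ```python
   # Sketch only -- not the PR's implementation.
   def execute_complete(self, context, event):
       """Return the matches reported by the trigger; fail if the trigger reported an error."""
       if event["status"] == "error":
           raise AirflowException(event["message"])
       self.log.info("Resuming after trigger found matches for prefix %s in bucket %s", self.prefix, self.bucket)
       return event["matches"]
   ```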



##########
airflow/providers/google/cloud/triggers/gcs.py:
##########
@@ -200,3 +200,76 @@ async def _is_blob_updated_after(
                 if blob_updated_time > target_date:
                     return True, {"status": "success", "message": "success"}
             return False, {"status": "pending", "message": "pending"}
+
+
+class GCSPrefixBlobTrigger(GCSBlobTrigger):
+    """
+    Looks for objects in bucket matching a prefix.
+    If none found, sleep for interval and check again. Otherwise, return matches.
+
+    :param bucket: the bucket in the google cloud storage where the objects are residing.
+    :param prefix: The prefix of the blob_names to match in the Google cloud storage bucket
+    :param google_cloud_conn_id: reference to the Google Connection
+    :param poke_interval: polling period in seconds to check
+    """
+
+    def __init__(
+        self,
+        bucket: str,
+        prefix: str,
+        poke_interval: float,
+        google_cloud_conn_id: str,
+        hook_params: dict[str, Any],
+    ):
+        super().__init__(
+            bucket=bucket,
+            object_name=prefix,
+            poke_interval=poke_interval,
+            google_cloud_conn_id=google_cloud_conn_id,
+            hook_params=hook_params,
+        )
+        self.prefix = prefix
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serializes GCSPrefixBlobTrigger arguments and classpath."""
+        return (
+            "airflow.providers.google.cloud.triggers.gcs.GCSPrefixBlobTrigger",
+            {
+                "bucket": self.bucket,
+                "prefix": self.prefix,
+                "poke_interval": self.poke_interval,
+                "google_cloud_conn_id": self.google_cloud_conn_id,
+                "hook_params": self.hook_params,
+            },
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        """Simple loop until the matches are found for the given prefix on the 
bucket."""
+        try:
+            hook = self._get_async_hook()
+            while True:
+                res = await self._list_blobs_with_prefix(
+                    hook=hook, bucket_name=self.bucket, prefix=self.prefix
+                )
+                if len(res) > 0:
+                    yield TriggerEvent(
+                        {"status": "success", "message": "Successfully completed", "matches": res}
+                    )
+                await asyncio.sleep(self.poke_interval)
+        except Exception as e:
+            yield TriggerEvent({"status": "error", "message": str(e)})
+            return
+
+    async def _list_blobs_with_prefix(self, hook: GCSAsyncHook, bucket_name: str, prefix: str) -> list[str]:
+        """
+        Returns names of blobs which match the given prefix for a given bucket.
+
+        :param bucket_name: The Google Cloud Storage bucket where the object is.

Review Comment:
   docstring missing param `hook`
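   For reference, a possible completion of the helper's docstring with the `hook` param documented (wording illustrative, body elided since it is not shown in this hunk):
   ```python
   async def _list_blobs_with_prefix(self, hook: GCSAsyncHook, bucket_name: str, prefix: str) -> list[str]:
       """
       Returns names of blobs which match the given prefix for a given bucket.

       :param hook: the async GCS hook used to list the blobs
       :param bucket_name: The Google Cloud Storage bucket where the object is.
       :param prefix: The prefix of the blob_names to match in the Google cloud storage bucket
       """
       ...
   ```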



##########
airflow/providers/google/cloud/triggers/gcs.py:
##########
@@ -200,3 +200,76 @@ async def _is_blob_updated_after(
                 if blob_updated_time > target_date:
                     return True, {"status": "success", "message": "success"}
             return False, {"status": "pending", "message": "pending"}
+
+
+class GCSPrefixBlobTrigger(GCSBlobTrigger):
+    """
+    Looks for objects in bucket matching a prefix.
+    If none found, sleep for interval and check again. Otherwise, return matches.
+
+    :param bucket: the bucket in the google cloud storage where the objects are residing.
+    :param prefix: The prefix of the blob_names to match in the Google cloud storage bucket
+    :param google_cloud_conn_id: reference to the Google Connection
+    :param poke_interval: polling period in seconds to check
+    """
+
+    def __init__(
+        self,
+        bucket: str,
+        prefix: str,
+        poke_interval: float,

Review Comment:
   missing from docstring
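   The excerpt cuts off before showing exactly which parameter the comment points at; for reference, a class docstring covering every constructor parameter (including `hook_params`) might read like this (sketch only, wording illustrative):
   ```python
   class GCSPrefixBlobTrigger(GCSBlobTrigger):
       """
       Looks for objects in bucket matching a prefix.

       If none found, sleep for interval and check again. Otherwise, return matches.

       :param bucket: the bucket in the google cloud storage where the objects are residing.
       :param prefix: The prefix of the blob_names to match in the Google cloud storage bucket
       :param poke_interval: polling period in seconds to check
       :param google_cloud_conn_id: reference to the Google Connection
       :param hook_params: extra keyword arguments forwarded to the hook, e.g. impersonation_chain
       """
   ```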



##########
airflow/providers/google/cloud/triggers/gcs.py:
##########
@@ -200,3 +200,76 @@ async def _is_blob_updated_after(
                 if blob_updated_time > target_date:
                     return True, {"status": "success", "message": "success"}
             return False, {"status": "pending", "message": "pending"}
+
+
+class GCSPrefixBlobTrigger(GCSBlobTrigger):
+    """
+    Looks for objects in bucket matching a prefix.
+    If none found, sleep for interval and check again. Otherwise, return matches.
+
+    :param bucket: the bucket in the google cloud storage where the objects are residing.
+    :param prefix: The prefix of the blob_names to match in the Google cloud storage bucket
+    :param google_cloud_conn_id: reference to the Google Connection
+    :param poke_interval: polling period in seconds to check
+    """
+
+    def __init__(
+        self,
+        bucket: str,
+        prefix: str,
+        poke_interval: float,
+        google_cloud_conn_id: str,
+        hook_params: dict[str, Any],
+    ):
+        super().__init__(
+            bucket=bucket,
+            object_name=prefix,
+            poke_interval=poke_interval,
+            google_cloud_conn_id=google_cloud_conn_id,
+            hook_params=hook_params,
+        )
+        self.prefix = prefix
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serializes GCSPrefixBlobTrigger arguments and classpath."""
+        return (
+            "airflow.providers.google.cloud.triggers.gcs.GCSPrefixBlobTrigger",
+            {
+                "bucket": self.bucket,
+                "prefix": self.prefix,
+                "poke_interval": self.poke_interval,
+                "google_cloud_conn_id": self.google_cloud_conn_id,
+                "hook_params": self.hook_params,
+            },
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        """Simple loop until the matches are found for the given prefix on the 
bucket."""
+        try:
+            hook = self._get_async_hook()
+            while True:
+                res = await self._list_blobs_with_prefix(

Review Comment:
   Post-2.6, with logging in the triggerer, we can now consider logging "checking for files"-style messages every now and again -- it's nice to know from the logs that the trigger is still doing something. Not saying you must do it in this PR, just a reminder that it's something that makes more sense now with 2.6 than it did previously.
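   A sketch of where such a periodic log line could sit in the trigger's loop; the message wording is illustrative, and the sketch also returns after the success event so the loop exits:
   ```python
   async def run(self) -> AsyncIterator[TriggerEvent]:
       """Loop until matches are found for the given prefix on the bucket."""
       try:
           hook = self._get_async_hook()
           while True:
               # Heartbeat so the triggerer logs show the trigger is still working (wording illustrative).
               self.log.info("Checking for blobs with prefix %s in bucket %s", self.prefix, self.bucket)
               res = await self._list_blobs_with_prefix(hook=hook, bucket_name=self.bucket, prefix=self.prefix)
               if len(res) > 0:
                   yield TriggerEvent({"status": "success", "message": "Successfully completed", "matches": res})
                   return
               await asyncio.sleep(self.poke_interval)
       except Exception as e:
           yield TriggerEvent({"status": "error", "message": str(e)})
   ```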


