potiuk commented on a change in pull request #7791: Add ability to specify a 
maximum modified time for objects in GCSToGCSOperator
URL: https://github.com/apache/airflow/pull/7791#discussion_r396090850
 
 

 ##########
 File path: airflow/providers/google/cloud/hooks/gcs.py
 ##########
 @@ -307,6 +307,79 @@ def is_updated_after(self, bucket_name, object_name, ts):
 
         return False
 
+    def is_updated_between(self, bucket_name, object_name, min_ts, max_ts):
+        """
+        Checks if an blob_name is updated in Google Cloud Storage.
+
+        :param bucket_name: The Google Cloud Storage bucket where the object 
is.
+        :type bucket_name: str
+        :param object_name: The name of the object to check in the Google cloud
+                storage bucket.
+        :type object_name: str
+        :param min_ts: The minimum timestamp to check against.
+        :type min_ts: datetime.datetime
+        :param max_ts: The maximum timestamp to check against.
+        :type max_ts: datetime.datetime
+        """
+        client = self.get_conn()
+        bucket = client.bucket(bucket_name)
+        blob = bucket.get_blob(blob_name=object_name)
+
+        if blob is None:
+            raise ValueError("Object ({}) not found in Bucket ({})".format(
+                object_name, bucket_name))
+
+        blob_update_time = blob.updated
+
+        if blob_update_time is not None:
+            import dateutil.tz
+
+            if not min_ts.tzinfo:
+                min_ts = min_ts.replace(tzinfo=dateutil.tz.tzutc())
+            if not max_ts.tzinfo:
+                max_ts = max_ts.replace(tzinfo=dateutil.tz.tzutc())
+
+            self.log.info("Verify object date: %s is between %s and %s", 
blob_update_time, min_ts, max_ts)
+
+            if min_ts < blob_update_time < max_ts:
+                return True
+        return False
+
+    def is_updated_before(self, bucket_name, object_name, ts):
+        """
+        Checks if an blob_name is updated before given time in Google Cloud 
Storage.
+
+        :param bucket_name: The Google Cloud Storage bucket where the object 
is.
+        :type bucket_name: str
+        :param object_name: The name of the object to check in the Google cloud
+            storage bucket.
+        :type object_name: str
+        :param ts: The timestamp to check against.
+        :type ts: datetime.datetime
+        """
+        client = self.get_conn()
+        bucket = client.bucket(bucket_name)
+        blob = bucket.get_blob(blob_name=object_name)
+
+        if blob is None:
+            raise ValueError("Object ({}) not found in Bucket ({})".format(
 
 Review comment:
   BTW, that suggestion was really just off the top of my head. You'd have to use 
functools.partial to do it in the way I described: 
https://stackoverflow.com/questions/15331726/how-does-functools-partial-do-what-it-does/15331841

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to