potiuk commented on a change in pull request #7791: Add ability to specify a
maximum modified time for objects in GCSToGCSOperator
URL: https://github.com/apache/airflow/pull/7791#discussion_r395970053
##########
File path: airflow/providers/google/cloud/hooks/gcs.py
##########
@@ -307,6 +307,79 @@ def is_updated_after(self, bucket_name, object_name, ts):
return False
+ def is_updated_between(self, bucket_name, object_name, min_ts, max_ts):
+ """
+ Checks if an blob_name is updated in Google Cloud Storage.
+
+ :param bucket_name: The Google Cloud Storage bucket where the object is.
+ :type bucket_name: str
+ :param object_name: The name of the object to check in the Google cloud
+ storage bucket.
+ :type object_name: str
+ :param min_ts: The minimum timestamp to check against.
+ :type min_ts: datetime.datetime
+ :param max_ts: The maximum timestamp to check against.
+ :type max_ts: datetime.datetime
+ """
+ client = self.get_conn()
+ bucket = client.bucket(bucket_name)
+ blob = bucket.get_blob(blob_name=object_name)
+
+ if blob is None:
+ raise ValueError("Object ({}) not found in Bucket ({})".format(
+ object_name, bucket_name))
+
+ blob_update_time = blob.updated
+
+ if blob_update_time is not None:
+ import dateutil.tz
+
+ if not min_ts.tzinfo:
+ min_ts = min_ts.replace(tzinfo=dateutil.tz.tzutc())
+ if not max_ts.tzinfo:
+ max_ts = max_ts.replace(tzinfo=dateutil.tz.tzutc())
+
+ self.log.info("Verify object date: %s is between %s and %s", blob_update_time, min_ts, max_ts)
+
+ if min_ts < blob_update_time < max_ts:
+ return True
+ return False
+
+ def is_updated_before(self, bucket_name, object_name, ts):
Review comment:
Should we add is_updated_after?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services