potiuk commented on a change in pull request #7791: Add ability to specify a 
maximum modified time for objects in GCSToGCSOperator
URL: https://github.com/apache/airflow/pull/7791#discussion_r396065490
 
 

 ##########
 File path: airflow/providers/google/cloud/hooks/gcs.py
 ##########
 @@ -307,6 +307,79 @@ def is_updated_after(self, bucket_name, object_name, ts):
 
         return False
 
+    def is_updated_between(self, bucket_name, object_name, min_ts, max_ts):
+        """
+        Checks if an blob_name is updated in Google Cloud Storage.
+
+        :param bucket_name: The Google Cloud Storage bucket where the object is.
+        :type bucket_name: str
+        :param object_name: The name of the object to check in the Google cloud
+                storage bucket.
+        :type object_name: str
+        :param min_ts: The minimum timestamp to check against.
+        :type min_ts: datetime.datetime
+        :param max_ts: The maximum timestamp to check against.
+        :type max_ts: datetime.datetime
+        """
+        client = self.get_conn()
+        bucket = client.bucket(bucket_name)
+        blob = bucket.get_blob(blob_name=object_name)
+
+        if blob is None:
+            raise ValueError("Object ({}) not found in Bucket ({})".format(
+                object_name, bucket_name))
+
+        blob_update_time = blob.updated
+
+        if blob_update_time is not None:
+            import dateutil.tz
+
+            if not min_ts.tzinfo:
+                min_ts = min_ts.replace(tzinfo=dateutil.tz.tzutc())
+            if not max_ts.tzinfo:
+                max_ts = max_ts.replace(tzinfo=dateutil.tz.tzutc())
+
+            self.log.info("Verify object date: %s is between %s and %s", blob_update_time, min_ts, max_ts)
+
+            if min_ts < blob_update_time < max_ts:
+                return True
+        return False
+
+    def is_updated_before(self, bucket_name, object_name, ts):
+        """
+        Checks if an blob_name is updated before given time in Google Cloud Storage.
+
+        :param bucket_name: The Google Cloud Storage bucket where the object is.
+        :type bucket_name: str
+        :param object_name: The name of the object to check in the Google cloud
+            storage bucket.
+        :type object_name: str
+        :param ts: The timestamp to check against.
+        :type ts: datetime.datetime
+        """
+        client = self.get_conn()
+        bucket = client.bucket(bucket_name)
+        blob = bucket.get_blob(blob_name=object_name)
+
+        if blob is None:
+            raise ValueError("Object ({}) not found in Bucket ({})".format(
 
 Review comment:
   No worries - that was just a suggestion -> you do not need to implement it. 
   
   I thought that there is quite a lot of common code that could be extracted. I thought that we could do something like the pattern below (I have not compiled the code - I just tried to show you the idea):
   ```
   def verify_the_object(self, bucket_name, object_name, verify_method):
       client = self.get_conn()
       bucket = client.bucket(bucket_name)
       blob = bucket.get_blob(blob_name=object_name)
       if blob is None:
           raise ValueError("Object ({}) not found in Bucket ({})".format(object_name, bucket_name))
       return verify_method(blob)
   ```
   then is_updated_between could look like
   ```
   def is_updated_between(self, bucket_name, object_name, min_ts, max_ts):
       def _update_between(blob):
           blob_update_time = blob.updated
           if blob_update_time is not None:
               import dateutil.tz
               if not min_ts.tzinfo:
                   min_ts = min_ts.replace(tzinfo=dateutil.tz.tzutc())
               if not max_ts.tzinfo:
                   max_ts = max_ts.replace(tzinfo=dateutil.tz.tzutc())
               self.log.info("Verify object date: %s is between %s and %s", blob_update_time, min_ts, max_ts)
               if min_ts < blob_update_time < max_ts:
                   return True
           return False
       return self.verify_the_object(bucket_name, object_name, _update_between)
   ```
   
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to