uranusjr commented on code in PR #60164:
URL: https://github.com/apache/airflow/pull/60164#discussion_r2674408518


##########
providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py:
##########
@@ -1755,11 +1767,33 @@ def _sync_to_local_dir_if_changed(self, s3_bucket, 
s3_object, local_target_path:
                 download_msg = (
                     f"S3 object size ({s3_object.size}) and local file size 
({local_stats.st_size}) differ."
                 )
-
-            s3_last_modified = s3_object.last_modified
-            if local_stats.st_mtime < s3_last_modified.microsecond:
-                should_download = True
-                download_msg = f"S3 object last modified 
({s3_last_modified.microsecond}) and local file last modified 
({local_stats.st_mtime}) differ."
+            else:
+                s3_etag = s3_object.e_tag
+                if s3_etag:
+                    if "-" not in s3_etag:
+                        local_md5 = 
self._compute_local_file_md5(local_target_path)
+                        if local_md5 != s3_etag:
+                            should_download = True
+                            download_msg = (
+                                f"S3 object ETag ({s3_etag}) and local file 
MD5 ({local_md5}) differ "
+                                f"(content changed while size remained the 
same)."
+                            )
+                    else:
+                        s3_last_modified = s3_object.last_modified
+                        if s3_last_modified and local_stats.st_mtime < 
s3_last_modified.timestamp():
+                            should_download = True
+                            download_msg = (
+                                f"S3 object last modified ({s3_last_modified}) 
is newer than "
+                                f"local file last modified 
({datetime.fromtimestamp(local_stats.st_mtime)})."
+                            )
+                else:
+                    s3_last_modified = s3_object.last_modified
+                    if s3_last_modified and local_stats.st_mtime < 
s3_last_modified.timestamp():
+                        should_download = True
+                        download_msg = (
+                            f"S3 object last modified ({s3_last_modified}) is 
newer than "
+                            f"local file last modified 
({datetime.fromtimestamp(local_stats.st_mtime)})."
+                        )

Review Comment:
   You can probably reduce some duplicate code if this is extracted into a 
separate function with early returns.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to