VietDucFCB commented on code in PR #60164:
URL: https://github.com/apache/airflow/pull/60164#discussion_r2674599323
##########
providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py:
##########
@@ -1755,11 +1767,33 @@ def _sync_to_local_dir_if_changed(self, s3_bucket,
s3_object, local_target_path:
download_msg = (
f"S3 object size ({s3_object.size}) and local file size
({local_stats.st_size}) differ."
)
-
- s3_last_modified = s3_object.last_modified
- if local_stats.st_mtime < s3_last_modified.microsecond:
- should_download = True
- download_msg = f"S3 object last modified
({s3_last_modified.microsecond}) and local file last modified
({local_stats.st_mtime}) differ."
+ else:
+ s3_etag = s3_object.e_tag
+ if s3_etag:
+ if "-" not in s3_etag:
+ local_md5 =
self._compute_local_file_md5(local_target_path)
+ if local_md5 != s3_etag:
+ should_download = True
+ download_msg = (
+ f"S3 object ETag ({s3_etag}) and local file
MD5 ({local_md5}) differ "
+ f"(content changed while size remained the
same)."
+ )
+ else:
+ s3_last_modified = s3_object.last_modified
+ if s3_last_modified and local_stats.st_mtime <
s3_last_modified.timestamp():
+ should_download = True
+ download_msg = (
+ f"S3 object last modified ({s3_last_modified})
is newer than "
+ f"local file last modified
({datetime.fromtimestamp(local_stats.st_mtime)})."
+ )
+ else:
+ s3_last_modified = s3_object.last_modified
+ if s3_last_modified and local_stats.st_mtime <
s3_last_modified.timestamp():
+ should_download = True
+ download_msg = (
+ f"S3 object last modified ({s3_last_modified}) is
newer than "
+ f"local file last modified
({datetime.fromtimestamp(local_stats.st_mtime)})."
+ )
Review Comment:
Thanks for the suggestion, Ive extracted the duplicated logic into a helper
function and refactored with early returns, updated in the latest commit.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]