This is an automated email from the ASF dual-hosted git repository.
vincbeck pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 78dccfdbceb fix(amazon): flush file buffer in S3Hook.download_file()
before returning path (#62078)
78dccfdbceb is described below
commit 78dccfdbceb3e50394225a5f6de7c1833bd08eb6
Author: Filippo Scotti <[email protected]>
AuthorDate: Tue Feb 17 21:26:49 2026 +0100
fix(amazon): flush file buffer in S3Hook.download_file() before returning
path (#62078)
* fix(amazon): flush file buffer in S3Hook.download_file() before returning
path
S3Hook.download_file() writes S3 object content to a file via
download_fileobj() but never flushes the write buffer before returning
the file path. When the caller immediately opens the returned path,
the file may contain 0 bytes because the data is still in Python's
write buffer.
This particularly affects small files (< ~8KB) that fit entirely in
the buffer, and was exposed by apache-airflow-providers-common-compat
1.13.1 which changed execution timing of get_hook_lineage_collector().
See also: https://github.com/boto/boto3/issues/1304
* fix(amazon): use file-like mock in S3Hook.download_file() tests
The tests for download_file() mocked NamedTemporaryFile to return a
PosixPath instead of a file-like object. PosixPath lacks flush() and
its .name property returns just the filename, not the full path like
a real NamedTemporaryFile. Use the default MagicMock return value
which properly supports file-like operations.
---------
Co-authored-by: Filippo Scotti <[email protected]>
---
.../amazon/src/airflow/providers/amazon/aws/hooks/s3.py | 1 +
providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py | 12 ++++++------
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
index 24e0e012e6d..d71a8383e8a 100644
--- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
+++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
@@ -1616,6 +1616,7 @@ class S3Hook(AwsBaseHook):
ExtraArgs=extra_args,
Config=self.transfer_config,
)
+ file.flush()
get_hook_lineage_collector().add_input_asset(
context=self, scheme="s3", asset_kwargs={"bucket": bucket_name,
"key": key}
)
diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
index 74c8eb3ab3b..f84f7b8ba0a 100644
--- a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
+++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
@@ -1449,8 +1449,8 @@ class TestAwsS3Hook:
@mock.patch("airflow.providers.amazon.aws.hooks.s3.NamedTemporaryFile")
def test_download_file(self, mock_temp_file, tmp_path):
- path = tmp_path / "airflow_tmp_test_s3_hook"
- mock_temp_file.return_value = path
+ mock_file = mock_temp_file.return_value
+ mock_file.name = str(tmp_path / "airflow_tmp_test_s3_hook")
s3_hook = S3Hook(aws_conn_id="s3_test", requester_pays=True)
s3_hook.check_for_key = Mock(return_value=True)
s3_obj = Mock()
@@ -1463,17 +1463,17 @@ class TestAwsS3Hook:
s3_hook.get_key.assert_called_once_with(key, bucket)
s3_obj.download_fileobj.assert_called_once_with(
- path,
+ mock_file,
Config=s3_hook.transfer_config,
ExtraArgs={"RequestPayer": "requester"},
)
- assert path.name == output_file
+ assert mock_file.name == output_file
@mock.patch("airflow.providers.amazon.aws.hooks.s3.NamedTemporaryFile")
def test_download_file_exposes_lineage(self, mock_temp_file, tmp_path,
hook_lineage_collector):
- path = tmp_path / "airflow_tmp_test_s3_hook"
- mock_temp_file.return_value = path
+ mock_file = mock_temp_file.return_value
+ mock_file.name = str(tmp_path / "airflow_tmp_test_s3_hook")
s3_hook = S3Hook(aws_conn_id="s3_test")
s3_hook.check_for_key = Mock(return_value=True)
s3_obj = Mock()