This is an automated email from the ASF dual-hosted git repository.

vincbeck pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 78dccfdbceb fix(amazon): flush file buffer in S3Hook.download_file() 
before returning path (#62078)
78dccfdbceb is described below

commit 78dccfdbceb3e50394225a5f6de7c1833bd08eb6
Author: Filippo Scotti <[email protected]>
AuthorDate: Tue Feb 17 21:26:49 2026 +0100

    fix(amazon): flush file buffer in S3Hook.download_file() before returning 
path (#62078)
    
    * fix(amazon): flush file buffer in S3Hook.download_file() before returning 
path
    
    S3Hook.download_file() writes S3 object content to a file via
    download_fileobj() but never flushes the write buffer before returning
    the file path. When the caller immediately opens the returned path,
    the file may contain 0 bytes because the data is still in Python's
    write buffer.
    
    This particularly affects small files (< ~8KB) that fit entirely in
    the buffer, and was exposed by apache-airflow-providers-common-compat
    1.13.1 which changed execution timing of get_hook_lineage_collector().
    
    See also: https://github.com/boto/boto3/issues/1304
    
    * fix(amazon): use file-like mock in S3Hook.download_file() tests
    
    The tests for download_file() mocked NamedTemporaryFile to return a
    PosixPath instead of a file-like object. PosixPath lacks flush() and
    its .name property returns just the filename, not the full path like
    a real NamedTemporaryFile. Use the default MagicMock return value
    which properly supports file-like operations.
    
    ---------
    
    Co-authored-by: Filippo Scotti <[email protected]>
---
 .../amazon/src/airflow/providers/amazon/aws/hooks/s3.py      |  1 +
 providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py      | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py 
b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
index 24e0e012e6d..d71a8383e8a 100644
--- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
+++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
@@ -1616,6 +1616,7 @@ class S3Hook(AwsBaseHook):
             ExtraArgs=extra_args,
             Config=self.transfer_config,
         )
+        file.flush()
         get_hook_lineage_collector().add_input_asset(
             context=self, scheme="s3", asset_kwargs={"bucket": bucket_name, 
"key": key}
         )
diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py 
b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
index 74c8eb3ab3b..f84f7b8ba0a 100644
--- a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
+++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
@@ -1449,8 +1449,8 @@ class TestAwsS3Hook:
 
     @mock.patch("airflow.providers.amazon.aws.hooks.s3.NamedTemporaryFile")
     def test_download_file(self, mock_temp_file, tmp_path):
-        path = tmp_path / "airflow_tmp_test_s3_hook"
-        mock_temp_file.return_value = path
+        mock_file = mock_temp_file.return_value
+        mock_file.name = str(tmp_path / "airflow_tmp_test_s3_hook")
         s3_hook = S3Hook(aws_conn_id="s3_test", requester_pays=True)
         s3_hook.check_for_key = Mock(return_value=True)
         s3_obj = Mock()
@@ -1463,17 +1463,17 @@ class TestAwsS3Hook:
 
         s3_hook.get_key.assert_called_once_with(key, bucket)
         s3_obj.download_fileobj.assert_called_once_with(
-            path,
+            mock_file,
             Config=s3_hook.transfer_config,
             ExtraArgs={"RequestPayer": "requester"},
         )
 
-        assert path.name == output_file
+        assert mock_file.name == output_file
 
     @mock.patch("airflow.providers.amazon.aws.hooks.s3.NamedTemporaryFile")
     def test_download_file_exposes_lineage(self, mock_temp_file, tmp_path, 
hook_lineage_collector):
-        path = tmp_path / "airflow_tmp_test_s3_hook"
-        mock_temp_file.return_value = path
+        mock_file = mock_temp_file.return_value
+        mock_file.name = str(tmp_path / "airflow_tmp_test_s3_hook")
         s3_hook = S3Hook(aws_conn_id="s3_test")
         s3_hook.check_for_key = Mock(return_value=True)
         s3_obj = Mock()

Reply via email to