This is an automated email from the ASF dual-hosted git repository.

shahar1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 28ca0b66260 Harden S3 sync target paths (#67847)
28ca0b66260 is described below

commit 28ca0b6626022c08b01de67bdbc554fad4e336ba
Author: dfgvaetyj3456356-hash <[email protected]>
AuthorDate: Wed Jun 17 00:57:38 2026 -0500

    Harden S3 sync target paths (#67847)
---
 .../amazon/src/airflow/providers/amazon/aws/exceptions.py   |  4 ++++
 .../amazon/src/airflow/providers/amazon/aws/hooks/s3.py     |  9 ++++++++-
 providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py     | 13 ++++++++++++-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py b/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py
index ae1863bec9c..9a022bf9879 100644
--- a/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py
+++ b/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py
@@ -50,3 +50,7 @@ class EcsOperatorError(Exception):
 
 class S3HookUriParseFailure(AirflowException):
     """When parse_s3_url fails to parse URL, this error is thrown."""
+
+
+class S3HookPathTraversalError(AirflowException):
+    """Raise when an S3 object key resolves outside the target local directory."""
diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
index 5aba0e2b979..616506e2134 100644
--- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
+++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
@@ -58,7 +58,7 @@ from boto3.s3.transfer import S3Transfer, TransferConfig
 from botocore.exceptions import ClientError
 
 from airflow.exceptions import AirflowProviderDeprecationWarning
-from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure
+from airflow.providers.amazon.aws.exceptions import S3HookPathTraversalError, S3HookUriParseFailure
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
 from airflow.providers.amazon.aws.utils.tags import format_tags
 from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
@@ -1804,6 +1804,7 @@ class S3Hook(AwsBaseHook):
         """Download S3 files from the S3 bucket to the local directory."""
         self.log.debug("Downloading data from s3://%s/%s to %s", bucket_name, s3_prefix, local_dir)
 
+        local_dir_resolved = local_dir.resolve()
         local_s3_objects = []
         s3_bucket = self.get_bucket(bucket_name)
         for obj in s3_bucket.objects.filter(Prefix=s3_prefix):
@@ -1811,6 +1812,12 @@ class S3Hook(AwsBaseHook):
                 continue
             obj_path = Path(obj.key)
             local_target_path = local_dir.joinpath(obj_path.relative_to(s3_prefix))
+            try:
+                local_target_path.resolve().relative_to(local_dir_resolved)
+            except ValueError:
+                raise S3HookPathTraversalError(
+                    f"S3 object key {obj.key!r} resolves outside local directory {local_dir}"
+                ) from None
             if not local_target_path.parent.exists():
                 local_target_path.parent.mkdir(parents=True, exist_ok=True)
                 self.log.debug("Created local directory: %s", local_target_path.parent)
diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
index 551e7739887..8b2d70c0c09 100644
--- a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
+++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
@@ -35,7 +35,7 @@ from moto import mock_aws
 
 from airflow.models import Connection
 from airflow.providers.amazon.aws.assets.s3 import Asset
-from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure
+from airflow.providers.amazon.aws.exceptions import S3HookPathTraversalError, S3HookUriParseFailure
 from airflow.providers.amazon.aws.hooks.s3 import (
     NO_ACL,
     S3Hook,
@@ -1976,6 +1976,17 @@ class TestAwsS3Hook:
         assert "local file last modified" in logs_string
         assert "Downloaded dag_04.py to" in logs_string
 
+    def test_sync_to_local_dir_rejects_key_path_traversal(self, s3_bucket, s3_client, tmp_path):
+        s3_client.put_object(Bucket=s3_bucket, Key="dags/../../outside.py", Body=b"test data")
+
+        sync_local_dir = tmp_path / "s3_sync_dir"
+        hook = S3Hook()
+
+        with pytest.raises(S3HookPathTraversalError, match="resolves outside local directory"):
+            hook.sync_to_local_dir(bucket_name=s3_bucket, local_dir=sync_local_dir, s3_prefix="dags/")
+
+        assert not (tmp_path / "outside.py").exists()
+
 
 @pytest.mark.parametrize(
     ("key_kind", "has_conn", "has_bucket", "precedence", "expected"),

Reply via email to