This is an automated email from the ASF dual-hosted git repository.
shahar1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 28ca0b66260 Harden S3 sync target paths (#67847)
28ca0b66260 is described below
commit 28ca0b6626022c08b01de67bdbc554fad4e336ba
Author: dfgvaetyj3456356-hash <[email protected]>
AuthorDate: Wed Jun 17 00:57:38 2026 -0500
Harden S3 sync target paths (#67847)
---
.../amazon/src/airflow/providers/amazon/aws/exceptions.py | 4 ++++
.../amazon/src/airflow/providers/amazon/aws/hooks/s3.py | 9 ++++++++-
providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py | 13 ++++++++++++-
3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py b/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py
index ae1863bec9c..9a022bf9879 100644
--- a/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py
+++ b/providers/amazon/src/airflow/providers/amazon/aws/exceptions.py
@@ -50,3 +50,7 @@ class EcsOperatorError(Exception):
class S3HookUriParseFailure(AirflowException):
"""When parse_s3_url fails to parse URL, this error is thrown."""
+
+
+class S3HookPathTraversalError(AirflowException):
+ """Raise when an S3 object key resolves outside the target local directory."""
diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
index 5aba0e2b979..616506e2134 100644
--- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
+++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py
@@ -58,7 +58,7 @@ from boto3.s3.transfer import S3Transfer, TransferConfig
from botocore.exceptions import ClientError
from airflow.exceptions import AirflowProviderDeprecationWarning
-from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure
+from airflow.providers.amazon.aws.exceptions import S3HookPathTraversalError, S3HookUriParseFailure
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
from airflow.providers.amazon.aws.utils.tags import format_tags
from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
@@ -1804,6 +1804,7 @@ class S3Hook(AwsBaseHook):
"""Download S3 files from the S3 bucket to the local directory."""
self.log.debug("Downloading data from s3://%s/%s to %s", bucket_name, s3_prefix, local_dir)
+ local_dir_resolved = local_dir.resolve()
local_s3_objects = []
s3_bucket = self.get_bucket(bucket_name)
for obj in s3_bucket.objects.filter(Prefix=s3_prefix):
@@ -1811,6 +1812,12 @@ class S3Hook(AwsBaseHook):
continue
obj_path = Path(obj.key)
local_target_path = local_dir.joinpath(obj_path.relative_to(s3_prefix))
+ try:
+ local_target_path.resolve().relative_to(local_dir_resolved)
+ except ValueError:
+ raise S3HookPathTraversalError(
+ f"S3 object key {obj.key!r} resolves outside local directory {local_dir}"
+ ) from None
if not local_target_path.parent.exists():
local_target_path.parent.mkdir(parents=True, exist_ok=True)
self.log.debug("Created local directory: %s", local_target_path.parent)
diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
index 551e7739887..8b2d70c0c09 100644
--- a/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
+++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py
@@ -35,7 +35,7 @@ from moto import mock_aws
from airflow.models import Connection
from airflow.providers.amazon.aws.assets.s3 import Asset
-from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure
+from airflow.providers.amazon.aws.exceptions import S3HookPathTraversalError, S3HookUriParseFailure
from airflow.providers.amazon.aws.hooks.s3 import (
NO_ACL,
S3Hook,
@@ -1976,6 +1976,17 @@ class TestAwsS3Hook:
assert "local file last modified" in logs_string
assert "Downloaded dag_04.py to" in logs_string
+ def test_sync_to_local_dir_rejects_key_path_traversal(self, s3_bucket, s3_client, tmp_path):
+ s3_client.put_object(Bucket=s3_bucket, Key="dags/../../outside.py", Body=b"test data")
+
+ sync_local_dir = tmp_path / "s3_sync_dir"
+ hook = S3Hook()
+
+ with pytest.raises(S3HookPathTraversalError, match="resolves outside local directory"):
+ hook.sync_to_local_dir(bucket_name=s3_bucket, local_dir=sync_local_dir, s3_prefix="dags/")
+
+ assert not (tmp_path / "outside.py").exists()
+
@pytest.mark.parametrize(
("key_kind", "has_conn", "has_bucket", "precedence", "expected"),