This is an automated email from the ASF dual-hosted git repository. rahulvats pushed a commit to branch py-client-sync in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 1d78eca83bb3051172343bbc0aefd82705c2a454 Author: Elad Kalif <[email protected]> AuthorDate: Tue Mar 24 23:54:16 2026 +0200 Fix SSRF in Bid Manager report download via URL allowlist (#64180) * Fix SSRF in Bid Manager report download via URL allowlist * fixes --- .../marketing_platform/operators/bid_manager.py | 23 +++++++++++++++++----- .../operators/test_bid_manager.py | 17 ++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/providers/google/src/airflow/providers/google/marketing_platform/operators/bid_manager.py b/providers/google/src/airflow/providers/google/marketing_platform/operators/bid_manager.py index 2f4a43ea4b0..7481f0deac0 100644 --- a/providers/google/src/airflow/providers/google/marketing_platform/operators/bid_manager.py +++ b/providers/google/src/airflow/providers/google/marketing_platform/operators/bid_manager.py @@ -25,7 +25,7 @@ import tempfile import urllib.request from collections.abc import Sequence from typing import TYPE_CHECKING, Any -from urllib.parse import urlsplit +from urllib.parse import urlparse, urlsplit from airflow.exceptions import AirflowException from airflow.providers.google.cloud.hooks.gcs import GCSHook @@ -317,15 +317,28 @@ class GoogleBidManagerDownloadReportOperator(BaseOperator): # If no custom report_name provided, use Bid Manager name file_url = resource["metadata"]["googleCloudStoragePath"] - if urllib.parse.urlparse(file_url).scheme == "file": - raise AirflowException("Accessing local file is not allowed in this operator") + parsed_url = urlparse(file_url) + if parsed_url.scheme != "https" or parsed_url.hostname not in ( + "storage.googleapis.com", + "storage.cloud.google.com", + ): + raise AirflowException( + f"Unexpected report URL: {file_url!r}. " + "Only https://storage.googleapis.com and https://storage.cloud.google.com URLs are allowed." 
+ ) report_name = self.report_name or urlsplit(file_url).path.split("/")[-1] report_name = self._resolve_file_name(report_name) - # Download the report + # Download the report using an opener that rejects redirects so a crafted + # 302 from a compromised GCS endpoint cannot bounce to an internal host. + class _NoRedirect(urllib.request.HTTPRedirectHandler): + def redirect_request(self, req, fp, code, msg, headers, newurl): + raise AirflowException(f"Redirect from GCS report URL to {newurl!r} is not allowed.") + + no_redirect_opener = urllib.request.build_opener(_NoRedirect) self.log.info("Starting downloading report %s", self.report_id) with tempfile.NamedTemporaryFile(delete=False) as temp_file: - with urllib.request.urlopen(file_url) as response: # nosec + with no_redirect_opener.open(file_url) as response: # nosec shutil.copyfileobj(response, temp_file, length=self.chunk_size) temp_file.flush() diff --git a/providers/google/tests/unit/google/marketing_platform/operators/test_bid_manager.py b/providers/google/tests/unit/google/marketing_platform/operators/test_bid_manager.py index 20c323e57c0..ed9a6c9efd5 100644 --- a/providers/google/tests/unit/google/marketing_platform/operators/test_bid_manager.py +++ b/providers/google/tests/unit/google/marketing_platform/operators/test_bid_manager.py @@ -74,7 +74,17 @@ class TestGoogleBidManagerDownloadReportOperator: session.execute(delete(TI)) @pytest.mark.parametrize( - ("file_path", "should_except"), [("https://host/path", False), ("file:/path/to/file", True)] + ("file_path", "should_except"), + [ + ("https://storage.googleapis.com/bucket/report.csv", False), + ("https://storage.cloud.google.com/bucket/report.csv", False), + ("file:/path/to/file", True), + ("http://storage.googleapis.com/bucket/report.csv", True), + ("https://evil.com/report.csv", True), + ("https://internal-service.local/secret", True), + ("ftp://storage.googleapis.com/bucket/report.csv", True), + ("https://storage.googleapis.com@evil.com/report.csv", True), + ], ) 
@mock.patch("airflow.providers.google.marketing_platform.operators.bid_manager.shutil") @mock.patch("airflow.providers.google.marketing_platform.operators.bid_manager.urllib.request") @@ -156,7 +166,10 @@ class TestGoogleBidManagerDownloadReportOperator: ): mock_temp.NamedTemporaryFile.return_value.__enter__.return_value.name = FILENAME mock_hook.return_value.get_report.return_value = { - "metadata": {"status": {"state": "DONE"}, "googleCloudStoragePath": "TEST"} + "metadata": { + "status": {"state": "DONE"}, + "googleCloudStoragePath": "https://storage.googleapis.com/bucket/report.csv", + } } with dag_maker(dag_id="test_set_bucket_name", start_date=DEFAULT_DATE) as dag: if BUCKET_NAME not in test_bucket_name:
