This is an automated email from the ASF dual-hosted git repository.
eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 00af5c007e Replace urlparse with urlsplit (#27389)
00af5c007e is described below
commit 00af5c007ef2200401b53c40236e664758e47f27
Author: Weston King-Leatham <[email protected]>
AuthorDate: Mon Nov 14 14:00:22 2022 -0500
Replace urlparse with urlsplit (#27389)
* Replace urlparse with urlsplit in s3 files
Co-authored-by: eladkal <[email protected]>
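
    Background for reviewers: urlsplit() exposes the same scheme/netloc/path/
    query/fragment fields used at all of these call sites, but unlike
    urlparse() it does not split out the rarely used ";parameters" component,
    so it returns a 5-item SplitResult instead of a 6-item ParseResult and is
    slightly cheaper. That is also why the urlunsplit() call in
    connection_command.py drops one empty placeholder from its tuple. A
    minimal sketch of the difference (the s3:// URL below is illustrative,
    not taken from the diff):

        from urllib.parse import urlparse, urlsplit

        url = "s3://my-bucket/path/to/key.txt"

        parsed = urlparse(url)  # ParseResult: scheme, netloc, path, params, query, fragment
        split = urlsplit(url)   # SplitResult: scheme, netloc, path, query, fragment

        assert parsed.scheme == split.scheme == "s3"
        assert parsed.netloc == split.netloc == "my-bucket"
        # the bucket/key extraction pattern used by the hooks in this commit
        assert split.path.lstrip("/") == "path/to/key.txt"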
---
airflow/cli/commands/connection_command.py | 13 ++++---------
airflow/config_templates/airflow_local_settings.py | 6 +++---
airflow/configuration.py | 4 ++--
airflow/datasets/__init__.py | 4 ++--
airflow/models/connection.py | 4 ++--
airflow/models/dataset.py | 4 ++--
airflow/providers/alibaba/cloud/hooks/oss.py | 4 ++--
airflow/providers/alibaba/cloud/sensors/oss_key.py | 6 +++---
airflow/providers/amazon/aws/hooks/s3.py | 6 +++---
airflow/providers/amazon/aws/transfers/s3_to_sftp.py | 4 ++--
airflow/providers/amazon/aws/transfers/sftp_to_s3.py | 4 ++--
airflow/providers/databricks/hooks/databricks_base.py | 4 ++--
airflow/providers/databricks/operators/databricks_repos.py | 4 ++--
.../providers/google/cloud/example_dags/example_dataflow.py | 4 ++--
airflow/providers/google/cloud/hooks/gcs.py | 4 ++--
airflow/providers/google/cloud/operators/cloud_build.py | 6 +++---
.../google/marketing_platform/operators/display_video.py | 4 ++--
airflow/providers/slack/hooks/slack_webhook.py | 4 ++--
airflow/www/extensions/init_wsgi_middlewares.py | 4 ++--
tests/models/test_taskinstance.py | 2 +-
tests/providers/amazon/aws/hooks/test_eks.py | 4 ++--
.../google/cloud/log/test_stackdriver_task_handler.py | 4 ++--
.../google/cloud/operators/test_dataflow_system.py | 4 ++--
tests/providers/odbc/hooks/test_odbc.py | 8 ++++----
tests/system/providers/apache/beam/utils.py | 10 +++++-----
.../google/cloud/cloud_build/example_cloud_build.py | 4 ++--
.../system/providers/google/cloud/gcs/example_firestore.py | 4 ++--
27 files changed, 64 insertions(+), 69 deletions(-)
diff --git a/airflow/cli/commands/connection_command.py b/airflow/cli/commands/connection_command.py
index 4e78a4ef5b..68594e2e80 100644
--- a/airflow/cli/commands/connection_command.py
+++ b/airflow/cli/commands/connection_command.py
@@ -24,7 +24,7 @@ import sys
import warnings
from pathlib import Path
from typing import Any
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlsplit, urlunsplit
from sqlalchemy.orm import exc
@@ -133,12 +133,12 @@ def _is_stdout(fileio: io.TextIOWrapper) -> bool:
def _valid_uri(uri: str) -> bool:
"""Check if a URI is valid, by checking if both scheme and netloc are
available"""
- uri_parts = urlparse(uri)
+ uri_parts = urlsplit(uri)
return uri_parts.scheme != "" and uri_parts.netloc != ""
@cache
-def _get_connection_types():
+def _get_connection_types() -> list[str]:
"""Returns connection types available."""
_connection_types = ["fs", "mesos_framework-id", "email", "generic"]
providers_manager = ProvidersManager()
@@ -148,10 +148,6 @@ def _get_connection_types():
return _connection_types
-def _valid_conn_type(conn_type: str) -> bool:
- return conn_type in _get_connection_types()
-
-
def connections_export(args):
"""Exports all connections to a file"""
file_formats = [".yaml", ".json", ".env"]
@@ -269,7 +265,7 @@ def connections_add(args):
msg = msg.format(
conn_id=new_conn.conn_id,
uri=args.conn_uri
- or urlunparse(
+ or urlunsplit(
(
new_conn.conn_type,
f"{new_conn.login or ''}:{'******' if
new_conn.password else ''}"
@@ -277,7 +273,6 @@ def connections_add(args):
new_conn.schema or "",
"",
"",
- "",
)
),
)
diff --git a/airflow/config_templates/airflow_local_settings.py b/airflow/config_templates/airflow_local_settings.py
index 26172aea39..1cedc1e3df 100644
--- a/airflow/config_templates/airflow_local_settings.py
+++ b/airflow/config_templates/airflow_local_settings.py
@@ -21,7 +21,7 @@ from __future__ import annotations
import os
from pathlib import Path
from typing import Any
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow.configuration import conf
from airflow.exceptions import AirflowException
@@ -221,7 +221,7 @@ if REMOTE_LOGGING:
DEFAULT_LOGGING_CONFIG["handlers"].update(S3_REMOTE_HANDLERS)
elif REMOTE_BASE_LOG_FOLDER.startswith("cloudwatch://"):
- url_parts = urlparse(REMOTE_BASE_LOG_FOLDER)
+ url_parts = urlsplit(REMOTE_BASE_LOG_FOLDER)
CLOUDWATCH_REMOTE_HANDLERS: dict[str, dict[str, str | None]] = {
"task": {
"class":
"airflow.providers.amazon.aws.log.cloudwatch_task_handler.CloudwatchTaskHandler",
@@ -264,7 +264,7 @@ if REMOTE_LOGGING:
elif REMOTE_BASE_LOG_FOLDER.startswith("stackdriver://"):
key_path = conf.get_mandatory_value("logging", "GOOGLE_KEY_PATH", fallback=None)
# stackdriver:///airflow-tasks => airflow-tasks
- log_name = urlparse(REMOTE_BASE_LOG_FOLDER).path[1:]
+ log_name = urlsplit(REMOTE_BASE_LOG_FOLDER).path[1:]
STACKDRIVER_REMOTE_HANDLERS = {
"task": {
"class":
"airflow.providers.google.cloud.log.stackdriver_task_handler.StackdriverTaskHandler",
diff --git a/airflow/configuration.py b/airflow/configuration.py
index bef78d4397..9908aa55b0 100644
--- a/airflow/configuration.py
+++ b/airflow/configuration.py
@@ -37,7 +37,7 @@ from contextlib import suppress
from json.decoder import JSONDecodeError
from re import Pattern
from typing import IO, Any, Dict, Iterable, Tuple, Union
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from typing_extensions import overload
@@ -403,7 +403,7 @@ class AirflowConfigParser(ConfigParser):
old_value = self.get(section, key)
bad_schemes = ["postgres+psycopg2", "postgres"]
good_scheme = "postgresql"
- parsed = urlparse(old_value)
+ parsed = urlsplit(old_value)
if parsed.scheme in bad_schemes:
warnings.warn(
f"Bad scheme in Airflow configuration core > sql_alchemy_conn:
`{parsed.scheme}`. "
diff --git a/airflow/datasets/__init__.py b/airflow/datasets/__init__.py
index 709cea16b3..e35f180c91 100644
--- a/airflow/datasets/__init__.py
+++ b/airflow/datasets/__init__.py
@@ -17,7 +17,7 @@
from __future__ import annotations
from typing import Any
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
import attr
@@ -37,6 +37,6 @@ class Dataset:
uri.encode("ascii")
except UnicodeEncodeError:
raise ValueError(f"{attr.name!r} must be ascii")
- parsed = urlparse(uri)
+ parsed = urlsplit(uri)
if parsed.scheme and parsed.scheme.lower() == "airflow":
raise ValueError(f"{attr.name!r} scheme `airflow` is reserved")
diff --git a/airflow/models/connection.py b/airflow/models/connection.py
index 1ee4f90a01..73a284be9e 100644
--- a/airflow/models/connection.py
+++ b/airflow/models/connection.py
@@ -21,7 +21,7 @@ import json
import logging
import warnings
from json import JSONDecodeError
-from urllib.parse import parse_qsl, quote, unquote, urlencode, urlparse
+from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit
from sqlalchemy import Boolean, Column, Integer, String, Text
from sqlalchemy.ext.declarative import declared_attr
@@ -188,7 +188,7 @@ class Connection(Base, LoggingMixin):
return conn_type
def _parse_from_uri(self, uri: str):
- uri_parts = urlparse(uri)
+ uri_parts = urlsplit(uri)
conn_type = uri_parts.scheme
self.conn_type = self._normalize_conn_type(conn_type)
self.host = _parse_netloc_to_hostname(uri_parts)
diff --git a/airflow/models/dataset.py b/airflow/models/dataset.py
index f4efc411fe..e52d5c756f 100644
--- a/airflow/models/dataset.py
+++ b/airflow/models/dataset.py
@@ -17,7 +17,7 @@
# under the License.
from __future__ import annotations
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
import sqlalchemy_jsonfield
from sqlalchemy import (
@@ -83,7 +83,7 @@ class DatasetModel(Base):
uri.encode("ascii")
except UnicodeEncodeError:
raise ValueError("URI must be ascii")
- parsed = urlparse(uri)
+ parsed = urlsplit(uri)
if parsed.scheme and parsed.scheme.lower() == "airflow":
raise ValueError("Scheme `airflow` is reserved.")
super().__init__(uri=uri, **kwargs)
diff --git a/airflow/providers/alibaba/cloud/hooks/oss.py b/airflow/providers/alibaba/cloud/hooks/oss.py
index c08a411072..ab29958a00 100644
--- a/airflow/providers/alibaba/cloud/hooks/oss.py
+++ b/airflow/providers/alibaba/cloud/hooks/oss.py
@@ -20,7 +20,7 @@ from __future__ import annotations
from functools import wraps
from inspect import signature
from typing import TYPE_CHECKING, Callable, TypeVar, cast
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
import oss2
from oss2.exceptions import ClientError
@@ -108,7 +108,7 @@ class OSSHook(BaseHook):
:param ossurl: The OSS Url to parse.
:return: the parsed bucket name and key
"""
- parsed_url = urlparse(ossurl)
+ parsed_url = urlsplit(ossurl)
if not parsed_url.netloc:
raise AirflowException(f'Please provide a bucket_name instead of "{ossurl}"')
diff --git a/airflow/providers/alibaba/cloud/sensors/oss_key.py b/airflow/providers/alibaba/cloud/sensors/oss_key.py
index 14c7ac88c9..98b2c25bea 100644
--- a/airflow/providers/alibaba/cloud/sensors/oss_key.py
+++ b/airflow/providers/alibaba/cloud/sensors/oss_key.py
@@ -18,7 +18,7 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Sequence
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow.compat.functools import cached_property
from airflow.exceptions import AirflowException
@@ -69,13 +69,13 @@ class OSSKeySensor(BaseSensorOperator):
@returns True if the object exists, False otherwise
"""
if self.bucket_name is None:
- parsed_url = urlparse(self.bucket_key)
+ parsed_url = urlsplit(self.bucket_key)
if parsed_url.netloc == "":
raise AirflowException("If key is a relative path from root,
please provide a bucket_name")
self.bucket_name = parsed_url.netloc
self.bucket_key = parsed_url.path.lstrip("/")
else:
- parsed_url = urlparse(self.bucket_key)
+ parsed_url = urlsplit(self.bucket_key)
if parsed_url.scheme != "" or parsed_url.netloc != "":
raise AirflowException(
"If bucket_name is provided, bucket_key"
diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py
index 1eaba1f808..e5b1bad580 100644
--- a/airflow/providers/amazon/aws/hooks/s3.py
+++ b/airflow/providers/amazon/aws/hooks/s3.py
@@ -31,7 +31,7 @@ from io import BytesIO
from pathlib import Path
from tempfile import NamedTemporaryFile, gettempdir
from typing import Any, Callable, TypeVar, cast
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from uuid import uuid4
from boto3.s3.transfer import S3Transfer, TransferConfig
@@ -153,7 +153,7 @@ class S3Hook(AwsBaseHook):
"""
format = s3url.split("//")
if format[0].lower() == "s3:":
- parsed_url = urlparse(s3url)
+ parsed_url = urlsplit(s3url)
if not parsed_url.netloc:
raise AirflowException(f'Please provide a bucket name using a valid format: "{s3url}"')
@@ -190,7 +190,7 @@ class S3Hook(AwsBaseHook):
if bucket is None:
return S3Hook.parse_s3_url(key)
- parsed_url = urlparse(key)
+ parsed_url = urlsplit(key)
if parsed_url.scheme != "" or parsed_url.netloc != "":
raise TypeError(
f"If `{bucket_param_name}` is provided, {key_param_name}
should be a relative path "
diff --git a/airflow/providers/amazon/aws/transfers/s3_to_sftp.py b/airflow/providers/amazon/aws/transfers/s3_to_sftp.py
index 4ead3d3479..3038c17cb0 100644
--- a/airflow/providers/amazon/aws/transfers/s3_to_sftp.py
+++ b/airflow/providers/amazon/aws/transfers/s3_to_sftp.py
@@ -20,7 +20,7 @@ from __future__ import annotations
import warnings
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Sequence
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow.models import BaseOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
@@ -80,7 +80,7 @@ class S3ToSFTPOperator(BaseOperator):
@staticmethod
def get_s3_key(s3_key: str) -> str:
"""This parses the correct format for S3 keys regardless of how the S3
url is passed."""
- parsed_s3_key = urlparse(s3_key)
+ parsed_s3_key = urlsplit(s3_key)
return parsed_s3_key.path.lstrip("/")
def execute(self, context: Context) -> None:
diff --git a/airflow/providers/amazon/aws/transfers/sftp_to_s3.py b/airflow/providers/amazon/aws/transfers/sftp_to_s3.py
index ec235dafe6..546c471026 100644
--- a/airflow/providers/amazon/aws/transfers/sftp_to_s3.py
+++ b/airflow/providers/amazon/aws/transfers/sftp_to_s3.py
@@ -19,7 +19,7 @@ from __future__ import annotations
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Sequence
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow.models import BaseOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
@@ -76,7 +76,7 @@ class SFTPToS3Operator(BaseOperator):
@staticmethod
def get_s3_key(s3_key: str) -> str:
"""This parses the correct format for S3 keys regardless of how the S3
url is passed."""
- parsed_s3_key = urlparse(s3_key)
+ parsed_s3_key = urlsplit(s3_key)
return parsed_s3_key.path.lstrip("/")
def execute(self, context: Context) -> None:
diff --git a/airflow/providers/databricks/hooks/databricks_base.py b/airflow/providers/databricks/hooks/databricks_base.py
index f7fae36104..50ab2eff6a 100644
--- a/airflow/providers/databricks/hooks/databricks_base.py
+++ b/airflow/providers/databricks/hooks/databricks_base.py
@@ -28,7 +28,7 @@ import copy
import platform
import time
from typing import Any
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
import aiohttp
import requests
@@ -186,7 +186,7 @@ class BaseDatabricksHook(BaseHook):
assert h._parse_host('xx.cloud.databricks.com') == 'xx.cloud.databricks.com'
"""
- urlparse_host = urlparse(host).hostname
+ urlparse_host = urlsplit(host).hostname
if urlparse_host:
# In this case, host = https://xx.cloud.databricks.com
return urlparse_host
diff --git a/airflow/providers/databricks/operators/databricks_repos.py b/airflow/providers/databricks/operators/databricks_repos.py
index 3187fec5c7..f42114d474 100644
--- a/airflow/providers/databricks/operators/databricks_repos.py
+++ b/airflow/providers/databricks/operators/databricks_repos.py
@@ -20,7 +20,7 @@ from __future__ import annotations
import re
from typing import TYPE_CHECKING, Sequence
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow.compat.functools import cached_property
from airflow.exceptions import AirflowException
@@ -106,7 +106,7 @@ class DatabricksReposCreateOperator(BaseOperator):
def __detect_repo_provider__(url):
provider = None
try:
- netloc = urlparse(url).netloc
+ netloc = urlsplit(url).netloc
idx = netloc.rfind("@")
if idx != -1:
netloc = netloc[(idx + 1) :]
diff --git a/airflow/providers/google/cloud/example_dags/example_dataflow.py b/airflow/providers/google/cloud/example_dags/example_dataflow.py
index b7b0c017dc..f2a0860fc0 100644
--- a/airflow/providers/google/cloud/example_dags/example_dataflow.py
+++ b/airflow/providers/google/cloud/example_dags/example_dataflow.py
@@ -23,7 +23,7 @@ from __future__ import annotations
import os
from datetime import datetime
from typing import Callable
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow import models
from airflow.exceptions import AirflowException
@@ -53,7 +53,7 @@ GCS_OUTPUT = os.environ.get("GCP_DATAFLOW_GCS_OUTPUT", "gs://INVALID BUCKET NAME
GCS_JAR = os.environ.get("GCP_DATAFLOW_JAR", "gs://INVALID BUCKET NAME/word-count-beam-bundled-0.1.jar")
GCS_PYTHON = os.environ.get("GCP_DATAFLOW_PYTHON", "gs://INVALID BUCKET NAME/wordcount_debugging.py")
-GCS_JAR_PARTS = urlparse(GCS_JAR)
+GCS_JAR_PARTS = urlsplit(GCS_JAR)
GCS_JAR_BUCKET_NAME = GCS_JAR_PARTS.netloc
GCS_JAR_OBJECT_NAME = GCS_JAR_PARTS.path[1:]
diff --git a/airflow/providers/google/cloud/hooks/gcs.py b/airflow/providers/google/cloud/hooks/gcs.py
index 9bd7e9ed37..7e2e081a46 100644
--- a/airflow/providers/google/cloud/hooks/gcs.py
+++ b/airflow/providers/google/cloud/hooks/gcs.py
@@ -30,7 +30,7 @@ from io import BytesIO
from os import path
from tempfile import NamedTemporaryFile
from typing import IO, Callable, Generator, Sequence, TypeVar, cast, overload
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from google.api_core.exceptions import NotFound
@@ -1161,7 +1161,7 @@ def _parse_gcs_url(gsurl: str) -> tuple[str, str]:
Given a Google Cloud Storage URL (gs://<bucket>/<blob>), returns a
tuple containing the corresponding bucket and blob.
"""
- parsed_url = urlparse(gsurl)
+ parsed_url = urlsplit(gsurl)
if not parsed_url.netloc:
raise AirflowException("Please provide a bucket name")
if parsed_url.scheme.lower() != "gs":
diff --git a/airflow/providers/google/cloud/operators/cloud_build.py b/airflow/providers/google/cloud/operators/cloud_build.py
index 57a6d60555..c33fa36c64 100644
--- a/airflow/providers/google/cloud/operators/cloud_build.py
+++ b/airflow/providers/google/cloud/operators/cloud_build.py
@@ -22,7 +22,7 @@ import json
import re
from copy import deepcopy
from typing import TYPE_CHECKING, Any, Sequence
-from urllib.parse import unquote, urlparse
+from urllib.parse import unquote, urlsplit
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
from google.api_core.retry import Retry
@@ -972,7 +972,7 @@ class BuildProcessor:
https://source.cloud.google.com/airflow-project/airflow-repo/+/branch-name:
"""
- url_parts = urlparse(source)
+ url_parts = urlsplit(source)
match = REGEX_REPO_PATH.search(url_parts.path)
@@ -1006,7 +1006,7 @@ class BuildProcessor:
gs://bucket-name/object-name.tar.gz
"""
- url_parts = urlparse(storage_url)
+ url_parts = urlsplit(storage_url)
if url_parts.scheme != "gs" or not url_parts.hostname or not
url_parts.path or url_parts.path == "/":
raise AirflowException(
diff --git a/airflow/providers/google/marketing_platform/operators/display_video.py b/airflow/providers/google/marketing_platform/operators/display_video.py
index 796d4fcab4..d33ffb46d7 100644
--- a/airflow/providers/google/marketing_platform/operators/display_video.py
+++ b/airflow/providers/google/marketing_platform/operators/display_video.py
@@ -24,7 +24,7 @@ import shutil
import tempfile
import urllib.request
from typing import TYPE_CHECKING, Any, Sequence
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
@@ -282,7 +282,7 @@ class GoogleDisplayVideo360DownloadReportOperator(BaseOperator):
# If no custom report_name provided, use DV360 name
file_url = resource["metadata"]["googleCloudStoragePathForLatestReport"]
- report_name = self.report_name or urlparse(file_url).path.split("/")[-1]
+ report_name = self.report_name or urlsplit(file_url).path.split("/")[-1]
report_name = self._resolve_file_name(report_name)
# Download the report
diff --git a/airflow/providers/slack/hooks/slack_webhook.py b/airflow/providers/slack/hooks/slack_webhook.py
index 33f403d7fe..d44a176673 100644
--- a/airflow/providers/slack/hooks/slack_webhook.py
+++ b/airflow/providers/slack/hooks/slack_webhook.py
@@ -21,7 +21,7 @@ import json
import warnings
from functools import wraps
from typing import TYPE_CHECKING, Any, Callable
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from slack_sdk import WebhookClient
@@ -285,7 +285,7 @@ class SlackWebhookHook(BaseHook):
base_url = base_url.rstrip("/")
if not webhook_token:
- parsed_token = (urlparse(base_url).path or "").strip("/")
+ parsed_token = (urlsplit(base_url).path or "").strip("/")
if base_url == DEFAULT_SLACK_WEBHOOK_ENDPOINT or not parsed_token:
# Raise an error in case of password not specified and
# 1. Result of constructing base_url equal https://hooks.slack.com/services
diff --git a/airflow/www/extensions/init_wsgi_middlewares.py b/airflow/www/extensions/init_wsgi_middlewares.py
index 88963374b0..809d8b2533 100644
--- a/airflow/www/extensions/init_wsgi_middlewares.py
+++ b/airflow/www/extensions/init_wsgi_middlewares.py
@@ -18,7 +18,7 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Iterable
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from flask import Flask
from werkzeug.middleware.dispatcher import DispatcherMiddleware
@@ -38,7 +38,7 @@ def _root_app(env: WSGIEnvironment, resp: StartResponse) -> Iterable[bytes]:
def init_wsgi_middleware(flask_app: Flask) -> None:
"""Handle X-Forwarded-* headers and base_url support"""
# Apply DispatcherMiddleware
- base_url = urlparse(conf.get("webserver", "base_url"))[2]
+ base_url = urlsplit(conf.get("webserver", "base_url"))[2]
if not base_url or base_url == "/":
base_url = ""
if base_url:
diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py
index bfd81e3690..3ea9b6acc6 100644
--- a/tests/models/test_taskinstance.py
+++ b/tests/models/test_taskinstance.py
@@ -1618,7 +1618,7 @@ class TestTaskInstance:
now = pendulum.now("Europe/Brussels")
ti = create_task_instance(dag_id="dag", task_id="op", execution_date=now)
query = urllib.parse.parse_qs(
- urllib.parse.urlparse(ti.mark_success_url).query, keep_blank_values=True, strict_parsing=True
+ urllib.parse.urlsplit(ti.mark_success_url).query, keep_blank_values=True, strict_parsing=True
)
assert query["dag_id"][0] == "dag"
assert query["task_id"][0] == "op"
diff --git a/tests/providers/amazon/aws/hooks/test_eks.py b/tests/providers/amazon/aws/hooks/test_eks.py
index 73638f5f7e..3d3e51f94a 100644
--- a/tests/providers/amazon/aws/hooks/test_eks.py
+++ b/tests/providers/amazon/aws/hooks/test_eks.py
@@ -22,7 +22,7 @@ from copy import deepcopy
from datetime import datetime
from pathlib import Path
from unittest import mock
-from urllib.parse import ParseResult, urlparse
+from urllib.parse import ParseResult, urlsplit
import pytest
import yaml
@@ -1347,7 +1347,7 @@ def assert_result_matches_expected_list(
def assert_is_valid_uri(value: str) -> None:
- result: ParseResult = urlparse(value)
+ result: ParseResult = urlsplit(value)
assert all([result.scheme, result.netloc, result.path])
assert REGION in value
diff --git a/tests/providers/google/cloud/log/test_stackdriver_task_handler.py b/tests/providers/google/cloud/log/test_stackdriver_task_handler.py
index cbe1e59109..ca489efb45 100644
--- a/tests/providers/google/cloud/log/test_stackdriver_task_handler.py
+++ b/tests/providers/google/cloud/log/test_stackdriver_task_handler.py
@@ -18,7 +18,7 @@ from __future__ import annotations
import logging
from unittest import mock
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import parse_qs, urlsplit
import pytest
from google.cloud.logging import Resource
@@ -364,7 +364,7 @@ labels.try_number="3"'''
stackdriver_task_handler = StackdriverTaskHandler(gcp_key_path="KEY_PATH")
url = stackdriver_task_handler.get_external_log_url(self.ti, self.ti.try_number)
- parsed_url = urlparse(url)
+ parsed_url = urlsplit(url)
parsed_qs = parse_qs(parsed_url.query)
assert "https" == parsed_url.scheme
assert "console.cloud.google.com" == parsed_url.netloc
diff --git a/tests/providers/google/cloud/operators/test_dataflow_system.py b/tests/providers/google/cloud/operators/test_dataflow_system.py
index f0940d59be..9efd8b2325 100644
--- a/tests/providers/google/cloud/operators/test_dataflow_system.py
+++ b/tests/providers/google/cloud/operators/test_dataflow_system.py
@@ -22,7 +22,7 @@ import os
import shlex
import textwrap
from tempfile import NamedTemporaryFile
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
import pytest
import requests
@@ -68,7 +68,7 @@ GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
GCR_FLEX_TEMPLATE_IMAGE = f"gcr.io/{GCP_PROJECT_ID}/samples-dataflow-streaming-beam-sql:latest"
# https://github.com/GoogleCloudPlatform/java-docs-samples/tree/954553c/dataflow/flex-templates/streaming_beam_sql
-GCS_TEMPLATE_PARTS = urlparse(GCS_FLEX_TEMPLATE_TEMPLATE_PATH)
+GCS_TEMPLATE_PARTS = urlsplit(GCS_FLEX_TEMPLATE_TEMPLATE_PATH)
GCS_FLEX_TEMPLATE_BUCKET_NAME = GCS_TEMPLATE_PARTS.netloc
diff --git a/tests/providers/odbc/hooks/test_odbc.py b/tests/providers/odbc/hooks/test_odbc.py
index 5fa33bd9e9..58dcaf10d7 100644
--- a/tests/providers/odbc/hooks/test_odbc.py
+++ b/tests/providers/odbc/hooks/test_odbc.py
@@ -19,7 +19,7 @@ from __future__ import annotations
import json
from unittest import mock
-from urllib.parse import quote_plus, urlparse
+from urllib.parse import quote_plus, urlsplit
import pyodbc
@@ -191,14 +191,14 @@ class TestOdbcHook:
def test_sqlalchemy_scheme_default(self):
hook = self.get_hook()
uri = hook.get_uri()
- assert urlparse(uri).scheme == "mssql+pyodbc"
+ assert urlsplit(uri).scheme == "mssql+pyodbc"
def test_sqlalchemy_scheme_param(self):
hook = self.get_hook(hook_params=dict(sqlalchemy_scheme="my-scheme"))
uri = hook.get_uri()
- assert urlparse(uri).scheme == "my-scheme"
+ assert urlsplit(uri).scheme == "my-scheme"
def test_sqlalchemy_scheme_extra(self):
hook = self.get_hook(conn_params=dict(extra=json.dumps(dict(sqlalchemy_scheme="my-scheme"))))
uri = hook.get_uri()
- assert urlparse(uri).scheme == "my-scheme"
+ assert urlsplit(uri).scheme == "my-scheme"
diff --git a/tests/system/providers/apache/beam/utils.py b/tests/system/providers/apache/beam/utils.py
index 511a1b352e..d852c8fe8b 100644
--- a/tests/system/providers/apache/beam/utils.py
+++ b/tests/system/providers/apache/beam/utils.py
@@ -22,7 +22,7 @@ from __future__ import annotations
import os
from datetime import datetime
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow.utils.trigger_rule import TriggerRule
@@ -55,16 +55,16 @@ GCS_JAR_FLINK_RUNNER = os.environ.get(
"gs://INVALID BUCKET
NAME/tests/dataflow-templates-bundled-java=11-beam-v2.25.0-FlinkRunner.jar",
)
-GCS_JAR_DIRECT_RUNNER_PARTS = urlparse(GCS_JAR_DIRECT_RUNNER)
+GCS_JAR_DIRECT_RUNNER_PARTS = urlsplit(GCS_JAR_DIRECT_RUNNER)
GCS_JAR_DIRECT_RUNNER_BUCKET_NAME = GCS_JAR_DIRECT_RUNNER_PARTS.netloc
GCS_JAR_DIRECT_RUNNER_OBJECT_NAME = GCS_JAR_DIRECT_RUNNER_PARTS.path[1:]
-GCS_JAR_DATAFLOW_RUNNER_PARTS = urlparse(GCS_JAR_DATAFLOW_RUNNER)
+GCS_JAR_DATAFLOW_RUNNER_PARTS = urlsplit(GCS_JAR_DATAFLOW_RUNNER)
GCS_JAR_DATAFLOW_RUNNER_BUCKET_NAME = GCS_JAR_DATAFLOW_RUNNER_PARTS.netloc
GCS_JAR_DATAFLOW_RUNNER_OBJECT_NAME = GCS_JAR_DATAFLOW_RUNNER_PARTS.path[1:]
-GCS_JAR_SPARK_RUNNER_PARTS = urlparse(GCS_JAR_SPARK_RUNNER)
+GCS_JAR_SPARK_RUNNER_PARTS = urlsplit(GCS_JAR_SPARK_RUNNER)
GCS_JAR_SPARK_RUNNER_BUCKET_NAME = GCS_JAR_SPARK_RUNNER_PARTS.netloc
GCS_JAR_SPARK_RUNNER_OBJECT_NAME = GCS_JAR_SPARK_RUNNER_PARTS.path[1:]
-GCS_JAR_FLINK_RUNNER_PARTS = urlparse(GCS_JAR_FLINK_RUNNER)
+GCS_JAR_FLINK_RUNNER_PARTS = urlsplit(GCS_JAR_FLINK_RUNNER)
GCS_JAR_FLINK_RUNNER_BUCKET_NAME = GCS_JAR_FLINK_RUNNER_PARTS.netloc
GCS_JAR_FLINK_RUNNER_OBJECT_NAME = GCS_JAR_FLINK_RUNNER_PARTS.path[1:]
diff --git a/tests/system/providers/google/cloud/cloud_build/example_cloud_build.py b/tests/system/providers/google/cloud/cloud_build/example_cloud_build.py
index 298e44d5b0..bb770e46a7 100644
--- a/tests/system/providers/google/cloud/cloud_build/example_cloud_build.py
+++ b/tests/system/providers/google/cloud/cloud_build/example_cloud_build.py
@@ -33,7 +33,7 @@ from pathlib import Path
from typing import Any, cast
import yaml
-from future.backports.urllib.parse import urlparse
+from future.backports.urllib.parse import urlsplit
from airflow import models
from airflow.models.baseoperator import chain
@@ -60,7 +60,7 @@ BUCKET_NAME_SRC = f"bucket-src-{DAG_ID}-{ENV_ID}"
GCP_SOURCE_ARCHIVE_URL = os.environ.get("GCP_CLOUD_BUILD_ARCHIVE_URL", f"gs://{BUCKET_NAME_SRC}/file.tar.gz")
GCP_SOURCE_REPOSITORY_NAME = "test-cloud-build-repo"
-GCP_SOURCE_ARCHIVE_URL_PARTS = urlparse(GCP_SOURCE_ARCHIVE_URL)
+GCP_SOURCE_ARCHIVE_URL_PARTS = urlsplit(GCP_SOURCE_ARCHIVE_URL)
GCP_SOURCE_BUCKET_NAME = GCP_SOURCE_ARCHIVE_URL_PARTS.netloc
CURRENT_FOLDER = Path(__file__).parent
diff --git a/tests/system/providers/google/cloud/gcs/example_firestore.py b/tests/system/providers/google/cloud/gcs/example_firestore.py
index 90591c085a..9be3b8dd8d 100644
--- a/tests/system/providers/google/cloud/gcs/example_firestore.py
+++ b/tests/system/providers/google/cloud/gcs/example_firestore.py
@@ -45,7 +45,7 @@ from __future__ import annotations
import os
from datetime import datetime
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from airflow import models
from airflow.providers.google.cloud.operators.bigquery import (
@@ -66,7 +66,7 @@ FIRESTORE_PROJECT_ID = os.environ.get("G_FIRESTORE_PROJECT_ID", "example-firebas
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
DATASET_NAME = f"dataset_{DAG_ID}_{ENV_ID}"
EXPORT_DESTINATION_URL = os.environ.get("GCP_FIRESTORE_ARCHIVE_URL", "gs://INVALID BUCKET NAME/namespace/")
-EXPORT_PREFIX = urlparse(EXPORT_DESTINATION_URL).path
+EXPORT_PREFIX = urlsplit(EXPORT_DESTINATION_URL).path
EXPORT_COLLECTION_ID = os.environ.get("GCP_FIRESTORE_COLLECTION_ID", "firestore_collection_id")
DATASET_LOCATION = os.environ.get("GCP_FIRESTORE_DATASET_LOCATION", "EU")