This is an automated email from the ASF dual-hosted git repository.
ephraimanierobi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new bd608a56ab Fix updating account url for WasbHook (#33457)
bd608a56ab is described below
commit bd608a56abd1a6c2a98987daf7f092d2dabea555
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Thu Aug 17 08:55:58 2023 +0100
Fix updating account url for WasbHook (#33457)
* Fix updating account url for WasbHook
Users supply the hostname (account URL) for Azure in different ways.
Sometimes the host has no urlparse scheme but does have a urlparse path, e.g.
name.blob.windows.net,
and other times it is just the Azure ID, e.g. aldhjf9dads.
While working on #32980, I assumed that a hostname without a scheme is
not valid. That assumption is
incorrect, since a DNS name can serve as the host.
The fix is to check whether the parsed URL has no netloc and its path does
not contain a dot; in that case, the login/account_name is used to construct
the account_url.
* fixup! Fix updating account url for WasbHook
---
airflow/providers/microsoft/azure/hooks/wasb.py | 25 ++++++++++++--------
tests/providers/microsoft/azure/hooks/test_wasb.py | 27 ++++++++++++++++++++++
2 files changed, 42 insertions(+), 10 deletions(-)
diff --git a/airflow/providers/microsoft/azure/hooks/wasb.py
b/airflow/providers/microsoft/azure/hooks/wasb.py
index 5d29f6b0b6..912992afa5 100644
--- a/airflow/providers/microsoft/azure/hooks/wasb.py
+++ b/airflow/providers/microsoft/azure/hooks/wasb.py
@@ -28,6 +28,7 @@ from __future__ import annotations
import logging
import os
from typing import Any, Union
+from urllib.parse import urlparse
from asgiref.sync import sync_to_async
from azure.core.exceptions import HttpResponseError, ResourceExistsError,
ResourceNotFoundError
@@ -152,11 +153,13 @@ class WasbHook(BaseHook):
# connection_string auth takes priority
return BlobServiceClient.from_connection_string(connection_string,
**extra)
- account_url = (
- conn.host
- if conn.host and conn.host.startswith("https://")
- else f"https://{conn.login}.blob.core.windows.net/"
- )
+ account_url = conn.host if conn.host else
f"https://{conn.login}.blob.core.windows.net/"
+ parsed_url = urlparse(account_url)
+
+ if not parsed_url.netloc and "." not in parsed_url.path:
+ # if there's no netloc and no dots in the path, then user only
+ # provided the Active Directory ID, not the full URL or DNS name
+ account_url = f"https://{conn.login}.blob.core.windows.net/"
tenant = self._get_field(extra, "tenant_id")
if tenant:
@@ -555,11 +558,13 @@ class WasbAsyncHook(WasbHook):
)
return self.blob_service_client
- account_url = (
- conn.host
- if conn.host and conn.host.startswith("https://")
- else f"https://{conn.login}.blob.core.windows.net/"
- )
+ account_url = conn.host if conn.host else
f"https://{conn.login}.blob.core.windows.net/"
+ parsed_url = urlparse(account_url)
+
+ if not parsed_url.netloc and "." not in parsed_url.path:
+ # if there's no netloc and no dots in the path, then user only
+ # provided the Active Directory ID, not the full URL or DNS name
+ account_url = f"https://{conn.login}.blob.core.windows.net/"
tenant = self._get_field(extra, "tenant_id")
if tenant:
diff --git a/tests/providers/microsoft/azure/hooks/test_wasb.py
b/tests/providers/microsoft/azure/hooks/test_wasb.py
index 6e5d3ebd7b..1f48a7b011 100644
--- a/tests/providers/microsoft/azure/hooks/test_wasb.py
+++ b/tests/providers/microsoft/azure/hooks/test_wasb.py
@@ -359,6 +359,33 @@ class TestWasbHook:
conn = hook.get_conn()
assert conn.credential._authority == self.authority
+ @pytest.mark.parametrize(
+ "provided_host, expected_host",
+ [
+ (
+ "https://testaccountname.blob.core.windows.net",
+ "https://testaccountname.blob.core.windows.net",
+ ),
+ ("testhost", "https://accountlogin.blob.core.windows.net/"),
+ ("testhost.dns", "testhost.dns"),
+ ("testhost.blob.net", "testhost.blob.net"),
+ ],
+ )
+
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.BlobServiceClient")
+
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook.get_connection")
+ def test_proper_account_url_update(
+ self, mock_get_conn, mock_blob_service_client, provided_host,
expected_host
+ ):
+ mock_get_conn.return_value = Connection(
+ conn_id="test_conn",
+ conn_type=self.connection_type,
+ password="testpass",
+ login="accountlogin",
+ host=provided_host,
+ )
+ WasbHook(wasb_conn_id=self.shared_key_conn_id)
+
mock_blob_service_client.assert_called_once_with(account_url=expected_host,
credential="testpass")
+
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.BlobServiceClient")
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook.get_connection")
def test_check_for_blob(self, mock_get_conn, mock_service):