This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new bd608a56ab Fix updating account url for WasbHook (#33457)
bd608a56ab is described below

commit bd608a56abd1a6c2a98987daf7f092d2dabea555
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Thu Aug 17 08:55:58 2023 +0100

    Fix updating account url for WasbHook (#33457)
    
    * Fix updating account url for WasbHook
    
    There are different ways users supply the hostname (account URL) in Azure.
    Sometimes the host doesn't have a urlparse.scheme but has a urlparse.path,
    e.g. name.blob.windows.net, and other times it is just the Azure ID,
    e.g. aldhjf9dads.
    While working on #32980, I assumed that if there's no scheme, then the
    hostname is not valid. That's incorrect, since a DNS name can serve as
    the host.
    The fix is to check that there is no netloc and that urlparse.path does
    not include a dot; if both hold, use the login/account_name to construct
    the account_url.
    
    * fixup! Fix updating account url for WasbHook
---
 airflow/providers/microsoft/azure/hooks/wasb.py    | 25 ++++++++++++--------
 tests/providers/microsoft/azure/hooks/test_wasb.py | 27 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/airflow/providers/microsoft/azure/hooks/wasb.py b/airflow/providers/microsoft/azure/hooks/wasb.py
index 5d29f6b0b6..912992afa5 100644
--- a/airflow/providers/microsoft/azure/hooks/wasb.py
+++ b/airflow/providers/microsoft/azure/hooks/wasb.py
@@ -28,6 +28,7 @@ from __future__ import annotations
 import logging
 import os
 from typing import Any, Union
+from urllib.parse import urlparse
 
 from asgiref.sync import sync_to_async
 from azure.core.exceptions import HttpResponseError, ResourceExistsError, ResourceNotFoundError
@@ -152,11 +153,13 @@ class WasbHook(BaseHook):
             # connection_string auth takes priority
             return BlobServiceClient.from_connection_string(connection_string, **extra)
 
-        account_url = (
-            conn.host
-            if conn.host and conn.host.startswith("https://")
-            else f"https://{conn.login}.blob.core.windows.net/"
-        )
+        account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
+        parsed_url = urlparse(account_url)
+
+        if not parsed_url.netloc and "." not in parsed_url.path:
+            # if there's no netloc and no dots in the path, then user only
+            # provided the Active Directory ID, not the full URL or DNS name
+            account_url = f"https://{conn.login}.blob.core.windows.net/"
 
         tenant = self._get_field(extra, "tenant_id")
         if tenant:
@@ -555,11 +558,13 @@ class WasbAsyncHook(WasbHook):
             )
             return self.blob_service_client
 
-        account_url = (
-            conn.host
-            if conn.host and conn.host.startswith("https://")
-            else f"https://{conn.login}.blob.core.windows.net/"
-        )
+        account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
+        parsed_url = urlparse(account_url)
+
+        if not parsed_url.netloc and "." not in parsed_url.path:
+            # if there's no netloc and no dots in the path, then user only
+            # provided the Active Directory ID, not the full URL or DNS name
+            account_url = f"https://{conn.login}.blob.core.windows.net/"
 
         tenant = self._get_field(extra, "tenant_id")
         if tenant:
diff --git a/tests/providers/microsoft/azure/hooks/test_wasb.py b/tests/providers/microsoft/azure/hooks/test_wasb.py
index 6e5d3ebd7b..1f48a7b011 100644
--- a/tests/providers/microsoft/azure/hooks/test_wasb.py
+++ b/tests/providers/microsoft/azure/hooks/test_wasb.py
@@ -359,6 +359,33 @@ class TestWasbHook:
         conn = hook.get_conn()
         assert conn.credential._authority == self.authority
 
+    @pytest.mark.parametrize(
+        "provided_host, expected_host",
+        [
+            (
+                "https://testaccountname.blob.core.windows.net",
+                "https://testaccountname.blob.core.windows.net",
+            ),
+            ("testhost", "https://accountlogin.blob.core.windows.net/"),
+            ("testhost.dns", "testhost.dns"),
+            ("testhost.blob.net", "testhost.blob.net"),
+        ],
+    )
+    @mock.patch("airflow.providers.microsoft.azure.hooks.wasb.BlobServiceClient")
+    @mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook.get_connection")
+    def test_proper_account_url_update(
+        self, mock_get_conn, mock_blob_service_client, provided_host, expected_host
+    ):
+        mock_get_conn.return_value = Connection(
+            conn_id="test_conn",
+            conn_type=self.connection_type,
+            password="testpass",
+            login="accountlogin",
+            host=provided_host,
+        )
+        WasbHook(wasb_conn_id=self.shared_key_conn_id)
+        mock_blob_service_client.assert_called_once_with(account_url=expected_host, credential="testpass")
+
     @mock.patch("airflow.providers.microsoft.azure.hooks.wasb.BlobServiceClient")
     @mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook.get_connection")
     def test_check_for_blob(self, mock_get_conn, mock_service):

Reply via email to