This is an automated email from the ASF dual-hosted git repository.

eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 9e2d607f43 Truncate Wasb storage account name if it's more than 24 characters (#33851)
9e2d607f43 is described below

commit 9e2d607f4305f34095cf80b106714802cff5aaf3
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Tue Aug 29 07:59:12 2023 +0100

    Truncate Wasb storage account name if it's more than 24 characters (#33851)
    
    * Truncate Wasb storage account name if it's more than 24 characters
    
    Storage account names must be between 3 and 24 characters in length but for some
    reasons that I can't explain, we saw a situation where the storage name is more than 24 characters
    and had to be truncated before it could work. Maybe it was possible in the past to have more than 24 characters or
    it could come from cluster but whichever way, the solution that worked was truncating the account name to
    24 characters.
    
    * Apply suggestions from code review
    
    Co-authored-by: Jed Cunningham <[email protected]>
    
    * Also add the change to the async part
    
    ---------
    
    Co-authored-by: Jed Cunningham <[email protected]>
---
 airflow/providers/microsoft/azure/hooks/wasb.py    | 32 ++++++++++++++++------
 tests/providers/microsoft/azure/hooks/test_wasb.py |  8 ++++--
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/airflow/providers/microsoft/azure/hooks/wasb.py b/airflow/providers/microsoft/azure/hooks/wasb.py
index 59f4b26de5..72579b8bf9 100644
--- a/airflow/providers/microsoft/azure/hooks/wasb.py
+++ b/airflow/providers/microsoft/azure/hooks/wasb.py
@@ -163,10 +163,18 @@ class WasbHook(BaseHook):
         account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
         parsed_url = urlparse(account_url)
 
-        if not parsed_url.netloc and "." not in parsed_url.path:
-            # if there's no netloc and no dots in the path, then user only
-            # provided the Active Directory ID, not the full URL or DNS name
-            account_url = f"https://{conn.login}.blob.core.windows.net/"
+        if not parsed_url.netloc:
+            if "." not in parsed_url.path:
+                # if there's no netloc and no dots in the path, then user only
+                # provided the Active Directory ID, not the full URL or DNS name
+                account_url = f"https://{conn.login}.blob.core.windows.net/"
+            else:
+                # if there's no netloc but there are dots in the path, then user
+                # provided the DNS name without the https:// prefix.
+                # Azure storage account name can only be 3 to 24 characters in length
+                # https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name
+                acc_name = account_url.split(".")[0][:24]
+                account_url = f"https://{acc_name}." + ".".join(account_url.split(".")[1:])
 
         tenant = self._get_field(extra, "tenant_id")
         if tenant:
@@ -568,10 +576,18 @@ class WasbAsyncHook(WasbHook):
         account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
         parsed_url = urlparse(account_url)
 
-        if not parsed_url.netloc and "." not in parsed_url.path:
-            # if there's no netloc and no dots in the path, then user only
-            # provided the Active Directory ID, not the full URL or DNS name
-            account_url = f"https://{conn.login}.blob.core.windows.net/"
+        if not parsed_url.netloc:
+            if "." not in parsed_url.path:
+                # if there's no netloc and no dots in the path, then user only
+                # provided the Active Directory ID, not the full URL or DNS name
+                account_url = f"https://{conn.login}.blob.core.windows.net/"
+            else:
+                # if there's no netloc but there are dots in the path, then user
+                # provided the DNS name without the https:// prefix.
+                # Azure storage account name can only be 3 to 24 characters in length
+                # https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name
+                acc_name = account_url.split(".")[0][:24]
+                account_url = f"https://{acc_name}." + ".".join(account_url.split(".")[1:])
 
         tenant = self._get_field(extra, "tenant_id")
         if tenant:
diff --git a/tests/providers/microsoft/azure/hooks/test_wasb.py b/tests/providers/microsoft/azure/hooks/test_wasb.py
index 06ef4eedfb..80cd627fa0 100644
--- a/tests/providers/microsoft/azure/hooks/test_wasb.py
+++ b/tests/providers/microsoft/azure/hooks/test_wasb.py
@@ -346,8 +346,12 @@ class TestWasbHook:
                 "https://testaccountname.blob.core.windows.net",
             ),
             ("testhost", "https://accountlogin.blob.core.windows.net/"),
-            ("testhost.dns", "testhost.dns"),
-            ("testhost.blob.net", "testhost.blob.net"),
+            ("testhost.dns", "https://testhost.dns"),
+            ("testhost.blob.net", "https://testhost.blob.net"),
+            (
+                "testhostakjhdisdfbearioyo.blob.core.windows.net",
+                "https://testhostakjhdisdfbearioy.blob.core.windows.net",
+            ),  # more than 24 characters
         ],
     )
     def test_proper_account_url_update(

Reply via email to