This is an automated email from the ASF dual-hosted git repository.
eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 9e2d607f43 Truncate Wasb storage account name if it's more than 24
characters (#33851)
9e2d607f43 is described below
commit 9e2d607f4305f34095cf80b106714802cff5aaf3
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Tue Aug 29 07:59:12 2023 +0100
Truncate Wasb storage account name if it's more than 24 characters (#33851)
* Truncate Wasb storage account name if it's more than 24 characters
Storage account names must be between 3 and 24 characters in length, but for
reasons I can't explain, we saw a situation where the storage account name was
longer than 24 characters and had to be truncated before it would work. Maybe
it was possible in the past to have more than 24 characters, or the name could
have come from the cluster; either way, the solution that worked was
truncating the account name to 24 characters.
* Apply suggestions from code review
Co-authored-by: Jed Cunningham
<[email protected]>
* Also add the change to the async part
---------
Co-authored-by: Jed Cunningham
<[email protected]>
---
airflow/providers/microsoft/azure/hooks/wasb.py | 32 ++++++++++++++++------
tests/providers/microsoft/azure/hooks/test_wasb.py | 8 ++++--
2 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/airflow/providers/microsoft/azure/hooks/wasb.py
b/airflow/providers/microsoft/azure/hooks/wasb.py
index 59f4b26de5..72579b8bf9 100644
--- a/airflow/providers/microsoft/azure/hooks/wasb.py
+++ b/airflow/providers/microsoft/azure/hooks/wasb.py
@@ -163,10 +163,18 @@ class WasbHook(BaseHook):
account_url = conn.host if conn.host else
f"https://{conn.login}.blob.core.windows.net/"
parsed_url = urlparse(account_url)
- if not parsed_url.netloc and "." not in parsed_url.path:
- # if there's no netloc and no dots in the path, then user only
- # provided the Active Directory ID, not the full URL or DNS name
- account_url = f"https://{conn.login}.blob.core.windows.net/"
+ if not parsed_url.netloc:
+ if "." not in parsed_url.path:
+ # if there's no netloc and no dots in the path, then user only
+ # provided the Active Directory ID, not the full URL or DNS name
+ account_url = f"https://{conn.login}.blob.core.windows.net/"
+ else:
+ # if there's no netloc but there are dots in the path, then user
+ # provided the DNS name without the https:// prefix.
+ # Azure storage account name can only be 3 to 24 characters in length
+ # https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name
+ acc_name = account_url.split(".")[0][:24]
+ account_url = f"https://{acc_name}." +
".".join(account_url.split(".")[1:])
tenant = self._get_field(extra, "tenant_id")
if tenant:
@@ -568,10 +576,18 @@ class WasbAsyncHook(WasbHook):
account_url = conn.host if conn.host else
f"https://{conn.login}.blob.core.windows.net/"
parsed_url = urlparse(account_url)
- if not parsed_url.netloc and "." not in parsed_url.path:
- # if there's no netloc and no dots in the path, then user only
- # provided the Active Directory ID, not the full URL or DNS name
- account_url = f"https://{conn.login}.blob.core.windows.net/"
+ if not parsed_url.netloc:
+ if "." not in parsed_url.path:
+ # if there's no netloc and no dots in the path, then user only
+ # provided the Active Directory ID, not the full URL or DNS name
+ account_url = f"https://{conn.login}.blob.core.windows.net/"
+ else:
+ # if there's no netloc but there are dots in the path, then user
+ # provided the DNS name without the https:// prefix.
+ # Azure storage account name can only be 3 to 24 characters in length
+ # https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name
+ acc_name = account_url.split(".")[0][:24]
+ account_url = f"https://{acc_name}." +
".".join(account_url.split(".")[1:])
tenant = self._get_field(extra, "tenant_id")
if tenant:
diff --git a/tests/providers/microsoft/azure/hooks/test_wasb.py
b/tests/providers/microsoft/azure/hooks/test_wasb.py
index 06ef4eedfb..80cd627fa0 100644
--- a/tests/providers/microsoft/azure/hooks/test_wasb.py
+++ b/tests/providers/microsoft/azure/hooks/test_wasb.py
@@ -346,8 +346,12 @@ class TestWasbHook:
"https://testaccountname.blob.core.windows.net",
),
("testhost", "https://accountlogin.blob.core.windows.net/"),
- ("testhost.dns", "testhost.dns"),
- ("testhost.blob.net", "testhost.blob.net"),
+ ("testhost.dns", "https://testhost.dns"),
+ ("testhost.blob.net", "https://testhost.blob.net"),
+ (
+ "testhostakjhdisdfbearioyo.blob.core.windows.net",
+ "https://testhostakjhdisdfbearioy.blob.core.windows.net",
+ ), # more than 24 characters
],
)
def test_proper_account_url_update(