This is an automated email from the ASF dual-hosted git repository.
dabla pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 15a4048db81 refactor: Added Azure, Fabric and instance MSSQL Asset URI
validation (#67999)
15a4048db81 is described below
commit 15a4048db818604098708afd8dce5edf085652db
Author: David Blain <[email protected]>
AuthorDate: Thu Jun 4 15:56:02 2026 +0200
refactor: Added Azure, Fabric and instance MSSQL Asset URI validation
(#67999)
---
.../providers/microsoft/mssql/assets/mssql.py | 14 ++++--
.../unit/microsoft/mssql/assets/test_mssql.py | 58 ++++++++++++++++++++--
2 files changed, 64 insertions(+), 8 deletions(-)
diff --git
a/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py
b/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py
index b7251048a48..ff8523b372f 100644
---
a/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py
+++
b/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py
@@ -33,8 +33,10 @@ def sanitize_uri(uri: SplitResult) -> SplitResult:
if uri.port is None:
host = uri.netloc.rstrip(":")
uri = uri._replace(netloc=f"{host}:1433")
- if len(uri.path.split("/")) != 4: # Leading slash, database, schema, and
table names.
- raise ValueError("URI format mssql:// must contain database, schema, and table names")
+ if len(uri.path.split("/")) not in {4, 5}:
+ raise ValueError(
+ "URI format mssql:// must contain database, schema, and table/view names with optional instance name"
+ )
return uri
@@ -50,6 +52,10 @@ def convert_asset_to_openlineage(asset: Asset,
lineage_context) -> OpenLineageDa
from airflow.providers.common.compat.openlineage.facet import Dataset as
OpenLineageDataset
- parsed = urlsplit(asset.uri)
- _, database, schema, table = parsed.path.split("/") # Leading slash,
database, schema, and table names.
+ parsed = sanitize_uri(urlsplit(asset.uri))
+ path_parts = parsed.path.split("/")
+ if len(path_parts) == 4:
+ _, database, schema, table = path_parts # Leading slash, database,
schema, and table names.
+ else:
+ _, _instance, database, schema, table = path_parts
return OpenLineageDataset(namespace=f"mssql://{parsed.netloc}",
name=f"{database}.{schema}.{table}")
diff --git
a/providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py
b/providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py
index d44b2205b43..586d151dbe9 100644
--- a/providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py
+++ b/providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py
@@ -42,6 +42,31 @@ from airflow.providers.microsoft.mssql.assets.mssql import (
"mssql://example.com:1433/database/schema/table",
id="default-port",
),
+ pytest.param(
+ "mssql://example.com/instance/database/schema/table",
+ "mssql://example.com:1433/instance/database/schema/table",
+ id="with-instance-default-port",
+ ),
+ pytest.param(
+
"mssql://my-azure-server.database.windows.net/database/schema/table",
+
"mssql://my-azure-server.database.windows.net:1433/database/schema/table",
+ id="azure-sql-default-port",
+ ),
+ pytest.param(
+
"mssql://my-azure-server.database.windows.net/instance/database/schema/table",
+
"mssql://my-azure-server.database.windows.net:1433/instance/database/schema/table",
+ id="azure-sql-with-instance-default-port",
+ ),
+ pytest.param(
+
"mssql://my-fabric-server.my-tenant.fabric.microsoft.com/database/schema/table",
+
"mssql://my-fabric-server.my-tenant.fabric.microsoft.com:1433/database/schema/table",
+ id="fabric-default-port",
+ ),
+ pytest.param(
+
"mssql://my-fabric-server.my-tenant.fabric.microsoft.com/instance/database/schema/table",
+
"mssql://my-fabric-server.my-tenant.fabric.microsoft.com:1433/instance/database/schema/table",
+ id="fabric-with-instance-default-port",
+ ),
],
)
def test_sanitize_uri_pass(original: str, normalized: str) -> None:
@@ -50,18 +75,37 @@ def test_sanitize_uri_pass(original: str, normalized: str)
-> None:
assert urllib.parse.urlunsplit(uri_o) == normalized
+@pytest.mark.parametrize(
+ "value",
+ [
+ pytest.param("mssql://example.com/database", id="missing-component"),
+ pytest.param("mssql://example.com/database/schema/table/column/extra",
id="extra-component"),
+ pytest.param("mssql://my-azure-server.database.windows.net/database",
id="azure-missing-component"),
+ pytest.param(
+
"mssql://my-fabric-server.my-tenant.fabric.microsoft.com/database/schema/table/column/extra",
+ id="fabric-extra-component",
+ ),
+ ],
+)
+def test_sanitize_uri_fail_invalid_path(value: str) -> None:
+ uri_i = urllib.parse.urlsplit(value)
+ with pytest.raises(
+ ValueError,
+ match="URI format mssql:// must contain database, schema, and table/view names with optional instance name",
+ ):
+ sanitize_uri(uri_i)
+
+
@pytest.mark.parametrize(
"value",
[
pytest.param("mssql://", id="blank"),
pytest.param("mssql:///database/schema/table", id="no-host"),
- pytest.param("mssql://example.com/database/table",
id="missing-component"),
- pytest.param("mssql://example.com/database/schema/table/column",
id="extra-component"),
],
)
-def test_sanitize_uri_fail(value: str) -> None:
+def test_sanitize_uri_fail_missing_host(value: str) -> None:
uri_i = urllib.parse.urlsplit(value)
- with pytest.raises(ValueError, match="URI format mssql:// must contain"):
+ with pytest.raises(ValueError, match="URI format mssql:// must contain a host"):
sanitize_uri(uri_i)
@@ -116,6 +160,12 @@ def test_create_asset(
"testdb.schema1.events",
id="custom-port",
),
+ pytest.param(
+ "mssql://db-host:1434/sql2019/testdb/schema1/events",
+ "mssql://db-host:1434",
+ "testdb.schema1.events",
+ id="with-instance",
+ ),
],
)
def test_convert_asset_to_openlineage(uri: str, expected_namespace: str,
expected_name: str) -> None: