This is an automated email from the ASF dual-hosted git repository.
kaxil pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 007ce9d11eb Add Snowflake Workload Identity Federation (WIF) support (#68107)
007ce9d11eb is described below
commit 007ce9d11eb2d5ff73a5a99b0eeb2bcfe6723cfb
Author: Kaxil Naik <[email protected]>
AuthorDate: Sat Jun 6 02:50:58 2026 +0100
Add Snowflake Workload Identity Federation (WIF) support (#68107)
SnowflakeHook accepted authenticator=WORKLOAD_IDENTITY but dropped the
workload_identity_provider param, so the connector rejected the
connection attempt with "251017: workload_identity_provider must be
set". Forward the param from the connection extra so keyless Workload
Identity Federation works. A single field, set to one of AWS, AZURE,
GCP or OIDC, covers every supported provider. Also expose it as a
connection form widget, document it, and bump
snowflake-connector-python to >=3.17.0 (the first version with WIF).
Closes: https://github.com/apache/airflow/issues/54983
* Forward OIDC token for Snowflake WORKLOAD_IDENTITY auth
AWS, AZURE and GCP fetch the workload identity token from the cloud
metadata service, but the OIDC provider requires the caller to supply
the token. Without it the connector raises "token must be provided if
workload_identity_provider=OIDC". Forward ``token`` (inline JWT) and
``token_file_path`` (a file the connector reads, suited to rotated
tokens) from the connection extra, and document both.
---
docs/spelling_wordlist.txt | 1 +
generated/provider_dependencies.json | 2 +-
generated/provider_dependencies.json.sha256sum | 2 +-
providers/snowflake/README.rst | 2 +-
providers/snowflake/docs/connections/snowflake.rst | 42 +++++++++-
providers/snowflake/docs/index.rst | 2 +-
providers/snowflake/pyproject.toml | 3 +-
.../airflow/providers/snowflake/hooks/snowflake.py | 23 +++++-
.../tests/unit/snowflake/hooks/test_snowflake.py | 95 ++++++++++++++++++++++
uv.lock | 2 +-
10 files changed, 166 insertions(+), 8 deletions(-)
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 202d24f00a3..00c557d1eb2 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -1140,6 +1140,7 @@ observability
od
odbc
odps
+OIDC
ok
oklch
Okta
diff --git a/generated/provider_dependencies.json
b/generated/provider_dependencies.json
index ebe2ce88b40..d9c5dac2752 100644
--- a/generated/provider_dependencies.json
+++ b/generated/provider_dependencies.json
@@ -1789,7 +1789,7 @@
"pyarrow>=18.0.0; python_version >= '3.13' and python_version < '3.14'",
"pyarrow>=22.0.0; python_version >= '3.14'",
"setuptools>=80.0.0,<9999",
- "snowflake-connector-python>=3.16.0",
+ "snowflake-connector-python>=3.17.0",
"snowflake-snowpark-python>=1.17.0,<9999;python_version<'3.12'",
"snowflake-snowpark-python>=1.27.0,<9999;python_version>='3.12' and
python_version<'3.14'",
"snowflake-sqlalchemy>=1.7.0"
diff --git a/generated/provider_dependencies.json.sha256sum
b/generated/provider_dependencies.json.sha256sum
index dffaafcb16b..8a645f848dd 100644
--- a/generated/provider_dependencies.json.sha256sum
+++ b/generated/provider_dependencies.json.sha256sum
@@ -1 +1 @@
-e7408d69a7c8076d9e114aea31c0240d4dcf577977ed0de1b2f0dec8a7251a91
+86e39c620f3926c99e1c702a496d6161032e1a3ac69eba7da10214a2c4ba24f1
diff --git a/providers/snowflake/README.rst b/providers/snowflake/README.rst
index 68d72c3b2e2..50c7567a7c5 100644
--- a/providers/snowflake/README.rst
+++ b/providers/snowflake/README.rst
@@ -62,7 +62,7 @@ PIP package Version required
``pyarrow`` ``>=16.1.0; python_version <
"3.13"``
``pyarrow`` ``>=18.0.0; python_version >=
"3.13" and python_version < "3.14"``
``pyarrow`` ``>=22.0.0; python_version >=
"3.14"``
-``snowflake-connector-python`` ``>=3.16.0``
+``snowflake-connector-python`` ``>=3.17.0``
``snowflake-sqlalchemy`` ``>=1.7.0``
``snowflake-snowpark-python`` ``>=1.17.0,<9999; python_version <
"3.12"``
``snowflake-snowpark-python`` ``>=1.27.0,<9999; python_version
>= "3.12" and python_version < "3.14"``
diff --git a/providers/snowflake/docs/connections/snowflake.rst
b/providers/snowflake/docs/connections/snowflake.rst
index ba73dd4a5b9..c4e6eb2f0fc 100644
--- a/providers/snowflake/docs/connections/snowflake.rst
+++ b/providers/snowflake/docs/connections/snowflake.rst
@@ -58,9 +58,24 @@ Extra (optional)
* ``region``: Warehouse region.
* ``warehouse``: Snowflake warehouse name.
* ``role``: Snowflake role.
- * ``authenticator``: To connect using OAuth set this parameter ``oauth``.
For Programmatic Access
+ * ``authenticator``: To connect using OAuth set this parameter ``oauth``.
To connect without a stored secret using
+ `Workload Identity Federation
<https://docs.snowflake.com/en/user-guide/workload-identity-federation>`_,
+ set it to ``WORKLOAD_IDENTITY`` and also set
``workload_identity_provider`` (see below). For Programmatic Access
Token (PAT) authentication, no special authenticator is required —
simply set the PAT token as
the Password field. See `Snowflake PAT documentation
<https://docs.snowflake.com/en/user-guide/programmatic-access-tokens>`_.
+ * ``workload_identity_provider``: The cloud whose workload identity is
used as the Snowflake credential
+ when ``authenticator`` is ``WORKLOAD_IDENTITY``. One of ``AWS``,
``AZURE``, ``GCP`` or ``OIDC``. With
+ Workload Identity Federation no long-lived secret (password, key-pair or
PAT) is stored; the workload's
+ cloud identity is the credential. Requires
``snowflake-connector-python>=3.17.0`` and the workload to
+ run on the matching cloud. ``AWS``, ``AZURE`` and ``GCP`` fetch the
identity token from the cloud's
+ metadata service. ``OIDC`` instead requires the token to be supplied via
``token`` or ``token_file_path``
+ (see below); see `custom OIDC configuration
+
<https://docs.snowflake.com/en/user-guide/workload-identity-federation#label-wif-oidc-custom-configure-custom>`_.
+ * ``token``: The OIDC ID token (JWT) used when
``workload_identity_provider`` is ``OIDC``. Prefer
+ ``token_file_path`` for tokens that rotate.
+ * ``token_file_path``: Path to a file holding the OIDC ID token used when
``workload_identity_provider``
+ is ``OIDC``. The connector reads the token from this file, which suits
projected or rotated tokens
+ (for example a Kubernetes service-account token).
* ``token_endpoint``: Specify token endpoint for external OAuth provider.
* ``grant_type``: Specify grant type for OAuth authentication. Currently
supported: ``refresh_token`` (default), ``client_credentials``.
* ``scope``: Specify OAuth scope to include in the access token request
for any OAuth grant type.
@@ -138,3 +153,28 @@ set the PAT token as the password with no special
authenticator required:
"role": "role"
}
}'
+
+JSON format example with Workload Identity Federation (WIF)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To authenticate without a stored secret using
+`Workload Identity Federation
<https://docs.snowflake.com/en/user-guide/workload-identity-federation>`_,
+set ``authenticator`` to ``WORKLOAD_IDENTITY`` and
``workload_identity_provider`` to the cloud the
+workload runs on (here ``GCP``). No password, key-pair or token is stored; the
workload's cloud identity
+is the credential. The Snowflake side needs a ``TYPE = SERVICE`` user that
trusts the workload's identity
+and is granted a role with access to the target objects.
+
+.. code-block:: bash
+
+ export AIRFLOW_CONN_SNOWFLAKE_DEFAULT='{
+ "conn_type": "snowflake",
+ "login": "service-user",
+ "extra": {
+ "account": "account",
+ "database": "database",
+ "warehouse": "snow-warehouse",
+ "role": "role",
+ "authenticator": "WORKLOAD_IDENTITY",
+ "workload_identity_provider": "GCP"
+ }
+ }'
diff --git a/providers/snowflake/docs/index.rst
b/providers/snowflake/docs/index.rst
index 0bd88ded522..ecf5950f93b 100644
--- a/providers/snowflake/docs/index.rst
+++ b/providers/snowflake/docs/index.rst
@@ -111,7 +111,7 @@ PIP package Version required
``pyarrow`` ``>=16.1.0; python_version <
"3.13"``
``pyarrow`` ``>=18.0.0; python_version >=
"3.13" and python_version < "3.14"``
``pyarrow`` ``>=22.0.0; python_version >=
"3.14"``
-``snowflake-connector-python`` ``>=3.16.0``
+``snowflake-connector-python`` ``>=3.17.0``
``snowflake-sqlalchemy`` ``>=1.7.0``
``snowflake-snowpark-python`` ``>=1.17.0,<9999; python_version <
"3.12"``
``snowflake-snowpark-python`` ``>=1.27.0,<9999; python_version
>= "3.12" and python_version < "3.14"``
diff --git a/providers/snowflake/pyproject.toml
b/providers/snowflake/pyproject.toml
index 2375cdf6c6f..e9ed033f8b8 100644
--- a/providers/snowflake/pyproject.toml
+++ b/providers/snowflake/pyproject.toml
@@ -69,7 +69,8 @@ dependencies = [
"pyarrow>=18.0.0; python_version >= '3.13' and python_version < '3.14'",
"pyarrow>=22.0.0; python_version >= '3.14'",
# TODO(potiuk): We should bump the snowflake-connector-python to >=4.0.0
when sqlalchemy>=2.0 is required
- "snowflake-connector-python>=3.16.0",
+ # 3.17.0 is the first version with Workload Identity Federation
(WORKLOAD_IDENTITY authenticator).
+ "snowflake-connector-python>=3.17.0",
"snowflake-sqlalchemy>=1.7.0",
# The "<9999" is a hint to the pip resolver to resolve this requirement
early,
# can be removed when the pip resolver is improved
diff --git
a/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py
b/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py
index b4a8a91e31d..e25b8605241 100644
--- a/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py
+++ b/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py
@@ -149,6 +149,9 @@ class SnowflakeHook(DbApiHook):
"private_key_content": PasswordField(
lazy_gettext("Private key (Text)"),
widget=BS3PasswordFieldWidget()
),
+ "workload_identity_provider": StringField(
+ lazy_gettext("Workload Identity Provider"),
widget=BS3TextFieldWidget()
+ ),
"insecure_mode": BooleanField(
label=lazy_gettext("Insecure mode"), description="Turns off
OCSP certificate checks"
),
@@ -173,7 +176,7 @@ class SnowflakeHook(DbApiHook):
"placeholders": {
"extra": json.dumps(
{
- "authenticator": "snowflake oauth",
+ "authenticator": "snowflake oauth / WORKLOAD_IDENTITY",
"private_key_file": "private key",
"session_parameters": "session parameters",
"client_request_mfa_token": "client request mfa token",
@@ -199,6 +202,7 @@ class SnowflakeHook(DbApiHook):
"role": "snowflake role",
"private_key_file": "Path of snowflake private key (PEM
Format)",
"private_key_content": "Content to snowflake private key (PEM
format)",
+ "workload_identity_provider": "AWS, AZURE, GCP or OIDC",
"insecure_mode": "insecure mode",
"proxy_host": "Proxy server hostname",
"proxy_port": "Proxy server port",
@@ -380,6 +384,7 @@ class SnowflakeHook(DbApiHook):
# authenticator and session_parameters never supported long name so we
don't use _get_field
authenticator = extra_dict.get("authenticator", "snowflake")
session_parameters = extra_dict.get("session_parameters")
+ workload_identity_provider = self._get_field(extra_dict,
"workload_identity_provider")
conn_config = {
"user": conn.login,
@@ -407,6 +412,22 @@ class SnowflakeHook(DbApiHook):
if client_store_temporary_credential:
conn_config["client_store_temporary_credential"] =
client_store_temporary_credential
+ # Workload Identity Federation (keyless auth): when the connection sets
+ # ``authenticator=WORKLOAD_IDENTITY``, the connector also needs to
know which
+ # cloud the workload runs on. One value (AWS, AZURE, GCP or OIDC)
covers all
+ # providers. See
https://docs.snowflake.com/en/user-guide/workload-identity-federation.
+ if workload_identity_provider:
+ conn_config["workload_identity_provider"] =
workload_identity_provider
+ # AWS, AZURE and GCP fetch the identity token from the cloud's
metadata
+ # service. OIDC instead requires the caller to supply the token,
either
+ # inline (``token``) or from a file (``token_file_path``).
+ token = self._get_field(extra_dict, "token")
+ token_file_path = self._get_field(extra_dict, "token_file_path")
+ if token:
+ conn_config["token"] = token
+ if token_file_path:
+ conn_config["token_file_path"] = token_file_path
+
p_key = self.get_private_key()
if p_key:
diff --git a/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py
b/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py
index 111a79d7021..5b96484933e 100644
--- a/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py
+++ b/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py
@@ -1821,3 +1821,98 @@ class TestPytestSnowflakeHook:
invalid_form = form_cls(MultiDict([("proxy_port", "not-an-int")]))
assert invalid_form.validate() is False
assert "proxy_port" in invalid_form.errors
+
+ @pytest.mark.parametrize("provider", ["AWS", "AZURE", "GCP", "OIDC"])
+ def test_get_conn_params_forwards_workload_identity_provider(self,
provider):
+ """When authenticator is WORKLOAD_IDENTITY, workload_identity_provider
must reach the connector.
+
+ The connector raises ``251017: workload_identity_provider must be set
...`` if the param is
+ dropped, so the hook has to forward it for keyless Workload Identity
Federation to work.
+ """
+ connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+ connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+ connection_kwargs["extra"]["workload_identity_provider"] = provider
+
+ with mock.patch.dict("os.environ",
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+ conn_params =
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+ assert conn_params["authenticator"] == "WORKLOAD_IDENTITY"
+ assert conn_params["workload_identity_provider"] == provider
+
+ def test_get_conn_params_omits_workload_identity_provider_when_unset(self):
+ """workload_identity_provider must not appear in conn params unless
configured."""
+ with mock.patch.dict(
+ "os.environ",
AIRFLOW_CONN_TEST_CONN=Connection(**BASE_CONNECTION_KWARGS).get_uri()
+ ):
+ conn_params =
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+ assert "workload_identity_provider" not in conn_params
+
+ def
test_get_conn_params_workload_identity_provider_backcompat_prefix(self):
+ """The backcompat ``extra__snowflake__`` prefix is honored for
workload_identity_provider."""
+ connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+ connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+
connection_kwargs["extra"]["extra__snowflake__workload_identity_provider"] =
"GCP"
+
+ with mock.patch.dict("os.environ",
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+ conn_params =
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+ assert conn_params["workload_identity_provider"] == "GCP"
+
+ def test_get_conn_passes_workload_identity_provider_to_connect(self):
+ """The forwarded param has to land in the actual
``snowflake.connector.connect()`` call."""
+ connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+ connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+ connection_kwargs["extra"]["workload_identity_provider"] = "GCP"
+
+ with (
+ mock.patch.dict("os.environ",
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()),
+ mock.patch("snowflake.connector.connect") as mock_connect,
+ ):
+ SnowflakeHook(snowflake_conn_id="test_conn").get_conn()
+
+ call_kwargs = mock_connect.call_args[1]
+ assert call_kwargs["authenticator"] == "WORKLOAD_IDENTITY"
+ assert call_kwargs["workload_identity_provider"] == "GCP"
+
+ def
test_get_connection_form_widgets_exposes_workload_identity_provider(self):
+ """The connection form must expose a workload_identity_provider field
so users can set it in the UI."""
+ pytest.importorskip("flask_appbuilder")
+ pytest.importorskip("flask_babel")
+
+ widgets = SnowflakeHook.get_connection_form_widgets()
+
+ assert "workload_identity_provider" in widgets
+
+ @pytest.mark.parametrize(
+ ("field", "value"),
+ [("token", "an-oidc-jwt"), ("token_file_path",
"/var/run/secrets/oidc/token")],
+ )
+ def test_get_conn_params_forwards_oidc_token(self, field, value):
+ """OIDC WIF needs a caller-supplied token; the connector raises if it
is missing.
+
+ Unlike AWS/AZURE/GCP (which fetch the token from cloud metadata), OIDC
requires
+ ``token`` or ``token_file_path`` to be forwarded.
+ """
+ connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+ connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+ connection_kwargs["extra"]["workload_identity_provider"] = "OIDC"
+ connection_kwargs["extra"][field] = value
+
+ with mock.patch.dict("os.environ",
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+ conn_params =
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+ assert conn_params["workload_identity_provider"] == "OIDC"
+ assert conn_params[field] == value
+
+ def test_get_conn_params_omits_oidc_token_when_unset(self):
+ """token/token_file_path must not appear unless explicitly
configured."""
+ connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+ connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+ connection_kwargs["extra"]["workload_identity_provider"] = "GCP"
+
+ with mock.patch.dict("os.environ",
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+ conn_params =
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+ assert "token" not in conn_params
+ assert "token_file_path" not in conn_params
diff --git a/uv.lock b/uv.lock
index c3402fbff19..79698138589 100644
--- a/uv.lock
+++ b/uv.lock
@@ -7621,7 +7621,7 @@ requires-dist = [
{ name = "pyarrow", marker = "python_full_version == '3.13.*'", specifier
= ">=18.0.0" },
{ name = "pyarrow", marker = "python_full_version >= '3.14'", specifier =
">=22.0.0" },
{ name = "setuptools", specifier = ">=80.0.0,<9999" },
- { name = "snowflake-connector-python", specifier = ">=3.16.0" },
+ { name = "snowflake-connector-python", specifier = ">=3.17.0" },
{ name = "snowflake-snowpark-python", marker = "python_full_version <
'3.12'", specifier = ">=1.17.0,<9999" },
{ name = "snowflake-snowpark-python", marker = "python_full_version >=
'3.12' and python_full_version < '3.14'", specifier = ">=1.27.0,<9999" },
{ name = "snowflake-sqlalchemy", specifier = ">=1.7.0" },