kacpermuda commented on code in PR #66342: URL: https://github.com/apache/airflow/pull/66342#discussion_r3257769716
########## providers/openlineage/tests/unit/openlineage/test_token_provider.py: ########## @@ -0,0 +1,141 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from airflow.providers.common.compat.sdk import BaseHook, Connection +from airflow.providers.openlineage.token_provider import ( + AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE, + AirflowConnectionConfigProvider, + AirflowConnectionTokenProvider, + OpenLineageAirflowConnectionAuthError, + OpenLineageAirflowConnectionConfigError, + resolve_airflow_connection_auth, +) + + [email protected](BaseHook, "get_connection") +def test_get_api_key_from_connection_password(mock_get_connection): + mock_get_connection.return_value = Connection( + conn_id="openlineage_default", conn_type="http", password="api-key" + ) + + provider = AirflowConnectionTokenProvider({"conn_id": "openlineage_default"}) + + assert provider.get_api_key() == "api-key" + + [email protected](BaseHook, "get_connection") +def test_get_api_key_from_default_connection_id(mock_get_connection): + mock_get_connection.return_value = Connection( + conn_id="openlineage_default", conn_type="http", password="api-key" + ) + + provider = 
AirflowConnectionTokenProvider({}, default_conn_id="openlineage_default") + + assert provider.get_api_key() == "api-key" + + [email protected](BaseHook, "get_connection") +def test_get_api_key_from_connection_extra(mock_get_connection): + mock_get_connection.return_value = Connection( + conn_id="openlineage_default", conn_type="http", extra='{"api_key": "api-key-from-extra"}' + ) + + provider = AirflowConnectionTokenProvider({"conn_id": "openlineage_default"}) + + assert provider.get_api_key() == "api-key-from-extra" + + +def test_missing_conn_id_raises_custom_exception(): + with pytest.raises(OpenLineageAirflowConnectionAuthError, match="requires a non-empty `conn_id`"): + AirflowConnectionTokenProvider({}) + + [email protected](BaseHook, "get_connection") +def test_missing_token_raises_custom_exception(mock_get_connection): + mock_get_connection.return_value = Connection(conn_id="openlineage_default", conn_type="http") + + provider = AirflowConnectionTokenProvider({"conn_id": "openlineage_default"}) + + with pytest.raises(OpenLineageAirflowConnectionAuthError, match="could not find a token"): + provider.get_api_key() + + [email protected](BaseHook, "get_connection") +def test_resolve_connection_auth_in_composite_transport(mock_get_connection): Review Comment: Can we add one more tests here for nested composite transport? So composite(transports=[http, composite(transports=[http, console])]), and make sure that it also works as expected? ########## providers/openlineage/src/airflow/providers/openlineage/token_provider.py: ########## @@ -0,0 +1,137 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import Any + +from airflow.providers.common.compat.sdk import AirflowException, BaseHook + +AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key" +_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token") + + +class OpenLineageAirflowConnectionAuthError(AirflowException): + """Raised when OpenLineage API key auth cannot be resolved from an Airflow connection.""" + + +class OpenLineageAirflowConnectionConfigError(AirflowException): + """Raised when OpenLineage config cannot be resolved from an Airflow connection.""" + + +class AirflowConnectionConfigProvider: + """ + Resolve OpenLineage client configuration from an Airflow connection. + + The connection extra contains the full OpenLineage client config, for example + ``{"transport": {"type": "console"}}``. + """ + + def __init__(self, conn_id: str) -> None: + if not conn_id: + raise OpenLineageAirflowConnectionConfigError( + "OpenLineage connection config requires a non-empty connection ID." + ) + self.conn_id = conn_id + + def get_config(self) -> dict[str, Any]: + connection = BaseHook.get_connection(self.conn_id) + extra = connection.extra_dejson + config = self._get_config_from_extra(extra) + if config is not None: + return config + + raise OpenLineageAirflowConnectionConfigError( + "OpenLineage connection config could not find configuration in connection " + f"`{self.conn_id}`. Expected OpenLineage config with `transport` in connection extra." 
+ ) + + def _get_config_from_extra(self, extra: dict[str, Any]) -> dict[str, Any] | None: + if "transport" in extra: + return self._validate_config(extra) + + return None Review Comment: Looks like this method is not needed anymore? It only points to `self._validate_config` that will already throw error when transport is not in extra, so we can probably just use `self._validate_config` in `get_config` directly? ########## providers/openlineage/src/airflow/providers/openlineage/token_provider.py: ########## @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from typing import Any + +from airflow.providers.common.compat.sdk import AirflowException, BaseHook + +AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key" +OPENLINEAGE_CONFIG_EXTRA_KEY = "openlineage_config" +_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token") + + +class OpenLineageAirflowConnectionAuthError(AirflowException): + """Raised when OpenLineage API key auth cannot be resolved from an Airflow connection.""" + + +class OpenLineageAirflowConnectionConfigError(AirflowException): + """Raised when OpenLineage config cannot be resolved from an Airflow connection.""" Review Comment: Ok ! ########## providers/openlineage/src/airflow/providers/openlineage/token_provider.py: ########## @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from typing import Any + +from airflow.providers.common.compat.sdk import AirflowException, BaseHook + +AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key" +OPENLINEAGE_CONFIG_EXTRA_KEY = "openlineage_config" +_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token") + + +class OpenLineageAirflowConnectionAuthError(AirflowException): + """Raised when OpenLineage API key auth cannot be resolved from an Airflow connection.""" + + +class OpenLineageAirflowConnectionConfigError(AirflowException): + """Raised when OpenLineage config cannot be resolved from an Airflow connection.""" + + +class AirflowConnectionConfigProvider: + """ + Resolve OpenLineage client configuration from an Airflow connection. + + The connection extra can contain the full OpenLineage client config, for example + ``{"transport": {"type": "console"}}``. For convenience, it can also contain only the transport + config, for example ``{"type": "console"}``. + """ + + def __init__(self, conn_id: str) -> None: + if not conn_id: + raise OpenLineageAirflowConnectionConfigError( + "OpenLineage connection config requires a non-empty connection ID." + ) + self.conn_id = conn_id + + def get_config(self) -> dict[str, Any]: + connection = BaseHook.get_connection(self.conn_id) + extra = connection.extra_dejson + config = self._get_config_from_extra(extra) + if config is not None: + return config + + raise OpenLineageAirflowConnectionConfigError( + "OpenLineage connection config could not find configuration in connection " + f"`{self.conn_id}`. Expected full OpenLineage config or transport config in connection extra." 
+ ) + + def _get_config_from_extra(self, extra: dict[str, Any]) -> dict[str, Any] | None: + if OPENLINEAGE_CONFIG_EXTRA_KEY in extra: + return self._validate_config(extra[OPENLINEAGE_CONFIG_EXTRA_KEY]) + + if "transport" in extra: + return self._validate_config(extra) + + if "type" in extra: + return {"transport": extra} + + return None + + def _validate_config(self, config: Any) -> dict[str, Any]: Review Comment: Makes sense ! ########## providers/openlineage/src/airflow/providers/openlineage/token_provider.py: ########## @@ -0,0 +1,137 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from typing import Any + +from airflow.providers.common.compat.sdk import AirflowException, BaseHook + +AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key" +_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token") + + +class OpenLineageAirflowConnectionAuthError(AirflowException): + """Raised when OpenLineage API key auth cannot be resolved from an Airflow connection.""" + + +class OpenLineageAirflowConnectionConfigError(AirflowException): + """Raised when OpenLineage config cannot be resolved from an Airflow connection.""" + + +class AirflowConnectionConfigProvider: + """ + Resolve OpenLineage client configuration from an Airflow connection. + + The connection extra contains the full OpenLineage client config, for example + ``{"transport": {"type": "console"}}``. + """ + + def __init__(self, conn_id: str) -> None: + if not conn_id: + raise OpenLineageAirflowConnectionConfigError( + "OpenLineage connection config requires a non-empty connection ID." + ) + self.conn_id = conn_id + + def get_config(self) -> dict[str, Any]: + connection = BaseHook.get_connection(self.conn_id) + extra = connection.extra_dejson + config = self._get_config_from_extra(extra) + if config is not None: + return config + + raise OpenLineageAirflowConnectionConfigError( + "OpenLineage connection config could not find configuration in connection " + f"`{self.conn_id}`. Expected OpenLineage config with `transport` in connection extra." + ) + + def _get_config_from_extra(self, extra: dict[str, Any]) -> dict[str, Any] | None: + if "transport" in extra: + return self._validate_config(extra) + + return None + + def _validate_config(self, config: Any) -> dict[str, Any]: + if not isinstance(config, dict): + raise OpenLineageAirflowConnectionConfigError( + f"OpenLineage connection config `{config}` is not a dict." 
+ ) + if not isinstance(config.get("transport"), dict): + raise OpenLineageAirflowConnectionConfigError( + "OpenLineage connection config must contain a `transport` JSON object." + ) + return config + + +class AirflowConnectionTokenProvider: + """ + Resolve an OpenLineage API key from an Airflow connection. + + The connection password is preferred. If it is empty and ``extra_key`` is configured, that key + is read from connection ``extra``. Otherwise, common extra keys are checked. + """ + + def __init__(self, config: dict[str, Any], default_conn_id: str | None = None) -> None: + self.conn_id = config.get("conn_id") or default_conn_id or "" + self.extra_key = config.get("extra_key") + if not self.conn_id: + raise OpenLineageAirflowConnectionAuthError( + "OpenLineage `airflow_connection_api_key` auth requires a non-empty `conn_id`." + ) + + def get_api_key(self) -> str: + connection = BaseHook.get_connection(self.conn_id) + if connection.password: + return connection.password.strip() + api_key = self._get_api_key_from_extra(connection.extra_dejson) + if api_key: + return api_key + + raise OpenLineageAirflowConnectionAuthError( + "OpenLineage `airflow_connection_api_key` auth could not find a token in connection " + f"`{self.conn_id}`. Expected connection password or token in connection extra." + ) + + def _get_api_key_from_extra(self, extra: dict[str, Any]) -> str | None: + if self.extra_key: + value = extra.get(self.extra_key) + return str(value).strip() if value else None + + for key in _DEFAULT_EXTRA_KEYS: + value = extra.get(key) + if value: + return str(value).strip() + return None + + +def resolve_airflow_connection_auth(config: dict[str, Any] | None, config_conn_id: str | None = None) -> None: Review Comment: Could we add a small docstring here explaining what is hapenning and why this is so complex (that we can have simple transports, composite transports or nested composite transports and that's why we need recursive logic here). 
Also, note that the auth replacement only works for the http transport, because it replaces the airflow_conn with {"type": "api_key", "apiKey": provider.get_api_key()} (this is already in the docs, but a small reminder in the code here would be nice). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
