kacpermuda commented on code in PR #66342:
URL: https://github.com/apache/airflow/pull/66342#discussion_r3257769716


##########
providers/openlineage/tests/unit/openlineage/test_token_provider.py:
##########
@@ -0,0 +1,141 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+from airflow.providers.common.compat.sdk import BaseHook, Connection
+from airflow.providers.openlineage.token_provider import (
+    AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE,
+    AirflowConnectionConfigProvider,
+    AirflowConnectionTokenProvider,
+    OpenLineageAirflowConnectionAuthError,
+    OpenLineageAirflowConnectionConfigError,
+    resolve_airflow_connection_auth,
+)
+
+
+@patch.object(BaseHook, "get_connection")
+def test_get_api_key_from_connection_password(mock_get_connection):
+    mock_get_connection.return_value = Connection(
+        conn_id="openlineage_default", conn_type="http", password="api-key"
+    )
+
+    provider = AirflowConnectionTokenProvider({"conn_id": 
"openlineage_default"})
+
+    assert provider.get_api_key() == "api-key"
+
+
+@patch.object(BaseHook, "get_connection")
+def test_get_api_key_from_default_connection_id(mock_get_connection):
+    mock_get_connection.return_value = Connection(
+        conn_id="openlineage_default", conn_type="http", password="api-key"
+    )
+
+    provider = AirflowConnectionTokenProvider({}, 
default_conn_id="openlineage_default")
+
+    assert provider.get_api_key() == "api-key"
+
+
+@patch.object(BaseHook, "get_connection")
+def test_get_api_key_from_connection_extra(mock_get_connection):
+    mock_get_connection.return_value = Connection(
+        conn_id="openlineage_default", conn_type="http", extra='{"api_key": 
"api-key-from-extra"}'
+    )
+
+    provider = AirflowConnectionTokenProvider({"conn_id": 
"openlineage_default"})
+
+    assert provider.get_api_key() == "api-key-from-extra"
+
+
+def test_missing_conn_id_raises_custom_exception():
+    with pytest.raises(OpenLineageAirflowConnectionAuthError, match="requires 
a non-empty `conn_id`"):
+        AirflowConnectionTokenProvider({})
+
+
+@patch.object(BaseHook, "get_connection")
+def test_missing_token_raises_custom_exception(mock_get_connection):
+    mock_get_connection.return_value = 
Connection(conn_id="openlineage_default", conn_type="http")
+
+    provider = AirflowConnectionTokenProvider({"conn_id": 
"openlineage_default"})
+
+    with pytest.raises(OpenLineageAirflowConnectionAuthError, match="could not 
find a token"):
+        provider.get_api_key()
+
+
+@patch.object(BaseHook, "get_connection")
+def test_resolve_connection_auth_in_composite_transport(mock_get_connection):

Review Comment:
   Can we add one more test here for a nested composite transport? That is,
composite(transports=[http, composite(transports=[http, console])]), to make
sure that it also works as expected?



##########
providers/openlineage/src/airflow/providers/openlineage/token_provider.py:
##########
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any
+
+from airflow.providers.common.compat.sdk import AirflowException, BaseHook
+
+AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key"
+_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token")
+
+
+class OpenLineageAirflowConnectionAuthError(AirflowException):
+    """Raised when OpenLineage API key auth cannot be resolved from an Airflow 
connection."""
+
+
+class OpenLineageAirflowConnectionConfigError(AirflowException):
+    """Raised when OpenLineage config cannot be resolved from an Airflow 
connection."""
+
+
+class AirflowConnectionConfigProvider:
+    """
+    Resolve OpenLineage client configuration from an Airflow connection.
+
+    The connection extra contains the full OpenLineage client config, for 
example
+    ``{"transport": {"type": "console"}}``.
+    """
+
+    def __init__(self, conn_id: str) -> None:
+        if not conn_id:
+            raise OpenLineageAirflowConnectionConfigError(
+                "OpenLineage connection config requires a non-empty connection 
ID."
+            )
+        self.conn_id = conn_id
+
+    def get_config(self) -> dict[str, Any]:
+        connection = BaseHook.get_connection(self.conn_id)
+        extra = connection.extra_dejson
+        config = self._get_config_from_extra(extra)
+        if config is not None:
+            return config
+
+        raise OpenLineageAirflowConnectionConfigError(
+            "OpenLineage connection config could not find configuration in 
connection "
+            f"`{self.conn_id}`. Expected OpenLineage config with `transport` 
in connection extra."
+        )
+
+    def _get_config_from_extra(self, extra: dict[str, Any]) -> dict[str, Any] 
| None:
+        if "transport" in extra:
+            return self._validate_config(extra)
+
+        return None

Review Comment:
   Looks like this method is not needed anymore? It only delegates to
`self._validate_config`, which will already raise an error when `transport` is
not in extra, so we can probably just use `self._validate_config` in
`get_config` directly?



##########
providers/openlineage/src/airflow/providers/openlineage/token_provider.py:
##########
@@ -0,0 +1,126 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any
+
+from airflow.providers.common.compat.sdk import AirflowException, BaseHook
+
+AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key"
+OPENLINEAGE_CONFIG_EXTRA_KEY = "openlineage_config"
+_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token")
+
+
+class OpenLineageAirflowConnectionAuthError(AirflowException):
+    """Raised when OpenLineage API key auth cannot be resolved from an Airflow 
connection."""
+
+
+class OpenLineageAirflowConnectionConfigError(AirflowException):
+    """Raised when OpenLineage config cannot be resolved from an Airflow 
connection."""

Review Comment:
   Ok !



##########
providers/openlineage/src/airflow/providers/openlineage/token_provider.py:
##########
@@ -0,0 +1,126 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any
+
+from airflow.providers.common.compat.sdk import AirflowException, BaseHook
+
+AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key"
+OPENLINEAGE_CONFIG_EXTRA_KEY = "openlineage_config"
+_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token")
+
+
+class OpenLineageAirflowConnectionAuthError(AirflowException):
+    """Raised when OpenLineage API key auth cannot be resolved from an Airflow 
connection."""
+
+
+class OpenLineageAirflowConnectionConfigError(AirflowException):
+    """Raised when OpenLineage config cannot be resolved from an Airflow 
connection."""
+
+
+class AirflowConnectionConfigProvider:
+    """
+    Resolve OpenLineage client configuration from an Airflow connection.
+
+    The connection extra can contain the full OpenLineage client config, for 
example
+    ``{"transport": {"type": "console"}}``. For convenience, it can also 
contain only the transport
+    config, for example ``{"type": "console"}``.
+    """
+
+    def __init__(self, conn_id: str) -> None:
+        if not conn_id:
+            raise OpenLineageAirflowConnectionConfigError(
+                "OpenLineage connection config requires a non-empty connection 
ID."
+            )
+        self.conn_id = conn_id
+
+    def get_config(self) -> dict[str, Any]:
+        connection = BaseHook.get_connection(self.conn_id)
+        extra = connection.extra_dejson
+        config = self._get_config_from_extra(extra)
+        if config is not None:
+            return config
+
+        raise OpenLineageAirflowConnectionConfigError(
+            "OpenLineage connection config could not find configuration in 
connection "
+            f"`{self.conn_id}`. Expected full OpenLineage config or transport 
config in connection extra."
+        )
+
+    def _get_config_from_extra(self, extra: dict[str, Any]) -> dict[str, Any] 
| None:
+        if OPENLINEAGE_CONFIG_EXTRA_KEY in extra:
+            return self._validate_config(extra[OPENLINEAGE_CONFIG_EXTRA_KEY])
+
+        if "transport" in extra:
+            return self._validate_config(extra)
+
+        if "type" in extra:
+            return {"transport": extra}
+
+        return None
+
+    def _validate_config(self, config: Any) -> dict[str, Any]:

Review Comment:
   Makes sense !



##########
providers/openlineage/src/airflow/providers/openlineage/token_provider.py:
##########
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any
+
+from airflow.providers.common.compat.sdk import AirflowException, BaseHook
+
+AIRFLOW_CONNECTION_API_KEY_AUTH_TYPE = "airflow_connection_api_key"
+_DEFAULT_EXTRA_KEYS = ("apiKey", "api_key", "apikey", "token", "access_token")
+
+
+class OpenLineageAirflowConnectionAuthError(AirflowException):
+    """Raised when OpenLineage API key auth cannot be resolved from an Airflow 
connection."""
+
+
+class OpenLineageAirflowConnectionConfigError(AirflowException):
+    """Raised when OpenLineage config cannot be resolved from an Airflow 
connection."""
+
+
+class AirflowConnectionConfigProvider:
+    """
+    Resolve OpenLineage client configuration from an Airflow connection.
+
+    The connection extra contains the full OpenLineage client config, for 
example
+    ``{"transport": {"type": "console"}}``.
+    """
+
+    def __init__(self, conn_id: str) -> None:
+        if not conn_id:
+            raise OpenLineageAirflowConnectionConfigError(
+                "OpenLineage connection config requires a non-empty connection 
ID."
+            )
+        self.conn_id = conn_id
+
+    def get_config(self) -> dict[str, Any]:
+        connection = BaseHook.get_connection(self.conn_id)
+        extra = connection.extra_dejson
+        config = self._get_config_from_extra(extra)
+        if config is not None:
+            return config
+
+        raise OpenLineageAirflowConnectionConfigError(
+            "OpenLineage connection config could not find configuration in 
connection "
+            f"`{self.conn_id}`. Expected OpenLineage config with `transport` 
in connection extra."
+        )
+
+    def _get_config_from_extra(self, extra: dict[str, Any]) -> dict[str, Any] 
| None:
+        if "transport" in extra:
+            return self._validate_config(extra)
+
+        return None
+
+    def _validate_config(self, config: Any) -> dict[str, Any]:
+        if not isinstance(config, dict):
+            raise OpenLineageAirflowConnectionConfigError(
+                f"OpenLineage connection config `{config}` is not a dict."
+            )
+        if not isinstance(config.get("transport"), dict):
+            raise OpenLineageAirflowConnectionConfigError(
+                "OpenLineage connection config must contain a `transport` JSON 
object."
+            )
+        return config
+
+
+class AirflowConnectionTokenProvider:
+    """
+    Resolve an OpenLineage API key from an Airflow connection.
+
+    The connection password is preferred. If it is empty and ``extra_key`` is 
configured, that key
+    is read from connection ``extra``. Otherwise, common extra keys are 
checked.
+    """
+
+    def __init__(self, config: dict[str, Any], default_conn_id: str | None = 
None) -> None:
+        self.conn_id = config.get("conn_id") or default_conn_id or ""
+        self.extra_key = config.get("extra_key")
+        if not self.conn_id:
+            raise OpenLineageAirflowConnectionAuthError(
+                "OpenLineage `airflow_connection_api_key` auth requires a 
non-empty `conn_id`."
+            )
+
+    def get_api_key(self) -> str:
+        connection = BaseHook.get_connection(self.conn_id)
+        if connection.password:
+            return connection.password.strip()
+        api_key = self._get_api_key_from_extra(connection.extra_dejson)
+        if api_key:
+            return api_key
+
+        raise OpenLineageAirflowConnectionAuthError(
+            "OpenLineage `airflow_connection_api_key` auth could not find a 
token in connection "
+            f"`{self.conn_id}`. Expected connection password or token in 
connection extra."
+        )
+
+    def _get_api_key_from_extra(self, extra: dict[str, Any]) -> str | None:
+        if self.extra_key:
+            value = extra.get(self.extra_key)
+            return str(value).strip() if value else None
+
+        for key in _DEFAULT_EXTRA_KEYS:
+            value = extra.get(key)
+            if value:
+                return str(value).strip()
+        return None
+
+
+def resolve_airflow_connection_auth(config: dict[str, Any] | None, 
config_conn_id: str | None = None) -> None:

Review Comment:
   Could we add a small docstring here explaining what is happening and why
this is so complex (we can have simple transports, composite transports, or
nested composite transports, which is why we need recursive logic here)? Also,
please note that the auth replacement only works for the http transport, as it
replaces the airflow_conn with {"type": "api_key", "apiKey": provider.get_api_key()}
(it's already in the docs; a small reminder in the code here would be nice).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to