kaxil commented on code in PR #56191:
URL: https://github.com/apache/airflow/pull/56191#discussion_r2388452243


##########
task-sdk/src/airflow/sdk/execution_time/context.py:
##########
@@ -177,10 +203,13 @@ def _get_connection(conn_id: str) -> Connection:
     #   will make that module depend on Task SDK, which is not ideal because 
we intend to
     #   keep Task SDK as a separate package than execution time mods.
     #   Also applies to _async_get_connection.
+    from airflow.sdk.execution_time import task_runner
     from airflow.sdk.execution_time.comms import GetConnection
-    from airflow.sdk.execution_time.task_runner import SUPERVISOR_COMMS
 
-    msg = SUPERVISOR_COMMS.send(GetConnection(conn_id=conn_id))
+    if comms := getattr(task_runner, "SUPERVISOR_COMMS", None):

Review Comment:
   Yeah this isn't the right fix!
   
   It is very likely — and deployment managers might already be doing it (after 3.0 or
3.1) — that access from worker pods to the DB/pgbouncer is restricted.
   
   So either the connection details get passed down somehow, or we pre-fetch them
similar to:
   
   
https://github.com/apache/airflow/blob/3150430b5d976d32784860f3b6cdb5ff7605a7cc/task-sdk/src/airflow/sdk/execution_time/supervisor.py#L869-L882
   
   Something like:
   
   ```diff
   diff --git a/airflow-core/src/airflow/utils/log/connection_manager.py 
b/airflow-core/src/airflow/utils/log/connection_manager.py
   new file mode 100644
   index 0000000000..df3d25a8ad
   --- /dev/null
   +++ b/airflow-core/src/airflow/utils/log/connection_manager.py
   @@ -0,0 +1,100 @@
   +# Licensed to the Apache Software Foundation (ASF) under one
   +# or more contributor license agreements.  See the NOTICE file
   +# distributed with this work for additional information
   +# regarding copyright ownership.  The ASF licenses this file
   +# to you under the Apache License, Version 2.0 (the
   +# "License"); you may not use this file except in compliance
   +# with the License.  You may obtain a copy of the License at
   +#
   +#   http://www.apache.org/licenses/LICENSE-2.0
   +#
   +# Unless required by applicable law or agreed to in writing,
   +# software distributed under the License is distributed on an
   +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   +# KIND, either express or implied.  See the License for the
   +# specific language governing permissions and limitations
   +# under the License.
   +"""Connection management for server-side operations like remote log 
reading."""
   +
   +from __future__ import annotations
   +
   +import contextlib
   +import logging
   +import os
   +from functools import lru_cache
   +
   +from airflow.configuration import conf
   +
   +log = logging.getLogger(__name__)
   +
   +
   +@lru_cache
   +def _get_remote_logging_connection_uri(conn_id: str) -> str | None:
   +    """
   +    Fetch and cache connection URI for remote logging.
   +    
   +    Similar to task-sdk supervisor pattern, but uses airflow-core 
connection access.
   +    """
   +    from airflow.models.connection import Connection
   +    
   +    try:
   +        conn = Connection.get_connection_from_secrets(conn_id)
   +        return conn.get_uri()
   +    except Exception:
   +        log.exception("Unable to retrieve remote logging connection %s", 
conn_id)
   +        return None
   +
   +
   +def _get_remote_log_conn_id() -> str | None:
   +    """Get the remote log connection ID from configuration."""
   +    return conf.get("logging", "remote_log_conn_id", fallback=None)
   +
   +
   [email protected]
   +def with_remote_logging_connection():
   +    """
   +    Context manager to pre-fetch remote logging connection and set as 
environment variable.
   +    
   +    This follows the same pattern as task-sdk supervisor's 
_remote_logging_conn but uses
   +    airflow-core's connection access. When remote log handlers try to get 
connections,
   +    they'll find them in the environment variables instead of trying to use 
SUPERVISOR_COMMS.
   +    
   +    Usage:
   +        with with_remote_logging_connection():
   +            # Remote log handlers will find connections in env vars
   +            sources, logs = remote_io.read(path, ti)
   +    """
   +    conn_id = _get_remote_log_conn_id()
   +    if not conn_id:
   +        # No remote logging connection configured
   +        yield
   +        return
   +    
   +    # Get connection URI using server-side access
   +    conn_uri = _get_remote_logging_connection_uri(conn_id)
   +    if not conn_uri:
   +        log.warning("Could not fetch remote logging connection %s", conn_id)
   +        yield
   +        return
   +    
   +    env_key = f"AIRFLOW_CONN_{conn_id.upper()}"
   +    old_value = os.getenv(env_key)
   +    
   +    try:
   +        os.environ[env_key] = conn_uri
   +        log.debug("Set remote logging connection %s in environment", 
conn_id)
   +        yield
   +    finally:
   +        # Restore original environment state
   +        if old_value is None:
   +            if env_key in os.environ:
   +                del os.environ[env_key]
   +        else:
   +            os.environ[env_key] = old_value
   diff --git a/airflow-core/src/airflow/utils/log/file_task_handler.py 
b/airflow-core/src/airflow/utils/log/file_task_handler.py
   index 9954688a41..1ab2d21bd6 100644
   --- a/airflow-core/src/airflow/utils/log/file_task_handler.py
   +++ b/airflow-core/src/airflow/utils/log/file_task_handler.py
   @@ -932,6 +932,13 @@ class FileTaskHandler(logging.Handler):
    
            # This living here is not really a good plan, but it just about 
works for now.
            # Ideally we move all the read+combine logic in to TaskLogReader 
and out of the task handler.
   -        path = self._render_filename(ti, try_number)
   -        sources, logs = remote_io.read(path, ti)
   -        return sources, logs or []
   +        
   +        # Pre-fetch remote logging connection and set as environment 
variable
   +        # This allows remote log handlers to find connections via env vars 
instead of
   +        # trying to use Task SDK's SUPERVISOR_COMMS (which doesn't exist in 
API server context)
   +        from airflow.utils.log.connection_manager import 
with_remote_logging_connection
   +        
   +        with with_remote_logging_connection():
   +            path = self._render_filename(ti, try_number)
   +            sources, logs = remote_io.read(path, ti)
   +            return sources, logs or []
   
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to