kaxil commented on code in PR #62343:
URL: https://github.com/apache/airflow/pull/62343#discussion_r2963468962


##########
airflow-core/src/airflow/models/connection_test.py:
##########
@@ -0,0 +1,216 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import secrets
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING
+from uuid import UUID
+
+import structlog
+import uuid6
+from sqlalchemy import JSON, Boolean, Index, String, Text, Uuid, select
+from sqlalchemy.orm import Mapped, mapped_column
+
+from airflow._shared.timezones import timezone
+from airflow.models.base import Base
+from airflow.models.connection import Connection
+from airflow.models.crypto import get_fernet
+from airflow.utils.sqlalchemy import UtcDateTime
+
+if TYPE_CHECKING:
+    from sqlalchemy.orm import Session
+
+log = structlog.get_logger(__name__)
+
+
+class ConnectionTestState(str, Enum):
+    """All possible states of a connection test."""
+
+    PENDING = "pending"
+    QUEUED = "queued"
+    RUNNING = "running"
+    SUCCESS = "success"
+    FAILED = "failed"
+
+    def __str__(self) -> str:
+        return self.value
+
+
+ACTIVE_STATES = frozenset((ConnectionTestState.QUEUED, 
ConnectionTestState.RUNNING))
+TERMINAL_STATES = frozenset((ConnectionTestState.SUCCESS, 
ConnectionTestState.FAILED))
+
+
+class ConnectionTest(Base):
+    """Tracks an async connection test dispatched to a worker via a 
TestConnection workload."""
+
+    __tablename__ = "connection_test"
+
+    id: Mapped[UUID] = mapped_column(Uuid(), primary_key=True, 
default=uuid6.uuid7)
+    token: Mapped[str] = mapped_column(String(64), nullable=False, unique=True)
+    connection_id: Mapped[str] = mapped_column(String(250), nullable=False)
+    state: Mapped[str] = mapped_column(String(10), nullable=False, 
default=ConnectionTestState.PENDING)
+    result_message: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(UtcDateTime, 
default=timezone.utcnow, nullable=False)
+    updated_at: Mapped[datetime] = mapped_column(
+        UtcDateTime, default=timezone.utcnow, onupdate=timezone.utcnow, 
nullable=False
+    )
+    executor: Mapped[str | None] = mapped_column(String(256), nullable=True)
+    queue: Mapped[str | None] = mapped_column(String(256), nullable=True)
+    connection_snapshot: Mapped[dict | None] = 
mapped_column(JSON(none_as_null=True), nullable=True)
+    reverted: Mapped[bool] = mapped_column(Boolean, nullable=False, 
default=False, server_default="0")
+
+    __table_args__ = (Index("idx_connection_test_state_created_at", state, 
created_at),)
+
+    def __init__(
+        self, *, connection_id: str, executor: str | None = None, queue: str | 
None = None, **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.connection_id = connection_id
+        self.executor = executor
+        self.queue = queue
+        self.token = secrets.token_urlsafe(32)
+        self.state = ConnectionTestState.PENDING
+
+    def __repr__(self) -> str:
+        return f"<ConnectionTest id={self.id!r} 
connection_id={self.connection_id!r} state={self.state}>"
+
+    def get_executor_name(self) -> str | None:
+        """Return the executor name for scheduler routing."""
+        return self.executor
+
+    def get_dag_id(self) -> None:
+        """Return None — connection tests are not associated with any DAG."""
+        return None
+
+
+def run_connection_test(*, conn: Connection) -> tuple[bool, str]:
+    """
+    Worker-side function to execute a connection test.
+
+    Returns a (success, message) tuple. The caller is responsible for
+    reporting the result back via the Execution API.
+    """
+    try:
+        return conn.test_connection()
+    except Exception as e:
+        log.exception("Connection test failed", connection_id=conn.conn_id)
+        return False, str(e)
+
+
+_SNAPSHOT_FIELDS = (
+    "conn_type",
+    "description",
+    "host",
+    "login",
+    "_password",
+    "schema",
+    "port",
+    "_extra",
+    "is_encrypted",
+    "is_extra_encrypted",
+)
+
+
+def snapshot_connection(conn: Connection) -> dict:

Review Comment:
   `snapshot_connection()` stores Fernet-encrypted `_password` and `_extra` 
ciphertext in a JSON column. This expands the attack surface — anyone with read 
access to the `connection_test` table gets credential ciphertext that was 
previously confined to the `connection` table.
   
   Two additional problems:
   1. If the Fernet key is rotated after a snapshot is taken, the stored 
ciphertext can no longer be decrypted, so a revert would write unusable values 
back to the connection.
   2. DB-level audit/masking rules on `connection._password` won't cover this 
secondary storage.
   
   If the snapshot approach is kept (rather than Pierre's request-buffer 
redesign), consider storing only the non-secret fields and re-fetching 
credentials at revert time, or encrypting the entire snapshot blob with a 
short-lived key.



##########
airflow-core/src/airflow/models/connection_test.py:
##########
@@ -0,0 +1,216 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import secrets
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING
+from uuid import UUID
+
+import structlog
+import uuid6
+from sqlalchemy import JSON, Boolean, Index, String, Text, Uuid, select
+from sqlalchemy.orm import Mapped, mapped_column
+
+from airflow._shared.timezones import timezone
+from airflow.models.base import Base
+from airflow.models.connection import Connection
+from airflow.models.crypto import get_fernet
+from airflow.utils.sqlalchemy import UtcDateTime
+
+if TYPE_CHECKING:
+    from sqlalchemy.orm import Session
+
+log = structlog.get_logger(__name__)
+
+
+class ConnectionTestState(str, Enum):
+    """All possible states of a connection test."""
+
+    PENDING = "pending"
+    QUEUED = "queued"
+    RUNNING = "running"
+    SUCCESS = "success"
+    FAILED = "failed"
+
+    def __str__(self) -> str:
+        return self.value
+
+
+ACTIVE_STATES = frozenset((ConnectionTestState.QUEUED, 
ConnectionTestState.RUNNING))

Review Comment:
   `ACTIVE_STATES` includes QUEUED and RUNNING but not PENDING. The reaper 
(`_reap_stale_connection_tests`) only checks `ACTIVE_STATES`, so tests stuck in 
PENDING — e.g., scheduler restart between creation and dispatch, or no 
supporting executor found — accumulate forever.
   
   If those PENDING tests have snapshots (from save-and-test), the connection 
stays in its post-edit state with no resolution path.
   
   ```suggestion
   ACTIVE_STATES = frozenset((ConnectionTestState.PENDING, 
ConnectionTestState.QUEUED, ConnectionTestState.RUNNING))
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to