This is an automated email from the ASF dual-hosted git repository.
pierrejeambrun pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-2-test by this push:
new b14148f41e8 [v3-2-test] Filter scheduling-dependencies graph edges by
readable-DAG access (#67627) (#67907)
b14148f41e8 is described below
commit b14148f41e8c921cf4388b5fbdc02e661e5f400e
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jun 3 15:50:43 2026 +0200
[v3-2-test] Filter scheduling-dependencies graph edges by readable-DAG
access (#67627) (#67907)
The UI scheduling-dependencies graph route (`GET
/ui/dependencies?dependency_type=scheduling`) was filtering only the top-level
DAG keys in `get_scheduling_dependencies()` by the caller's readable-DAG set.
For each dependency object under a readable top-level DAG, the route still
emitted the dependency node (whose `node_id` embeds both source and target DAG
ids — e.g.
`trigger:external_trigger_dag_id:downstream:trigger_dag_run_operator`), the
inbound edge from `dep.source` to `dep.node_id`, and the outbound edge from `dep.node_id` to `dep.target`.
A caller with read access to one DAG would therefore see identifiers and
edge metadata for other DAGs they cannot read whenever the readable DAG
referenced those DAGs via trigger/sensor dependencies.
This change extends the readable-DAG filter inside the dependency loop:
when either `dep.source` or `dep.target` is a bare DAG id outside the caller's
readable set, the dependency node and both its edges are skipped entirely.
Asset-prefixed identifiers (`asset:<id>`) are unaffected by the new check.
Reference: airflow-s/airflow-s#441
(cherry picked from commit e61c9bac30a7b1a7f68589629f10c4cb247c683e)
Generated-by: Claude Opus 4.7 (1M context) following the guidelines at
https://github.com/apache/airflow/blob/main/contributing-docs/05_pull_requests.rst#gen-ai-assisted-contributions
Co-authored-by: Jarek Potiuk <[email protected]>
---
.../core_api/services/ui/dependencies.py | 14 +++++++
.../core_api/routes/ui/test_dependencies.py | 44 ++++++++++++++++++++++
2 files changed, 58 insertions(+)
diff --git
a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py
b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py
index ead14c68a2d..f125e22fce9 100644
--- a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py
+++ b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py
@@ -98,6 +98,20 @@ def get_scheduling_dependencies(readable_dag_ids: set[str] |
None = None) -> dic
dag_node_id = f"dag:{dag}"
if dag_node_id not in nodes_dict:
for dep in dependencies:
+ # Skip dependency objects whose edge endpoints reference DAGs
+ # outside the caller's readable set. ``dep.node_id`` /
+ # ``dep.source`` / ``dep.target`` would otherwise embed those
+ # DAG ids in the response even when the top-level filter
+ # above hides the DAG itself.
+ if readable_dag_ids is not None:
+ referenced_dag_ids: set[str] = set()
+ if dep.source != dep.dependency_type and ":" not in
dep.source:
+ referenced_dag_ids.add(dep.source)
+ if dep.target != dep.dependency_type and ":" not in
dep.target:
+ referenced_dag_ids.add(dep.target)
+ if not referenced_dag_ids.issubset(readable_dag_ids):
+ continue
+
# Add nodes
nodes_dict[dag_node_id] = {"id": dag_node_id, "label": dag,
"type": "dag"}
if dep.node_id not in nodes_dict:
diff --git
a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py
b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py
index 2ea701c9a6d..d02a66393bc 100644
---
a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py
+++
b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py
@@ -358,6 +358,50 @@ class TestGetDependencies:
for node_id in expected_absent:
assert node_id not in dag_node_ids
+ @mock.patch(
+
"airflow.api_fastapi.auth.managers.base_auth_manager.BaseAuthManager.get_authorized_dag_ids",
+ return_value={"downstream"},
+ )
+ @pytest.mark.usefixtures("make_primary_connected_component")
+ def
test_scheduling_dependencies_redacts_trigger_sensor_endpoints_referencing_unreadable_dags(
+ self, _, test_client, asset1_id
+ ):
+ """Trigger/sensor dependency objects under a readable top-level DAG
must
+ not leak unreadable DAG identifiers through ``dep.node_id`` /
+ ``dep.source`` / ``dep.target``. The top-level filter only hides the
+ unreadable DAG as a top-level key; this regression check covers the
+ edge-endpoint leak."""
+ response = test_client.get("/dependencies")
+ assert response.status_code == 200
+
+ result = response.json()
+ unreadable_dag_ids = {"external_trigger_dag_id", "other_dag",
"upstream"}
+
+ # No node id may contain any unreadable DAG identifier (covers the
+ # bare ``dag:`` nodes that the top-level filter already hid, plus
+ # the ``trigger:.../sensor:...`` nodes whose ids embed both endpoints).
+ for node in result["nodes"]:
+ for unreadable in unreadable_dag_ids:
+ assert unreadable not in node["id"], (
+ f"node id {node['id']!r} leaks unreadable DAG
{unreadable!r}"
+ )
+
+ # No edge endpoint may be a ``dag:<unreadable>`` reference, and no
+ # endpoint may be a ``trigger:.../sensor:...`` node whose id embeds
+ # an unreadable DAG.
+ for edge in result["edges"]:
+ for endpoint in (edge["source_id"], edge["target_id"]):
+ for unreadable in unreadable_dag_ids:
+ assert unreadable not in endpoint, (
+ f"edge endpoint {endpoint!r} leaks unreadable DAG
{unreadable!r}"
+ )
+
+ # The readable top-level DAG itself must still be present, along with
+ # its legitimate asset-scheduled-by edge (asset ids are not DAG ids
+ # and are unaffected by the readable-DAG filter).
+ dag_node_ids = {node["id"] for node in result["nodes"] if node["type"]
== "dag"}
+ assert dag_node_ids == {"dag:downstream"}
+
@pytest.mark.parametrize(
("readable_dags", "expected_present", "expected_absent"),
[