durgaprasadml commented on code in PR #67606: URL: https://github.com/apache/airflow/pull/67606#discussion_r3326427896
########## scripts/ci/prek/extract_permissions.py: ########## @@ -0,0 +1,491 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Extract permission requirements from FastAPI routes in Airflow REST API. + +This script statically parses FastAPI route files under airflow-core's public REST API +routes to extract required permissions for each endpoint. It generates a reference +RST documentation file for security/api_permissions_ref.rst. + +It runs completely statically using Python's built-in AST parser, requiring no runtime +Airflow imports or active execution environment, making it suitable for CI checks. 
+""" + +from __future__ import annotations + +import ast +import pathlib +import sys +from dataclasses import dataclass + +# --------------------------------------------------------------------------- +# Paths (all relative to the repo root, resolved from this file's location) +# --------------------------------------------------------------------------- +REPO_ROOT = pathlib.Path(__file__).resolve().parents[3] +PUBLIC_ROUTES_DIR = REPO_ROOT / "airflow-core/src/airflow/api_fastapi/core_api/routes/public" +OUTPUT_RST = REPO_ROOT / "airflow-core/docs/security/api_permissions_ref.rst" + +# The global /api/v2 prefix comes from public_router in __init__.py +API_PREFIX = "/api/v2" + + +# --------------------------------------------------------------------------- +# Data model +# --------------------------------------------------------------------------- +@dataclass(frozen=True, order=True) +class PermissionEntry: + """One HTTP operation's permission requirement.""" + + full_path: str # full route path, e.g. /api/v2/dags/{dag_id} + http_method: str # GET / POST / PATCH / PUT / DELETE + tag: str # OpenAPI tag, e.g. "DAG", "Variable" + resource: str # e.g. "DAG", "DAG.RUN", "Variable", "View" + required_permission: str # e.g. "GET", "POST", "DELETE", "multi", "PLUGINS" + source_file: str # route file basename for traceability + + +# --------------------------------------------------------------------------- +# Per-file AST helpers +# --------------------------------------------------------------------------- + + +def _resolve_string_node(node: ast.expr, module_consts: dict[str, str]) -> str: + """ + Convert an AST expression to a string. 
+ + Handles: + - ast.Constant → direct string + - ast.BinOp(+) → resolve left and right recursively (string concat) + - ast.Name → look up in module_consts if available + """ + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add): + left = _resolve_string_node(node.left, module_consts) + right = _resolve_string_node(node.right, module_consts) + return left + right + if isinstance(node, ast.Name) and node.id in module_consts: + return module_consts[node.id] + # Give up — return an unresolvable marker (will surface in tests) + return f"<unresolved:{ast.unparse(node)}>" + + +def _extract_module_string_constants(tree: ast.Module) -> dict[str, str]: + """ + Walk top-level assignments and collect simple string assignments. + + e.g. task_instances_prefix = "/dagRuns/{dag_run_id}/taskInstances" + → {"task_instances_prefix": "/dagRuns/{dag_run_id}/taskInstances"} + """ + consts: dict[str, str] = {} + for node in tree.body: + if ( + isinstance(node, ast.Assign) + and len(node.targets) == 1 + and isinstance(node.targets[0], ast.Name) + and isinstance(node.value, ast.Constant) + and isinstance(node.value.value, str) + ): + consts[node.targets[0].id] = node.value.value + return consts + + +def _extract_router_prefix(tree: ast.Module) -> str: + """ + Find some_router = AirflowRouter(prefix="...") at module level. + + Returns the prefix string or "" if not found. 
+ """ + for node in tree.body: + if not (isinstance(node, ast.Assign) and isinstance(node.value, ast.Call)): + continue + call = node.value + call_name = ( + call.func.id + if isinstance(call.func, ast.Name) + else call.func.attr + if isinstance(call.func, ast.Attribute) + else "" + ) + if call_name != "AirflowRouter": + continue + for kw in call.keywords: + if kw.arg == "prefix" and isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str): + return kw.value.value + return "" + + +def _get_requires_access_call_name(call_node: ast.Call) -> str | None: + """Extract the function name from a requires_access_*() call node.""" + fn = call_node.func + if isinstance(fn, ast.Name) and fn.id.startswith("requires_access"): + return fn.id + if isinstance(fn, ast.Attribute) and fn.attr.startswith("requires_access"): + return fn.attr + return None + + +def _extract_method_arg(call_node: ast.Call) -> str: + """ + Extract the HTTP method from a requires_access_*(...) call. + + Two calling conventions exist in the codebase: + requires_access_dag("GET", ...) ← positional + requires_access_dag(method="GET", ...) ← keyword + + Returns the method string (GET/POST/PUT/DELETE) or "multi" + for bulk functions that carry no method. + """ + # Positional first arg + if call_node.args: + first = call_node.args[0] + if isinstance(first, ast.Constant) and isinstance(first.value, str): + return first.value.upper() + return ast.unparse(first).strip("\"'").upper() + + # Keyword method= + for kw in call_node.keywords: + if kw.arg == "method": + val = kw.value + if isinstance(val, ast.Constant) and isinstance(val.value, str): + return val.value.upper() + return ast.unparse(val).strip("\"'").upper() + + # bulk functions: no method arg + return "multi" + + +def _extract_entity_arg(call_node: ast.Call) -> str | None: + """ + Extract the access_entity or first positional (for requires_access_view). + + Returns e.g. "TASK_INSTANCE", "PLUGINS", or None. 
+ """ + fn_name = _get_requires_access_call_name(call_node) or "" + + # For requires_access_view the entity IS the first positional arg + if fn_name == "requires_access_view": + for kw in call_node.keywords: + if kw.arg == "access_view": + return ast.unparse(kw.value).split(".")[-1] # AccessView.PLUGINS → "PLUGINS" + if call_node.args: + return ast.unparse(call_node.args[0]).split(".")[-1] + return None + + # For requires_access_dag the entity is the access_entity keyword + if fn_name == "requires_access_dag": + for kw in call_node.keywords: + if kw.arg == "access_entity": + return ast.unparse(kw.value).split(".")[-1] # DagAccessEntity.RUN → "RUN" + return None

Review Comment:
Thanks, fixed now. The extractor has been updated to support both forms used by requires_access_dag:
- keyword argument: access_entity=...
- second positional argument: requires_access_dag("GET", DagAccessEntity.TASK_LOGS)

I also added a regression test covering the positional form and regenerated the reference docs to reflect the corrected DAG entity mappings.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at: [email protected]
