kaxil commented on code in PR #68533: URL: https://github.com/apache/airflow/pull/68533#discussion_r3442178496
########## devel-common/src/tests_common/test_utils/in_process_taskrun.py: ########## @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""DB-free, xdist-safe execution of a task through a *real* supervisor socket. + +`run_task` (in ``pytest_plugin``) mocks supervisor comms entirely in-process and +has **no real socket**, so operators that spawn a subprocess which re-connects to +the supervisor — ``PythonVirtualenvOperator``, ``ExternalPythonOperator``, +``run_as_user`` — fail there with ``OSError: Socket operation on non-socket``. + +This drives the *real* ``InProcessTestSupervisor`` (its socketpair machinery is +created specifically for VirtualEnv operators) but injects a **dry-run Execution-API +client** instead of the DB-backed in-process API server, so the subprocess gets a +working supervisor socket without touching the metadata DB. Tests using it need no +``@pytest.mark.db_test`` and run under xdist. + +The client is the real ``Client(dry_run=True)`` (which already fakes the run +context and no-ops heartbeats via ``noop_handler``), with the discarding transport +swapped for one that *remembers* XCom writes in an in-memory dict — exposed as +``client.pushed_xcoms`` so tests can assert what a task pushed. + +Requires the Task SDK ``run_task_in_process(..., client=)`` parameter (newer than +Airflow 3.0/3.1, and absent on 2.x); callers must gate on its availability and fall +back to a DB-backed path otherwise. +""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from collections.abc import Callable + + from airflow.sdk.api.client import Client + from airflow.sdk.execution_time.supervisor import TaskRunResult + from airflow.sdk.types import Operator + +# XCom is the only resource that must round-trip; the run-context is fed back from the +# (valid) ti_context the test built, and everything else (heartbeat, state updates) is the +# stock ``noop_handler``. (``noop_handler``'s own run-context is stale vs the live schema.) +_XCOM_PATH_PARTS = 5 # /xcoms/{dag_id}/{run_id}/{task_id}/{key} + + +def _remembering_handler(store: dict, run_context_json: bytes) -> Callable: + """A dry-run transport handler: valid run-context + XCom round-trip from ``store``, else no-op.""" + import httpx + + from airflow.sdk.api.client import noop_handler + + def handler(request: httpx.Request) -> httpx.Response: + path = request.url.path + if path.startswith("/task-instances/") and path.endswith("/run"): + return httpx.Response(200, content=run_context_json) + parts = path.strip("/").split("/") + if len(parts) == _XCOM_PATH_PARTS and parts[0] == "xcoms": + dag_id, run_id, task_id, key = parts[1:] + sig = (dag_id, run_id, task_id, key) + if request.method == "POST": + store[sig] = json.loads(request.content or b"null") + return httpx.Response(201, json={"ok": True}) + if request.method == "GET": + if sig in store: + return httpx.Response(200, json={"key": key, "value": store[sig]}) + return httpx.Response(404, json={"detail": "XCom not found"}) + return noop_handler(request) + + return handler + + +def build_in_memory_client(ti_context) -> Client: + """A real ``Client(dry_run=True)`` that remembers XCom writes (no DB, no network). + + ``ti_context`` (a ``TIRunContext``) is replayed for the task-start request. Pushed XCom + values are exposed as ``client.pushed_xcoms`` keyed by ``(dag_id, run_id, task_id, key)``. + """ + import httpx Review Comment: `import httpx` (here and L60) and `from uuid6 import uuid7` (L122) are unconditional third-party deps present on every supported version, so they can move to the module top. Worth keeping these separate from the `airflow.sdk.*` imports you're moving to the compat shim in the other thread: those need version guarding, these don't. ########## devel-common/src/tests_common/test_utils/in_process_taskrun.py: ########## @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""DB-free, xdist-safe execution of a task through a *real* supervisor socket. + +`run_task` (in ``pytest_plugin``) mocks supervisor comms entirely in-process and +has **no real socket**, so operators that spawn a subprocess which re-connects to +the supervisor — ``PythonVirtualenvOperator``, ``ExternalPythonOperator``, +``run_as_user`` — fail there with ``OSError: Socket operation on non-socket``. + +This drives the *real* ``InProcessTestSupervisor`` (its socketpair machinery is +created specifically for VirtualEnv operators) but injects a **dry-run Execution-API +client** instead of the DB-backed in-process API server, so the subprocess gets a +working supervisor socket without touching the metadata DB. Tests using it need no +``@pytest.mark.db_test`` and run under xdist. + +The client is the real ``Client(dry_run=True)`` (which already fakes the run +context and no-ops heartbeats via ``noop_handler``), with the discarding transport +swapped for one that *remembers* XCom writes in an in-memory dict — exposed as +``client.pushed_xcoms`` so tests can assert what a task pushed. + +Requires the Task SDK ``run_task_in_process(..., client=)`` parameter (newer than +Airflow 3.0/3.1, and absent on 2.x); callers must gate on its availability and fall +back to a DB-backed path otherwise. +""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from collections.abc import Callable + + from airflow.sdk.api.client import Client + from airflow.sdk.execution_time.supervisor import TaskRunResult + from airflow.sdk.types import Operator + +# XCom is the only resource that must round-trip; the run-context is fed back from the +# (valid) ti_context the test built, and everything else (heartbeat, state updates) is the +# stock ``noop_handler``. (``noop_handler``'s own run-context is stale vs the live schema.) +_XCOM_PATH_PARTS = 5 # /xcoms/{dag_id}/{run_id}/{task_id}/{key} + + +def _remembering_handler(store: dict, run_context_json: bytes) -> Callable: + """A dry-run transport handler: valid run-context + XCom round-trip from ``store``, else no-op.""" + import httpx + + from airflow.sdk.api.client import noop_handler + + def handler(request: httpx.Request) -> httpx.Response: + path = request.url.path + if path.startswith("/task-instances/") and path.endswith("/run"): + return httpx.Response(200, content=run_context_json) + parts = path.strip("/").split("/") + if len(parts) == _XCOM_PATH_PARTS and parts[0] == "xcoms": + dag_id, run_id, task_id, key = parts[1:] + sig = (dag_id, run_id, task_id, key) + if request.method == "POST": + store[sig] = json.loads(request.content or b"null") Review Comment: Minor: `request.content` is always non-empty here, so the `or b"null"` fallback is dead. A POST to `/xcoms/...` carries the serialized value, and even a `None` value serializes to `b"null"` rather than empty bytes. Can simplify to `json.loads(request.content)`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
