ashb commented on code in PR #46020:
URL: https://github.com/apache/airflow/pull/46020#discussion_r1930703697
##########
airflow/api_fastapi/execution_api/routes/task_instances.py:
##########
@@ -442,6 +444,48 @@ def get_previous_successful_dagrun(
return PrevSuccessfulDagRunResponse.model_validate(dag_run)
[email protected](
+ "/{task_instance_id}/runtime-checks",
+ status_code=status.HTTP_200_OK,
+ # TODO: Add description to the operation
+ # TODO: Add Operation ID to control the function name in the OpenAPI spec
+ # TODO: Do we need to use create_openapi_http_exception_doc here?
+ responses={
+ status.HTTP_400_BAD_REQUEST: {
+ "description": "Task Instance doesn't pass the required runtime
checks"
+ },
+ status.HTTP_204_NO_CONTENT: {
+ "description": "Task Instance is not in a running state, cannot
perform runtime checks."
+ },
+ status.HTTP_422_UNPROCESSABLE_ENTITY: {
+ "description": "Invalid payload for requested runtime checks on
the Task Instance."
+ },
+ },
+)
+def ti_runtime_checks(
+ task_instance_id: UUID,
+ payload: TIRuntimeCheckPayload,
+ session: SessionDep,
+):
+ ti_id_str = str(task_instance_id)
+ task_instance = session.scalar(select(TI).where(TI.id == ti_id_str))
+ if task_instance.state != State.RUNNING:
+ return Response(status_code=status.HTTP_204_NO_CONTENT)
Review Comment:
I don't think it matters/is avoidable, but previously the asset checks were
done before the task was put in to the running state right?
##########
airflow/api_fastapi/execution_api/routes/task_instances.py:
##########
@@ -442,6 +444,48 @@ def get_previous_successful_dagrun(
return PrevSuccessfulDagRunResponse.model_validate(dag_run)
[email protected](
+ "/{task_instance_id}/runtime-checks",
+ status_code=status.HTTP_200_OK,
+ # TODO: Add description to the operation
+ # TODO: Add Operation ID to control the function name in the OpenAPI spec
+ # TODO: Do we need to use create_openapi_http_exception_doc here?
+ responses={
+ status.HTTP_400_BAD_REQUEST: {
+ "description": "Task Instance doesn't pass the required runtime
checks"
+ },
+ status.HTTP_204_NO_CONTENT: {
+ "description": "Task Instance is not in a running state, cannot
perform runtime checks."
Review Comment:
Message is wrong :)
##########
task_sdk/src/airflow/sdk/execution_time/supervisor.py:
##########
@@ -767,6 +768,14 @@ def _handle_request(self, msg: ToSupervisor, log:
FilteringBoundLogger):
if isinstance(msg, TaskState):
self._terminal_state = msg.state
self._task_end_time_monotonic = time.monotonic()
+ elif isinstance(msg, RuntimeCheckOnTask):
+ runtime_check_resp =
self.client.task_instances.runtime_checks(id=self.id, msg=msg)
+ resp = runtime_check_resp.model_dump_json().encode()
+ if not runtime_check_resp.ok:
+ log.debug("Runtime checks failed on task %s, marking task as
failed..", self.id)
+ self.client.task_instances.finish(
+ id=self.id, state=TerminalTIState.FAILED,
when=datetime.now(tz=timezone.utc)
+ )
Review Comment:
I'm not sure we should directly mark the task as failed here -- or at the
least I think the task subprocess should be told about it so that it can run
on_failure callbacks etc.
##########
airflow/api_fastapi/execution_api/datamodels/taskinstance.py:
##########
@@ -249,3 +249,10 @@ class PrevSuccessfulDagRunResponse(BaseModel):
data_interval_end: UtcDateTime | None = None
start_date: UtcDateTime | None = None
end_date: UtcDateTime | None = None
+
+
+class TIRuntimeCheckPayload(BaseModel):
+ """Payload for performing Runtime checks on the TaskInstance model as
requested by the SDK."""
+
+ inlet: list[AssetProfile] | None = None
+ outlet: list[AssetProfile] | None = None
Review Comment:
Nit:
```suggestion
inlets: list[AssetProfile] | None = None
outlets: list[AssetProfile] | None = None
```
##########
task_sdk/src/airflow/sdk/execution_time/task_runner.py:
##########
@@ -501,6 +503,22 @@ def run(ti: RuntimeTaskInstance, log: Logger):
# TODO: Get a real context object
ti.hostname = get_hostname()
ti.task = ti.task.prepare_for_execution()
+ if ti.task.inlets or ti.task.outlets:
Review Comment:
Nice
##########
task_sdk/src/airflow/sdk/execution_time/task_runner.py:
##########
@@ -501,6 +503,22 @@ def run(ti: RuntimeTaskInstance, log: Logger):
# TODO: Get a real context object
ti.hostname = get_hostname()
ti.task = ti.task.prepare_for_execution()
+ if ti.task.inlets or ti.task.outlets:
+ inlets = [
+ AssetProfile(name=x.name or None, uri=x.uri or None,
asset_type=Asset.__name__)
+ for x in ti.task.inlets
+ if isinstance(x, Asset)
+ ]
+ outlets = [
+ AssetProfile(name=x.name or None, uri=x.uri or None,
asset_type=Asset.__name__)
+ for x in ti.task.outlets
+ if isinstance(x, Asset)
+ ]
+ SUPERVISOR_COMMS.send_request(msg=RuntimeCheckOnTask(inlet=inlets,
outlet=outlets), log=log) # type: ignore
+ msg = SUPERVISOR_COMMS.get_message() # type: ignore
+ if isinstance(msg, OKResponse) and not msg.ok:
+ log.info("Runtime checks failed for task, marking task as
failed..")
+ return
Review Comment:
I don't see you sending the OKResponse message anywhere...
##########
task_sdk/src/airflow/sdk/execution_time/task_runner.py:
##########
@@ -501,6 +503,22 @@ def run(ti: RuntimeTaskInstance, log: Logger):
# TODO: Get a real context object
ti.hostname = get_hostname()
ti.task = ti.task.prepare_for_execution()
+ if ti.task.inlets or ti.task.outlets:
+ inlets = [
+ AssetProfile(name=x.name or None, uri=x.uri or None,
asset_type=Asset.__name__)
+ for x in ti.task.inlets
+ if isinstance(x, Asset)
Review Comment:
Is this worth adding an `asprofile` or something method on Asset so we can do
```suggestion
asset.asprofile() for asset in ti.task.inlets if
isinstance(asset, Asset)
```
##########
airflow/api_fastapi/execution_api/routes/task_instances.py:
##########
@@ -442,6 +444,48 @@ def get_previous_successful_dagrun(
return PrevSuccessfulDagRunResponse.model_validate(dag_run)
[email protected](
+ "/{task_instance_id}/runtime-checks",
+ status_code=status.HTTP_200_OK,
+ # TODO: Add description to the operation
+ # TODO: Add Operation ID to control the function name in the OpenAPI spec
+ # TODO: Do we need to use create_openapi_http_exception_doc here?
+ responses={
+ status.HTTP_400_BAD_REQUEST: {
+ "description": "Task Instance doesn't pass the required runtime
checks"
+ },
+ status.HTTP_204_NO_CONTENT: {
+ "description": "Task Instance is not in a running state, cannot
perform runtime checks."
+ },
+ status.HTTP_422_UNPROCESSABLE_ENTITY: {
+ "description": "Invalid payload for requested runtime checks on
the Task Instance."
+ },
+ },
+)
+def ti_runtime_checks(
+ task_instance_id: UUID,
+ payload: TIRuntimeCheckPayload,
+ session: SessionDep,
+):
+ ti_id_str = str(task_instance_id)
+ task_instance = session.scalar(select(TI).where(TI.id == ti_id_str))
+ if task_instance.state != State.RUNNING:
+ return Response(status_code=status.HTTP_204_NO_CONTENT)
Review Comment:
Returning a 204 if the task isn't in the right state doesn't feel right.
This feels more like it should be a HTTP 409 or a 400, and 204 No Content
should be for the success path
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]