uranusjr commented on code in PR #41232:
URL: https://github.com/apache/airflow/pull/41232#discussion_r1716332040
##########
airflow/jobs/triggerer_job_runner.py:
##########
@@ -751,3 +757,28 @@ def get_trigger_by_classpath(self, classpath: str) -> type[BaseTrigger]:
if classpath not in self.trigger_cache:
self.trigger_cache[classpath] = import_string(classpath)
return self.trigger_cache[classpath]
+
+ @staticmethod
+ def add_trigger_cancel_reasons(
+ triggerer_id, cancel_trigger_ids: set[int]
+ ) -> list[tuple[int, TriggerTerminationReason]]:
+ """
+ Add trigger cancel reasons to give consumer more context.
+
+ Currently, we only distinguish between reassigned and other reasons.
+ """
+
+ def add_reason(
+ ids: set[int], reason: TriggerTerminationReason
+ ) -> list[tuple[int, TriggerTerminationReason]]:
+ return [(trigger_id, reason) for trigger_id in ids]
+
+ # find out reassigned triggers
+        reassigned_trigger_ids = set(Trigger.filter_out_reassigned_triggers(triggerer_id, cancel_trigger_ids))
Review Comment:
Why does `filter_out_reassigned_triggers` return a list when this call site immediately converts it into a set? Why not just return a set directly?
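For illustration, a rough sketch of what I mean (placeholder names and body, not this PR's real code; I'm inferring from the call site that the helper returns the reassigned IDs):

```python
# Hypothetical sketch -- the real query stays as is, just collected into a set.
class Trigger:
    @classmethod
    def filter_out_reassigned_triggers(cls, triggerer_id: int, trigger_ids: set[int]) -> set[int]:
        reassigned: set[int] = set()  # placeholder for the existing query
        return reassigned


# The call site in add_trigger_cancel_reasons then needs no extra set(...) wrapping:
triggerer_id, cancel_trigger_ids = 1, {10, 11}
reassigned_trigger_ids = Trigger.filter_out_reassigned_triggers(triggerer_id, cancel_trigger_ids)
```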
##########
tests/jobs/test_triggerer_job.py:
##########
@@ -787,3 +801,323 @@ def non_pytest_handlers(val):
assert qh.__class__ == LocalQueueHandler
assert qh.queue == listener.queue
listener.stop()
+
+
+class RemoteJob:
+ RUNNING_STATUS = "running"
+ FAILED_STATUS = "failed"
+ SUCCESS_STATUS = "success"
+
+ def __init__(self, status):
+ self.status = status
+
+ def get_status(self):
+ return self.status
+
+ def set_status(self, status):
+ self.status = status
+
+ def kill(self):
+ self.set_status(RemoteJob.FAILED_STATUS)
+
+
+class RemoteService:
Review Comment:
These singleton classes (this one and the one below) are a bit overkill IMO.
They can just be regular classes, with regular test-scoped instances.
It’d be much easier to debug too.
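Roughly what I have in mind (a sketch only; the fixture name and wiring are illustrative, and it reuses `RemoteJob` from this diff):

```python
import pytest


class RemoteService:
    """Plain class: each test builds its own instance, so no reset() bookkeeping is needed."""

    def __init__(self):
        self.event_queue = []  # [(trigger, job_id), ...]
        self.remote_jobs = {1: RemoteJob(RemoteJob.RUNNING_STATUS)}

    # complete_job / kill_job / get_job stay exactly as in the PR.


@pytest.fixture
def remote_service():
    # Fresh, test-scoped instance; RemoteJobTrigger could take it as a
    # constructor argument instead of reaching for a global singleton.
    return RemoteService()
```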
##########
airflow/triggers/base.py:
##########
@@ -115,6 +116,19 @@ async def cleanup(self) -> None:
and handle it appropriately (in async-compatible way).
"""
+    def should_cleanup(self, termination_reason: TriggerTerminationReason | None) -> bool:
+        """
+        Check whether the trigger should be cleaned up, based on the context.
+
+        :param termination_reason: The reason for terminating the trigger.
+            Since the trigger could be terminated for various reasons, like a triggerer restart or reassignment to
+            another triggerer, this method allows the trigger to decide whether it should be cleaned up under the
+            current circumstances.
+
+        By default this always returns True; override this method based on custom requirements.
Review Comment:
Is this only for testing purposes? The docstring should say so instead if
that’s the case.
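If it is indeed mainly a hook for tests, the docstring could make that explicit, e.g. (wording is only a suggestion):

```python
"""
Return whether ``cleanup`` should run for the given termination reason.

The default implementation always returns True. This hook mainly exists so
tests (and custom triggers) can skip cleanup when the trigger is merely being
reassigned to another triggerer rather than genuinely cancelled.
"""
```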
##########
tests/jobs/test_triggerer_job.py:
##########
@@ -787,3 +801,323 @@ def non_pytest_handlers(val):
assert qh.__class__ == LocalQueueHandler
assert qh.queue == listener.queue
listener.stop()
+
+
+class RemoteJob:
+ RUNNING_STATUS = "running"
+ FAILED_STATUS = "failed"
+ SUCCESS_STATUS = "success"
+
+ def __init__(self, status):
+ self.status = status
+
+ def get_status(self):
+ return self.status
+
+ def set_status(self, status):
+ self.status = status
+
+ def kill(self):
+ self.set_status(RemoteJob.FAILED_STATUS)
+
+
+class RemoteService:
+ _instance = None
+
+ event_queue: list[tuple[RemoteJobTrigger, int]]
+ remote_jobs: dict[int, RemoteJob]
+
+ def __new__(cls):
+ if cls._instance is None:
+ cls._instance = super().__new__(cls)
+ cls._instance.event_queue = []
+ cls._instance.remote_jobs = {
+ 1: RemoteJob(RemoteJob.RUNNING_STATUS),
+ }
+ return cls._instance
+
+ def complete_job(self, job_id: int):
+ self.remote_jobs[job_id].set_status(RemoteJob.SUCCESS_STATUS)
+
+ def kill_job(self, caller: RemoteJobTrigger, job_id: int):
+ self.event_queue.append((caller, job_id))
+ self.remote_jobs[job_id].kill()
+
+ def get_job(self, job_id: int):
+ return self.remote_jobs[job_id]
+
+ def reset(self):
+ self.event_queue = []
+ self.remote_jobs = {
+ 1: RemoteJob(RemoteJob.RUNNING_STATUS),
+ }
+
+
+class TriggerInstances:
+ _instance = None
+
+ def __new__(cls):
+ if cls._instance is None:
+ cls._instance = super().__new__(cls)
+ cls._instance.trigger_instances = []
+ return cls._instance
+
+ def reset(self):
+ self.trigger_instances.clear()
+
+ def add_instances(self, instance):
+ self.trigger_instances.append(instance)
+
+
+class RemoteJobTrigger(BaseTrigger):
+ def __init__(self, remote_job_id, cleanup_on_reassignment: bool):
+ super().__init__()
+ self.remote_job_id = remote_job_id
+ self.finished = False
+ self.cleanup_done = False
+ self.last_status = None
+ self.cleanup_on_reassignment = cleanup_on_reassignment
+ TriggerInstances().add_instances(self)
+
+ def get_status(self):
+ return RemoteService().get_job(self.remote_job_id).get_status()
+
+ async def run(self):
+ print(f"Trigger object {id(self)}: start to run")
+
+ while self.get_status() == RemoteJob.RUNNING_STATUS:
+ await asyncio.sleep(0.1)
+ self.finished = True
+        print(f"Trigger object {id(self)}: finished with status: {self.get_status()}")
+ yield TriggerEvent(self.remote_job_id)
+
+ async def cleanup(self) -> None:
+ RemoteService().kill_job(self, self.remote_job_id)
+ self.cleanup_done = True
+ print(f"Trigger object {id(self)}: cleanup done")
+
+    def should_cleanup(self, termination_reason: TriggerTerminationReason | None) -> bool:
+        return not (
+            termination_reason == TriggerTerminationReason.REASSIGNED and not self.cleanup_on_reassignment
+        )
Review Comment:
```suggestion
return (
termination_reason != TriggerTerminationReason.REASSIGNED
or self.cleanup_on_reassignment
)
```
(Or is that more difficult to understand?)
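(They are equivalent by De Morgan's law; a throwaway check, where `reassigned` stands for `termination_reason == TriggerTerminationReason.REASSIGNED`:)

```python
for reassigned in (True, False):
    for cleanup_on_reassignment in (True, False):
        original = not (reassigned and not cleanup_on_reassignment)
        suggested = not reassigned or cleanup_on_reassignment
        assert original == suggested
```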
##########
airflow/jobs/triggerer_job_runner.py:
##########
@@ -751,3 +757,28 @@ def get_trigger_by_classpath(self, classpath: str) -> type[BaseTrigger]:
if classpath not in self.trigger_cache:
self.trigger_cache[classpath] = import_string(classpath)
return self.trigger_cache[classpath]
+
+ @staticmethod
+ def add_trigger_cancel_reasons(
+ triggerer_id, cancel_trigger_ids: set[int]
+ ) -> list[tuple[int, TriggerTerminationReason]]:
+ """
+ Add trigger cancel reasons to give consumer more context.
+
+ Currently, we only distinguish between reassigned and other reasons.
+ """
+
+ def add_reason(
+ ids: set[int], reason: TriggerTerminationReason
+ ) -> list[tuple[int, TriggerTerminationReason]]:
+ return [(trigger_id, reason) for trigger_id in ids]
+
+ # find out reassigned triggers
+        reassigned_trigger_ids = set(Trigger.filter_out_reassigned_triggers(triggerer_id, cancel_trigger_ids))
+
+ other_reasons_trigger_ids = cancel_trigger_ids - reassigned_trigger_ids
+
+        trigger_id_reasons = add_reason(reassigned_trigger_ids, TriggerTerminationReason.REASSIGNED)
+        trigger_id_reasons.extend(add_reason(other_reasons_trigger_ids, TriggerTerminationReason.OTHER))
+
+ return trigger_id_reasons
Review Comment:
```suggestion
reassigned_pairs = (
(trigger_id, TriggerTerminationReason.REASSIGNED)
for trigger_id in reassigned_trigger_ids
)
other_reason_pairs = (
(trigger_id, TriggerTerminationReason.OTHER)
for trigger_id in other_reasons_trigger_ids
)
return [*reassigned_pairs, *other_reason_pairs]
```
Same as your code but easier to read IMO. The additional `add_reason`
function doesn’t make things easier.
##########
tests/jobs/test_triggerer_job.py:
##########
@@ -787,3 +801,323 @@ def non_pytest_handlers(val):
assert qh.__class__ == LocalQueueHandler
assert qh.queue == listener.queue
listener.stop()
+
+
+class RemoteJob:
+ RUNNING_STATUS = "running"
+ FAILED_STATUS = "failed"
+ SUCCESS_STATUS = "success"
+
+ def __init__(self, status):
+ self.status = status
+
+ def get_status(self):
+ return self.status
+
+ def set_status(self, status):
+ self.status = status
+
+ def kill(self):
+ self.set_status(RemoteJob.FAILED_STATUS)
+
+
+class RemoteService:
+ _instance = None
+
+ event_queue: list[tuple[RemoteJobTrigger, int]]
+ remote_jobs: dict[int, RemoteJob]
+
+ def __new__(cls):
+ if cls._instance is None:
+ cls._instance = super().__new__(cls)
+ cls._instance.event_queue = []
+ cls._instance.remote_jobs = {
+ 1: RemoteJob(RemoteJob.RUNNING_STATUS),
+ }
+ return cls._instance
+
+ def complete_job(self, job_id: int):
+ self.remote_jobs[job_id].set_status(RemoteJob.SUCCESS_STATUS)
+
+ def kill_job(self, caller: RemoteJobTrigger, job_id: int):
+ self.event_queue.append((caller, job_id))
+ self.remote_jobs[job_id].kill()
+
+ def get_job(self, job_id: int):
+ return self.remote_jobs[job_id]
+
+ def reset(self):
+ self.event_queue = []
+ self.remote_jobs = {
+ 1: RemoteJob(RemoteJob.RUNNING_STATUS),
+ }
+
+
+class TriggerInstances:
+ _instance = None
+
+ def __new__(cls):
+ if cls._instance is None:
+ cls._instance = super().__new__(cls)
+ cls._instance.trigger_instances = []
+ return cls._instance
+
+ def reset(self):
+ self.trigger_instances.clear()
+
+ def add_instances(self, instance):
+ self.trigger_instances.append(instance)
+
+
+class RemoteJobTrigger(BaseTrigger):
+ def __init__(self, remote_job_id, cleanup_on_reassignment: bool):
+ super().__init__()
+ self.remote_job_id = remote_job_id
+ self.finished = False
+ self.cleanup_done = False
+ self.last_status = None
+ self.cleanup_on_reassignment = cleanup_on_reassignment
+ TriggerInstances().add_instances(self)
+
+ def get_status(self):
+ return RemoteService().get_job(self.remote_job_id).get_status()
+
+ async def run(self):
+ print(f"Trigger object {id(self)}: start to run")
+
+ while self.get_status() == RemoteJob.RUNNING_STATUS:
+ await asyncio.sleep(0.1)
+ self.finished = True
+        print(f"Trigger object {id(self)}: finished with status: {self.get_status()}")
Review Comment:
These prints should be removed.
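i.e. something like this (same logic as in this PR, just without the debug output; `self.log.debug(...)` would be the usual replacement if you still want it):

```python
async def run(self):
    # Poll the fake remote job until it leaves the RUNNING state.
    while self.get_status() == RemoteJob.RUNNING_STATUS:
        await asyncio.sleep(0.1)
    self.finished = True
    yield TriggerEvent(self.remote_job_id)
```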
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]