ashb commented on code in PR #46677:
URL: https://github.com/apache/airflow/pull/46677#discussion_r1953560525
##########
airflow/jobs/triggerer_job_runner.py:
##########
@@ -415,33 +344,174 @@ def handle_failed_triggers(self):
Task Instances that depend on them need failing.
"""
- while self.trigger_runner.failed_triggers:
+ while self.failed_triggers:
# Tell the model to fail this trigger's deps
- trigger_id, saved_exc = self.trigger_runner.failed_triggers.popleft()
+ trigger_id, saved_exc = self.failed_triggers.popleft()
Trigger.submit_failure(trigger_id=trigger_id, exc=saved_exc)
# Emit stat event
Stats.incr("triggers.failed")
- @add_span
def emit_metrics(self):
- Stats.gauge(f"triggers.running.{self.job.hostname}", len(self.trigger_runner.triggers))
- Stats.gauge(
- "triggers.running", len(self.trigger_runner.triggers), tags={"hostname": self.job.hostname}
- )
+ Stats.gauge(f"triggers.running.{self.job.hostname}", len(self.running_triggers))
+ Stats.gauge("triggers.running", len(self.running_triggers), tags={"hostname": self.job.hostname})
- capacity_left = self.capacity - len(self.trigger_runner.triggers)
+ capacity_left = self.capacity - len(self.running_triggers)
Stats.gauge(f"triggerer.capacity_left.{self.job.hostname}", capacity_left)
Stats.gauge("triggerer.capacity_left", capacity_left, tags={"hostname": self.job.hostname})
span = Trace.get_current_span()
span.set_attributes(
{
"trigger host": self.job.hostname,
- "triggers running": len(self.trigger_runner.triggers),
+ "triggers running": len(self.running_triggers),
"capacity left": capacity_left,
}
)
+ def update_triggers(self, requested_trigger_ids: set[int]):
+ """
+ Request that we update what triggers we're running.
+
+ Works out the differences - ones to add, and ones to remove - then
+ adds them to the deques so the subprocess can actually mutate the running
+ trigger set.
+ """
+ render_log_fname = log_filename_template_renderer()
+
+ known_trigger_ids = (
+ self.running_triggers.union(x[0] for x in self.events)
+ .union(self.cancelling_triggers)
+ # .union(x.id for x in self.to_create)
+ .union(trigger[0] for trigger in self.failed_triggers)
+ )
+ # Work out the two difference sets
+ new_trigger_ids = requested_trigger_ids - known_trigger_ids
+ cancel_trigger_ids = self.running_triggers - requested_trigger_ids
+ # Bulk-fetch new trigger records
+ new_triggers = Trigger.bulk_fetch(new_trigger_ids)
+ triggers_with_assets = Trigger.fetch_trigger_ids_with_asset()
+ to_create: list[workloads.RunTrigger] = []
+ # Add in new triggers
+ for new_id in new_trigger_ids:
+ # Check it didn't vanish in the meantime
+ if new_id not in new_triggers:
+ log.warning("Trigger disappeared before we could start it", id=new_id)
+ continue
+
+ new_trigger_orm = new_triggers[new_id]
+
+ # If the trigger is not associated to a task or an asset, this means the TaskInstance
+ # row was updated by either Trigger.submit_event or Trigger.submit_failure
+ # and can happen when a single trigger Job is being run on multiple TriggerRunners
+ # in a High-Availability setup.
+ if new_trigger_orm.task_instance is None and new_id not in triggers_with_assets:
+ log.info(
+ (
+ "TaskInstance Trigger is None. It was likely updated by another trigger job. "
+ "Skipping trigger instantiation."
+ ),
+ id=new_id,
+ )
+ continue
+
+ workload = workloads.RunTrigger(
+ classpath=new_trigger_orm.classpath,
+ id=new_id,
+ kwargs=new_trigger_orm.kwargs,
+ ti=None,
+ )
+ if new_trigger_orm.task_instance:
+ log_path = render_log_fname(ti=new_trigger_orm.task_instance)
+ # When producing logs from TIs, include the job id producing the logs to disambiguate it.
+ self.logger_cache[new_id] = TriggerLoggingFactory(
+ log_path=f"{log_path}.trigger.{self.job.id}.log"
+ )
+
+ ser_ti = workloads.TaskInstance.model_validate(
+ new_trigger_orm.task_instance, from_attributes=True
+ )
+ workload.ti = ser_ti
+
+ to_create.append(workload)
+
+ # TODO: Send to subprocess
Review Comment:
```suggestion
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]