Lee-W commented on code in PR #59115:
URL: https://github.com/apache/airflow/pull/59115#discussion_r2700711040
##########
airflow-core/src/airflow/jobs/scheduler_job_runner.py:
##########
@@ -1765,24 +1767,48 @@ def _create_dag_runs(self, dag_models:
Collection[DagModel], session: Session) -
# as DagModel.dag_id and DagModel.next_dagrun
# This list is used to verify if the DagRun already exist so that we don't attempt to create
# duplicate DagRuns
- existing_dagruns = (
- session.execute(
- select(DagRun.dag_id, DagRun.logical_date).where(
+ existing_dagrun_objects = (
+ session.scalars(
+ select(DagRun)
+ .where(
tuple_(DagRun.dag_id, DagRun.logical_date).in_(
(dm.dag_id, dm.next_dagrun) for dm in dag_models
- ),
+ )
)
+ .options(load_only(DagRun.dag_id, DagRun.logical_date))
)
.unique()
.all()
)
+ existing_dagruns = {(x.dag_id, x.logical_date): x for x in
existing_dagrun_objects}
+
+ # todo: AIP-76 we may want to update check existing to also check partitioned dag runs,
+ # but the thing is, there is not actually a restriction that
+ # we don't create new runs with the same partition key
+ # so it's unclear whether we should / need to.
+
+ partitioned_dags = set()
+ non_partitioned_dags: list[DagModel] = []
+ missing_dags = set()
+ serdags: dict[str, SerializedDAG] = {}
+ for dag in dag_models:
+ serdag = _get_current_dag(dag_id=dag.dag_id, session=session)
+ if serdag:
+ serdags[serdag.dag_id] = serdag
+ if isinstance(serdag.timetable, CronPartitionTimetable):
+ # todo: AIP-76 there may be a better way to identify this!
+ # should we use an attribute on BaseTimetable instead?
Review Comment:
Yep, I also remember that. Either way works, I think. No strong opinion.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]