Lee-W commented on code in PR #59115:
URL: https://github.com/apache/airflow/pull/59115#discussion_r2689806730
##########
airflow-core/src/airflow/dag_processing/collection.py:
##########
@@ -144,7 +178,23 @@ def calculate(cls, dag: LazyDeserializedDAG, *, session:
Session) -> Self:
if not dag.timetable.can_be_scheduled:
return cls(None, 0)
- latest_run = session.scalar(_get_latest_runs_stmt(dag_id=dag.dag_id))
+ if isinstance( # todo: AIP-76 what's a more general way to detect?
Review Comment:
If we're not adding a new attribute, then this is probably what we can do?
##########
airflow-core/src/airflow/jobs/scheduler_job_runner.py:
##########
@@ -1765,24 +1767,48 @@ def _create_dag_runs(self, dag_models:
Collection[DagModel], session: Session) -
# as DagModel.dag_id and DagModel.next_dagrun
# This list is used to verify if the DagRun already exist so that we
don't attempt to create
# duplicate DagRuns
- existing_dagruns = (
- session.execute(
- select(DagRun.dag_id, DagRun.logical_date).where(
+ existing_dagrun_objects = (
+ session.scalars(
+ select(DagRun)
+ .where(
tuple_(DagRun.dag_id, DagRun.logical_date).in_(
(dm.dag_id, dm.next_dagrun) for dm in dag_models
- ),
+ )
)
+ .options(load_only(DagRun.dag_id, DagRun.logical_date))
)
.unique()
.all()
)
+ existing_dagruns = {(x.dag_id, x.logical_date): x for x in
existing_dagrun_objects}
+
+ # todo: AIP-76 we may want to update check existing to also check
partitioned dag runs,
+ # but the thing is, there is not actually a restriction that
+ # we don't create new runs with the same partition key
+ # so it's unclear whether we should / need to.
+
+ partitioned_dags = set()
+ non_partitioned_dags: list[DagModel] = []
+ missing_dags = set()
+ serdags: dict[str, SerializedDAG] = {}
+ for dag in dag_models:
+ serdag = _get_current_dag(dag_id=dag.dag_id, session=session)
+ if serdag:
+ serdags[serdag.dag_id] = serdag
+ if isinstance(serdag.timetable, CronPartitionTimetable):
+ # todo: AIP-76 there may be a better way to identify this!
+ # should we use an attribute on BaseTimetable instead?
Review Comment:
A base partition timetable or an attribute will do. I kinda like the base
partition timetable better.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]