dstandish commented on code in PR #46248:
URL: https://github.com/apache/airflow/pull/46248#discussion_r1965244888


##########
airflow/models/backfill.py:
##########
@@ -263,57 +286,54 @@ def _create_backfill_dag_run(
     backfill_sort_ordinal,
     session,
 ):
-    with session.begin_nested() as nested:
-        dr = session.scalar(
-            with_row_locks(
-                query=_get_latest_dag_run_row_query(info, session),
-                session=session,
-            ),
+    with session.begin_nested():
+        should_skip_create_backfill = should_create_backfill_dag_run(
+            info, reprocess_behavior, backfill_id, backfill_sort_ordinal, 
session
         )
-        if dr:
-            non_create_reason = _get_dag_run_no_create_reason(dr, 
reprocess_behavior)
-            if non_create_reason:
-                # rolling back here restores to start of this nested tran
-                # which releases the lock on the latest dag run, since we
-                # are not creating a new one
-                nested.rollback()
-                session.add(
-                    BackfillDagRun(
-                        backfill_id=backfill_id,
-                        dag_run_id=None,
-                        logical_date=info.logical_date,
-                        exception_reason=non_create_reason,
-                        sort_ordinal=backfill_sort_ordinal,
-                    )
-                )
-                return
+        if should_skip_create_backfill:
+            return
+
         dag_version = DagVersion.get_latest_version(dag.dag_id, 
session=session)
-        dr = dag.create_dagrun(
-            run_id=dag.timetable.generate_run_id(
-                run_type=DagRunType.BACKFILL_JOB,
+        try:
+            dr = dag.create_dagrun(
+                run_id=dag.timetable.generate_run_id(
+                    run_type=DagRunType.BACKFILL_JOB,
+                    logical_date=info.logical_date,
+                    data_interval=info.data_interval,
+                ),
                 logical_date=info.logical_date,
                 data_interval=info.data_interval,
-            ),
-            logical_date=info.logical_date,
-            data_interval=info.data_interval,
-            run_after=info.run_after,
-            conf=dag_run_conf,
-            run_type=DagRunType.BACKFILL_JOB,
-            triggered_by=DagRunTriggeredByType.BACKFILL,
-            dag_version=dag_version,
-            state=DagRunState.QUEUED,
-            start_date=timezone.utcnow(),
-            backfill_id=backfill_id,
-            session=session,
-        )
-        session.add(
-            BackfillDagRun(
+                run_after=info.run_after,
+                conf=dag_run_conf,
+                run_type=DagRunType.BACKFILL_JOB,
+                triggered_by=DagRunTriggeredByType.BACKFILL,
+                dag_version=dag_version,
+                state=DagRunState.QUEUED,
+                start_date=timezone.utcnow(),
                 backfill_id=backfill_id,
-                dag_run_id=dr.id,
-                sort_ordinal=backfill_sort_ordinal,
-                logical_date=info.logical_date,
+                session=session,
+            )
+            session.add(
+                BackfillDagRun(
+                    backfill_id=backfill_id,
+                    dag_run_id=dr.id,
+                    sort_ordinal=backfill_sort_ordinal,
+                    logical_date=info.logical_date,
+                )
+            )
+        except IntegrityError:
+            log.info(
+                "Skipped creating backfill dag run for dag_id=%s 
backfill_id=%s, logical_date=%s (already exists)",
+                dr.dag_id,
+                dr.id,
+                info.logical_date,
+            )
+            log.info("Doing session rollback.")
+            session.rollback()
+
+            should_create_backfill_dag_run(

Review Comment:
   Yeah, the problem here is in part the naming.
   
   A function called `should_create_x` reads like it will just give you
   information, so you would not expect it to create records, nor would you
   expect to depend on it to create a record.
   
   Otherwise it's confusing. You could either get rid of this method, or limit
   it to only giving information (e.g. maybe just return
   `BackfillDagRunExceptionReason | None` and do the insert conditionally at
   the call site).
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to