ashb commented on a change in pull request #16182:
URL: https://github.com/apache/airflow/pull/16182#discussion_r663758644
##########
File path: airflow/models/dagbag.py
##########
@@ -166,6 +166,34 @@ def dag_ids(self) -> List[str]:
"""
return list(self.dags.keys())
+ @provide_session
+ def has_dag(self, dag_id: str, session: Session = None):
+ """
+ Checks the "local" cache, if it exists, and is not older than the
configured cache time, return True
+ else check in the DB if the dag exists, return True, False otherwise
+ """
+ from airflow.models.serialized_dag import SerializedDagModel
+
+ sd_last_updated_datetime =
SerializedDagModel.get_last_updated_datetime(
+ dag_id=dag_id,
+ session=session,
+ )
+ sd_has_dag = sd_last_updated_datetime is not None
+ if dag_id not in self.dags:
+ return sd_has_dag
+ if dag_id not in self.dags_last_fetched:
+ return sd_has_dag
+ min_serialized_dag_fetch_secs =
timedelta(seconds=settings.MIN_SERIALIZED_DAG_FETCH_INTERVAL)
+ if timezone.utcnow() < self.dags_last_fetched[dag_id] +
min_serialized_dag_fetch_secs:
+ return sd_has_dag
+ if sd_has_dag:
+ return True
Review comment:
We should refactor this to delay the DB check until we need it -- for
instance, if we have the dag locally and it was fetched less than the
configured timeout ago, then we don't need to ask the DB.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]