jedcunningham commented on code in PR #45371:
URL: https://github.com/apache/airflow/pull/45371#discussion_r1906145369


##########
airflow/dag_processing/manager.py:
##########
@@ -631,25 +646,60 @@ def _get_priority_filelocs(cls, session: Session = NEW_SESSION):
             session.delete(request)
         return filelocs
 
-    def _refresh_dag_dir(self) -> bool:
-        """Refresh file paths from dag dir if we haven't done it for too long."""
-        now = time.monotonic()
-        elapsed_time_since_refresh = now - self.last_dag_dir_refresh_time
-        if elapsed_time_since_refresh <= self.dag_dir_list_interval:
-            return False
+    def _refresh_dag_bundles(self):
+        """Refresh DAG bundles, if required."""
+        now = timezone.utcnow()
 
-        # Build up a list of Python files that could contain DAGs
-        self.log.info("Searching for files in %s", self._dag_directory)
-        self._file_paths = list_py_file_paths(self._dag_directory)
-        self.last_dag_dir_refresh_time = now
-        self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory)
-        self.set_file_paths(self._file_paths)
+        self.log.info("Refreshing DAG bundles")
+
+        for bundle in self._dag_bundles:
+            # TODO: AIP-66 test to make sure we get a fresh record from the db and it's not cached
+            with create_session() as session:
+                bundle_model = session.query(DagBundleModel).get(bundle.name)
+            elapsed_time_since_refresh = (
+                now - (bundle_model.last_refreshed or timezone.utc_epoch())
+            ).total_seconds()
+            if not elapsed_time_since_refresh > bundle.refresh_interval:
+                # or bundle_model.version != bundle.get_current_version():
+                self.log.info("Not time to refresh %s", bundle.name)
+                continue
 
-        try:
-            self.log.debug("Removing old import errors")
-            self.clear_nonexistent_import_errors()
-        except Exception:
-            self.log.exception("Error removing old import errors")
+            # TODO: AIP-66 locking / dealing with multiple processors
+            self.log.info("Time to refresh %s", bundle.name)
+            old_version = bundle.get_current_version()
+            bundle.refresh()
+            bundle_model.last_refreshed = now
+
+            if old_version != bundle.get_current_version():
+                self.log.info(
+                    "Version changed for %s, new version: %s", bundle.name, bundle.get_current_version()
+                )
+            bundle_file_paths = self._refresh_dag_dir(bundle)
+            # remove all files from the bundle, then add the new ones
+            self._file_paths = [f for f in self._file_paths if f.bundle_name != bundle_model.name]
+            self._file_paths.extend(
+                DagFileInfo(path=path, bundle_name=bundle_model.name) for path in bundle_file_paths
+            )
+
+            try:
+                self.log.debug("Removing old import errors")
+                self.clear_nonexistent_import_errors()
+            except Exception:
+                self.log.exception("Error removing old import errors")
+
+            self._bundle_versions[bundle_model.name] = bundle.get_current_version()
+            self.log.info("Found %s files for bundle %s", len(bundle_file_paths), bundle.name)
+            # TODO: AIP-66 detect if version changed and update accordingly
+
+    def _refresh_dag_dir(self, bundle: BaseDagBundle) -> list[str]:

Review Comment:
   No, not really :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to