rusackas commented on code in PR #40084:
URL: https://github.com/apache/superset/pull/40084#discussion_r3263875193
##########
superset/extensions/local_extensions_watcher.py:
##########
@@ -29,37 +29,113 @@
logger = logging.getLogger(__name__)
+# Sentinel file Flask watches via --extra-files. Touching it on a real change
+# triggers a server reload without depending on cwd or the location of any
+# Python source file.
+RELOAD_TRIGGER = Path(__file__).resolve().parent / ".reload_trigger"
+
# Guard to prevent multiple initializations
_watcher_initialized = False
_watcher_lock = threading.Lock()
-def _get_file_handler_class() -> Any:
+def _get_file_handler_class() -> Any: # noqa: C901
"""Get the file handler class, importing watchdog only when needed."""
try:
- from watchdog.events import FileSystemEventHandler
+ import hashlib
+
+ from watchdog.events import (
+ FileCreatedEvent,
+ FileModifiedEvent,
+ FileMovedEvent,
+ FileSystemEventHandler,
+ )
class LocalExtensionFileHandler(FileSystemEventHandler):
- """Custom file system event handler for LOCAL_EXTENSIONS
directories."""
+ """Custom file system event handler for LOCAL_EXTENSIONS
directories.
+
+ Only reacts to genuine content changes (create / modify / move) in
the
+ dist directory, verified by comparing a SHA-256 of the file's
content.
+ This avoids the Docker VirtioFS / osxfs problem where reading a
file
+ generates inotify events that watchdog surfaces as modifications.
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+ # sha256 of last-seen content, keyed by absolute path
+ self._file_hashes: dict[str, str] = {}
+ # Deduplicate: only trigger once per second across all files
+ self._last_trigger: float = 0.0
+ self._lock = threading.Lock()
+
+ # ── helpers ──────────────────────────────────────────────────────
+
+ @staticmethod
+ def _sha256(path: str) -> str | None:
+ try:
+ with open(path, "rb") as fh:
+ return hashlib.sha256(fh.read()).hexdigest()
+ except OSError:
+ return None
+
+ def _content_changed(self, path: str) -> bool:
+ """Return True only when the file's content differs from last
seen.
+
+ The first time a path is observed its hash is stored as the
baseline
+ and False is returned — that event is a 'first-seen', not a
change.
+ Only a subsequent event where the digest differs from the
baseline
+ is treated as a genuine content change.
+ """
+ digest = self._sha256(path)
+ if digest is None:
+ return False
+ old_digest = self._file_hashes.get(path)
+ self._file_hashes[path] = digest
+ if old_digest is None:
+ # First observation — record baseline, do not trigger
restart.
+ return False
Review Comment:
Applied in e7a863b0 (combined fix for all four review concerns). See commit
message for the four changes — short version: pre-populated baseline hashes so
the first edit isn't dropped, switched moves to use `dest_path`, moves now
trigger regardless of content match, and replaced leading-edge dedupe with
trailing debounce via `threading.Timer`.
##########
superset/extensions/local_extensions_watcher.py:
##########
@@ -29,37 +29,113 @@
logger = logging.getLogger(__name__)
+# Sentinel file Flask watches via --extra-files. Touching it on a real change
+# triggers a server reload without depending on cwd or the location of any
+# Python source file.
+RELOAD_TRIGGER = Path(__file__).resolve().parent / ".reload_trigger"
+
# Guard to prevent multiple initializations
_watcher_initialized = False
_watcher_lock = threading.Lock()
-def _get_file_handler_class() -> Any:
+def _get_file_handler_class() -> Any: # noqa: C901
"""Get the file handler class, importing watchdog only when needed."""
try:
- from watchdog.events import FileSystemEventHandler
+ import hashlib
+
+ from watchdog.events import (
+ FileCreatedEvent,
+ FileModifiedEvent,
+ FileMovedEvent,
+ FileSystemEventHandler,
+ )
class LocalExtensionFileHandler(FileSystemEventHandler):
- """Custom file system event handler for LOCAL_EXTENSIONS
directories."""
+ """Custom file system event handler for LOCAL_EXTENSIONS
directories.
+
+ Only reacts to genuine content changes (create / modify / move) in
the
+ dist directory, verified by comparing a SHA-256 of the file's
content.
+ This avoids the Docker VirtioFS / osxfs problem where reading a
file
+ generates inotify events that watchdog surfaces as modifications.
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+ # sha256 of last-seen content, keyed by absolute path
+ self._file_hashes: dict[str, str] = {}
+ # Deduplicate: only trigger once per second across all files
+ self._last_trigger: float = 0.0
+ self._lock = threading.Lock()
+
+ # ── helpers ──────────────────────────────────────────────────────
+
+ @staticmethod
+ def _sha256(path: str) -> str | None:
+ try:
+ with open(path, "rb") as fh:
+ return hashlib.sha256(fh.read()).hexdigest()
+ except OSError:
+ return None
+
+ def _content_changed(self, path: str) -> bool:
+ """Return True only when the file's content differs from last
seen.
+
+ The first time a path is observed its hash is stored as the
baseline
+ and False is returned — that event is a 'first-seen', not a
change.
+ Only a subsequent event where the digest differs from the
baseline
+ is treated as a genuine content change.
+ """
+ digest = self._sha256(path)
+ if digest is None:
+ return False
+ old_digest = self._file_hashes.get(path)
+ self._file_hashes[path] = digest
+ if old_digest is None:
+ # First observation — record baseline, do not trigger
restart.
+ return False
+ return old_digest != digest
+
+ # ── event handler
─────────────────────────────────────────────────
def on_any_event(self, event: Any) -> None:
- """Handle any file system event in the watched directories."""
+ """Handle file system events in the watched directories."""
if event.is_directory:
return
- # Only trigger on changes to files in `dist` directory
+ # Only react to true write events; skip access / close / open
etc.
+ if not isinstance(
+ event, (FileCreatedEvent, FileModifiedEvent,
FileMovedEvent)
+ ):
+ return
+
+ # Only care about files inside a `dist` directory
src = getattr(event, "src_path", None)
if not isinstance(src, str) or "dist" not in Path(src).parts:
return
+ # Verify the file content actually changed to ignore spurious
+ # inotify events generated by Docker bind-mount reads.
+ if not self._content_changed(src):
+ return
Review Comment:
Applied in e7a863b0 (combined fix for all four review concerns). See commit
message for the four changes — short version: pre-populated baseline hashes so
the first edit isn't dropped, switched moves to use `dest_path`, moves now
trigger regardless of content match, and replaced leading-edge dedupe with
trailing debounce via `threading.Timer`.
##########
superset/extensions/local_extensions_watcher.py:
##########
@@ -29,37 +29,113 @@
logger = logging.getLogger(__name__)
+# Sentinel file Flask watches via --extra-files. Touching it on a real change
+# triggers a server reload without depending on cwd or the location of any
+# Python source file.
+RELOAD_TRIGGER = Path(__file__).resolve().parent / ".reload_trigger"
+
# Guard to prevent multiple initializations
_watcher_initialized = False
_watcher_lock = threading.Lock()
-def _get_file_handler_class() -> Any:
+def _get_file_handler_class() -> Any: # noqa: C901
"""Get the file handler class, importing watchdog only when needed."""
try:
- from watchdog.events import FileSystemEventHandler
+ import hashlib
+
+ from watchdog.events import (
+ FileCreatedEvent,
+ FileModifiedEvent,
+ FileMovedEvent,
+ FileSystemEventHandler,
+ )
class LocalExtensionFileHandler(FileSystemEventHandler):
- """Custom file system event handler for LOCAL_EXTENSIONS
directories."""
+ """Custom file system event handler for LOCAL_EXTENSIONS
directories.
+
+ Only reacts to genuine content changes (create / modify / move) in
the
+ dist directory, verified by comparing a SHA-256 of the file's
content.
+ This avoids the Docker VirtioFS / osxfs problem where reading a
file
+ generates inotify events that watchdog surfaces as modifications.
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+ # sha256 of last-seen content, keyed by absolute path
+ self._file_hashes: dict[str, str] = {}
+ # Deduplicate: only trigger once per second across all files
+ self._last_trigger: float = 0.0
+ self._lock = threading.Lock()
+
+ # ── helpers ──────────────────────────────────────────────────────
+
+ @staticmethod
+ def _sha256(path: str) -> str | None:
+ try:
+ with open(path, "rb") as fh:
+ return hashlib.sha256(fh.read()).hexdigest()
+ except OSError:
+ return None
+
+ def _content_changed(self, path: str) -> bool:
+ """Return True only when the file's content differs from last
seen.
+
+ The first time a path is observed its hash is stored as the
baseline
+ and False is returned — that event is a 'first-seen', not a
change.
+ Only a subsequent event where the digest differs from the
baseline
+ is treated as a genuine content change.
+ """
+ digest = self._sha256(path)
+ if digest is None:
+ return False
+ old_digest = self._file_hashes.get(path)
+ self._file_hashes[path] = digest
+ if old_digest is None:
+ # First observation — record baseline, do not trigger
restart.
+ return False
+ return old_digest != digest
+
+ # ── event handler
─────────────────────────────────────────────────
def on_any_event(self, event: Any) -> None:
- """Handle any file system event in the watched directories."""
+ """Handle file system events in the watched directories."""
if event.is_directory:
return
- # Only trigger on changes to files in `dist` directory
+ # Only react to true write events; skip access / close / open
etc.
+ if not isinstance(
+ event, (FileCreatedEvent, FileModifiedEvent,
FileMovedEvent)
+ ):
+ return
+
+ # Only care about files inside a `dist` directory
src = getattr(event, "src_path", None)
if not isinstance(src, str) or "dist" not in Path(src).parts:
return
+ # Verify the file content actually changed to ignore spurious
+ # inotify events generated by Docker bind-mount reads.
+ if not self._content_changed(src):
+ return
+
+ # Debounce: one restart per second max, regardless of how many
+ # files webpack writes simultaneously.
+ now = time.monotonic()
+ with self._lock:
+ if now - self._last_trigger < 1.0:
+ return
+ self._last_trigger = now
Review Comment:
Applied in e7a863b0 (combined fix for all four review concerns). See commit
message for the four changes — short version: pre-populated baseline hashes so
the first edit isn't dropped, switched moves to use `dest_path`, moves now
trigger regardless of content match, and replaced leading-edge dedupe with
trailing debounce via `threading.Timer`.
##########
superset/extensions/local_extensions_watcher.py:
##########
@@ -29,37 +29,113 @@
logger = logging.getLogger(__name__)
+# Sentinel file Flask watches via --extra-files. Touching it on a real change
+# triggers a server reload without depending on cwd or the location of any
+# Python source file.
+RELOAD_TRIGGER = Path(__file__).resolve().parent / ".reload_trigger"
+
# Guard to prevent multiple initializations
_watcher_initialized = False
_watcher_lock = threading.Lock()
-def _get_file_handler_class() -> Any:
+def _get_file_handler_class() -> Any: # noqa: C901
"""Get the file handler class, importing watchdog only when needed."""
try:
- from watchdog.events import FileSystemEventHandler
+ import hashlib
+
+ from watchdog.events import (
+ FileCreatedEvent,
+ FileModifiedEvent,
+ FileMovedEvent,
+ FileSystemEventHandler,
+ )
class LocalExtensionFileHandler(FileSystemEventHandler):
- """Custom file system event handler for LOCAL_EXTENSIONS
directories."""
+ """Custom file system event handler for LOCAL_EXTENSIONS
directories.
+
+ Only reacts to genuine content changes (create / modify / move) in
the
+ dist directory, verified by comparing a SHA-256 of the file's
content.
+ This avoids the Docker VirtioFS / osxfs problem where reading a
file
+ generates inotify events that watchdog surfaces as modifications.
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+ # sha256 of last-seen content, keyed by absolute path
+ self._file_hashes: dict[str, str] = {}
+ # Deduplicate: only trigger once per second across all files
+ self._last_trigger: float = 0.0
+ self._lock = threading.Lock()
+
+ # ── helpers ──────────────────────────────────────────────────────
+
+ @staticmethod
+ def _sha256(path: str) -> str | None:
+ try:
+ with open(path, "rb") as fh:
+ return hashlib.sha256(fh.read()).hexdigest()
+ except OSError:
+ return None
+
+ def _content_changed(self, path: str) -> bool:
+ """Return True only when the file's content differs from last
seen.
+
+ The first time a path is observed its hash is stored as the
baseline
+ and False is returned — that event is a 'first-seen', not a
change.
+ Only a subsequent event where the digest differs from the
baseline
+ is treated as a genuine content change.
+ """
+ digest = self._sha256(path)
+ if digest is None:
+ return False
+ old_digest = self._file_hashes.get(path)
+ self._file_hashes[path] = digest
+ if old_digest is None:
+ # First observation — record baseline, do not trigger
restart.
+ return False
+ return old_digest != digest
+
+ # ── event handler
─────────────────────────────────────────────────
def on_any_event(self, event: Any) -> None:
- """Handle any file system event in the watched directories."""
+ """Handle file system events in the watched directories."""
if event.is_directory:
return
- # Only trigger on changes to files in `dist` directory
+ # Only react to true write events; skip access / close / open
etc.
+ if not isinstance(
+ event, (FileCreatedEvent, FileModifiedEvent,
FileMovedEvent)
+ ):
+ return
+
+ # Only care about files inside a `dist` directory
src = getattr(event, "src_path", None)
if not isinstance(src, str) or "dist" not in Path(src).parts:
return
+ # Verify the file content actually changed to ignore spurious
+ # inotify events generated by Docker bind-mount reads.
+ if not self._content_changed(src):
+ return
+
+ # Debounce: one restart per second max, regardless of how many
+ # files webpack writes simultaneously.
+ now = time.monotonic()
+ with self._lock:
+ if now - self._last_trigger < 1.0:
+ return
+ self._last_trigger = now
+
logger.info(
"File change detected in LOCAL_EXTENSIONS: %s",
event.src_path
)
- # Touch superset/__init__.py to trigger Flask's file watcher
- superset_init = Path("superset/__init__.py")
- logger.info("Triggering restart by touching %s", superset_init)
- os.utime(superset_init, (time.time(), time.time()))
+ # Touch the dedicated reload-trigger sentinel file.
+ # Flask watches this via --extra-files; it is never read by
Python
+ # so Docker VirtioFS will not generate spurious inotify events
on it.
+ logger.info("Triggering restart by touching %s",
RELOAD_TRIGGER)
+ os.utime(RELOAD_TRIGGER, (time.time(), time.time()))
Review Comment:
Applied in e7a863b0 (combined fix for all four review concerns). See commit
message for the four changes — short version: pre-populated baseline hashes so
the first edit isn't dropped, switched moves to use `dest_path`, moves now
trigger regardless of content match, and replaced leading-edge dedupe with
trailing debounce via `threading.Timer`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]