bito-code-review[bot] commented on code in PR #40084:
URL: https://github.com/apache/superset/pull/40084#discussion_r3231542750
##########
superset/extensions/local_extensions_watcher.py:
##########
@@ -29,37 +29,113 @@
logger = logging.getLogger(__name__)
+# Sentinel file Flask watches via --extra-files. Touching it on a real change
+# triggers a server reload without depending on cwd or the location of any
+# Python source file.
+RELOAD_TRIGGER = Path(__file__).resolve().parent / ".reload_trigger"
+
# Guard to prevent multiple initializations
_watcher_initialized = False
_watcher_lock = threading.Lock()
-def _get_file_handler_class() -> Any:
+def _get_file_handler_class() -> Any: # noqa: C901
"""Get the file handler class, importing watchdog only when needed."""
try:
- from watchdog.events import FileSystemEventHandler
+ import hashlib
+
+ from watchdog.events import (
+ FileCreatedEvent,
+ FileModifiedEvent,
+ FileMovedEvent,
+ FileSystemEventHandler,
+ )
class LocalExtensionFileHandler(FileSystemEventHandler):
- """Custom file system event handler for LOCAL_EXTENSIONS
directories."""
+ """Custom file system event handler for LOCAL_EXTENSIONS
directories.
+
+ Only reacts to genuine content changes (create / modify / move) in
the
+ dist directory, verified by comparing a SHA-256 of the file's
content.
+ This avoids the Docker VirtioFS / osxfs problem where reading a
file
+ generates inotify events that watchdog surfaces as modifications.
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+ # sha256 of last-seen content, keyed by absolute path
+ self._file_hashes: dict[str, str] = {}
+ # Deduplicate: only trigger once per second across all files
+ self._last_trigger: float = 0.0
+ self._lock = threading.Lock()
+
+ # ── helpers ──────────────────────────────────────────────────────
+
+ @staticmethod
+ def _sha256(path: str) -> str | None:
+ try:
+ with open(path, "rb") as fh:
+ return hashlib.sha256(fh.read()).hexdigest()
+ except OSError:
+ return None
+
+ def _content_changed(self, path: str) -> bool:
+ """Return True only when the file's content differs from last
seen.
+
+ The first time a path is observed its hash is stored as the
baseline
+ and False is returned — that event is a 'first-seen', not a
change.
+ Only a subsequent event where the digest differs from the
baseline
+ is treated as a genuine content change.
+ """
+ digest = self._sha256(path)
+ if digest is None:
+ return False
+ old_digest = self._file_hashes.get(path)
+ self._file_hashes[path] = digest
+ if old_digest is None:
+ # First observation — record baseline, do not trigger
restart.
+ return False
+ return old_digest != digest
+
+ # ── event handler
─────────────────────────────────────────────────
def on_any_event(self, event: Any) -> None:
- """Handle any file system event in the watched directories."""
+ """Handle file system events in the watched directories."""
if event.is_directory:
return
- # Only trigger on changes to files in `dist` directory
+ # Only react to true write events; skip access / close / open
etc.
+ if not isinstance(
+ event, (FileCreatedEvent, FileModifiedEvent,
FileMovedEvent)
+ ):
+ return
+
+ # Only care about files inside a `dist` directory
src = getattr(event, "src_path", None)
if not isinstance(src, str) or "dist" not in Path(src).parts:
return
+ # Verify the file content actually changed to ignore spurious
+ # inotify events generated by Docker bind-mount reads.
+ if not self._content_changed(src):
+ return
+
+ # Debounce: one restart per second max, regardless of how many
+ # files webpack writes simultaneously.
+ now = time.monotonic()
+ with self._lock:
+ if now - self._last_trigger < 1.0:
+ return
+ self._last_trigger = now
+
logger.info(
"File change detected in LOCAL_EXTENSIONS: %s",
event.src_path
)
- # Touch superset/__init__.py to trigger Flask's file watcher
- superset_init = Path("superset/__init__.py")
- logger.info("Triggering restart by touching %s", superset_init)
- os.utime(superset_init, (time.time(), time.time()))
+ # Touch the dedicated reload-trigger sentinel file.
+ # Flask watches this via --extra-files; it is never read by
Python
+ # so Docker VirtioFS will not generate spurious inotify events
on it.
+ logger.info("Triggering restart by touching %s",
RELOAD_TRIGGER)
+ os.utime(RELOAD_TRIGGER, (time.time(), time.time()))
Review Comment:
<div>
<div id="suggestion">
<div id="issue"><b>Incorrect FileMovedEvent handling</b></div>
<div id="fix">
The event handler incorrectly handles file moves: it only checks src_path
for 'dist' inclusion and requires content changes, but moves into/out of 'dist'
should trigger restarts regardless of content (matching original behavior).
This could miss restarts when files are moved into watched directories. The fix
separates move handling to trigger on path checks alone.
</div>
<details>
<summary>
<b>Code suggestion</b>
</summary>
<blockquote>Check the AI-generated fix before applying</blockquote>
<div id="code">
```
- def on_any_event(self, event: Any) -> None:
- """Handle file system events in the watched directories."""
- if event.is_directory:
- return
-
- # Only react to true write events; skip access / close /
open etc.
- if not isinstance(
- event, (FileCreatedEvent, FileModifiedEvent,
FileMovedEvent)
- ):
- return
-
- # Only care about files inside a `dist` directory
- src = getattr(event, "src_path", None)
- if not isinstance(src, str) or "dist" not in
Path(src).parts:
- return
-
- # Verify the file content actually changed to ignore
spurious
- # inotify events generated by Docker bind-mount reads.
- if not self._content_changed(src):
- return
-
- # Debounce: one restart per second max, regardless of how
many
- # files webpack writes simultaneously.
- now = time.monotonic()
- with self._lock:
- if now - self._last_trigger < 1.0:
- return
- self._last_trigger = now
-
- logger.info(
- "File change detected in LOCAL_EXTENSIONS: %s",
event.src_path
- )
-
- # Touch the dedicated reload-trigger sentinel file.
- # Flask watches this via --extra-files; it is never read
by Python
- # so Docker VirtioFS will not generate spurious inotify
events on it.
- logger.info("Triggering restart by touching %s",
RELOAD_TRIGGER)
- os.utime(RELOAD_TRIGGER, (time.time(), time.time()))
+ def on_any_event(self, event: Any) -> None:
+ """Handle file system events in the watched directories."""
- if event.is_directory:
- return
-
- # Handle FileMovedEvent separately
- if isinstance(event, FileMovedEvent):
- src = getattr(event, "src_path", None)
- dest = getattr(event, "dest_path", None)
- if not ((isinstance(src, str) and "dist" in
Path(src).parts) or (isinstance(dest, str) and "dist" in Path(dest).parts)):
- return
- # For moves, trigger without content check
- elif isinstance(event, (FileCreatedEvent,
FileModifiedEvent)):
- src = getattr(event, "src_path", None)
- if not isinstance(src, str) or "dist" not in
Path(src).parts:
- return
- if not self._content_changed(src):
- return
- else:
- return
-
- # Debounce: one restart per second max, regardless of how
many
- # files webpack writes simultaneously.
- now = time.monotonic()
- with self._lock:
- if now - self._last_trigger < 1.0:
- return
- self._last_trigger = now
-
- logger.info(
- "File change detected in LOCAL_EXTENSIONS: %s",
event.src_path
- )
-
- # Touch the dedicated reload-trigger sentinel file.
- # Flask watches this via --extra-files; it is never read
by Python
- # so Docker VirtioFS will not generate spurious inotify
events on it.
- logger.info("Triggering restart by touching %s",
RELOAD_TRIGGER)
- os.utime(RELOAD_TRIGGER, (time.time(), time.time()))
```
</div>
</details>
</div>
<small><i>Code Review Run #faeec6</i></small>
</div>
---
Should Bito avoid suggestions like this for future reviews? (<a
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
- [ ] Yes, avoid them
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]