bito-code-review[bot] commented on code in PR #40084:
URL: https://github.com/apache/superset/pull/40084#discussion_r3231542750


##########
superset/extensions/local_extensions_watcher.py:
##########
@@ -29,37 +29,113 @@
 
 logger = logging.getLogger(__name__)
 
+# Sentinel file Flask watches via --extra-files.  Touching it on a real change
+# triggers a server reload without depending on cwd or the location of any
+# Python source file.
+RELOAD_TRIGGER = Path(__file__).resolve().parent / ".reload_trigger"
+
 # Guard to prevent multiple initializations
 _watcher_initialized = False
 _watcher_lock = threading.Lock()
 
 
-def _get_file_handler_class() -> Any:
+def _get_file_handler_class() -> Any:  # noqa: C901
     """Get the file handler class, importing watchdog only when needed."""
     try:
-        from watchdog.events import FileSystemEventHandler
+        import hashlib
+
+        from watchdog.events import (
+            FileCreatedEvent,
+            FileModifiedEvent,
+            FileMovedEvent,
+            FileSystemEventHandler,
+        )
 
         class LocalExtensionFileHandler(FileSystemEventHandler):
-            """Custom file system event handler for LOCAL_EXTENSIONS 
directories."""
+            """Custom file system event handler for LOCAL_EXTENSIONS 
directories.
+
+            Only reacts to genuine content changes (create / modify / move) in 
the
+            dist directory, verified by comparing a SHA-256 of the file's 
content.
+            This avoids the Docker VirtioFS / osxfs problem where reading a 
file
+            generates inotify events that watchdog surfaces as modifications.
+            """
+
+            def __init__(self) -> None:
+                super().__init__()
+                # sha256 of last-seen content, keyed by absolute path
+                self._file_hashes: dict[str, str] = {}
+                # Deduplicate: only trigger once per second across all files
+                self._last_trigger: float = 0.0
+                self._lock = threading.Lock()
+
+            # ── helpers ──────────────────────────────────────────────────────
+
+            @staticmethod
+            def _sha256(path: str) -> str | None:
+                try:
+                    with open(path, "rb") as fh:
+                        return hashlib.sha256(fh.read()).hexdigest()
+                except OSError:
+                    return None
+
+            def _content_changed(self, path: str) -> bool:
+                """Return True only when the file's content differs from last 
seen.
+
+                The first time a path is observed its hash is stored as the 
baseline
+                and False is returned — that event is a 'first-seen', not a 
change.
+                Only a subsequent event where the digest differs from the 
baseline
+                is treated as a genuine content change.
+                """
+                digest = self._sha256(path)
+                if digest is None:
+                    return False
+                old_digest = self._file_hashes.get(path)
+                self._file_hashes[path] = digest
+                if old_digest is None:
+                    # First observation — record baseline, do not trigger 
restart.
+                    return False
+                return old_digest != digest
+
+            # ── event handler 
─────────────────────────────────────────────────
 
             def on_any_event(self, event: Any) -> None:
-                """Handle any file system event in the watched directories."""
+                """Handle file system events in the watched directories."""
                 if event.is_directory:
                     return
 
-                # Only trigger on changes to files in `dist` directory
+                # Only react to true write events; skip access / close / open 
etc.
+                if not isinstance(
+                    event, (FileCreatedEvent, FileModifiedEvent, 
FileMovedEvent)
+                ):
+                    return
+
+                # Only care about files inside a `dist` directory
                 src = getattr(event, "src_path", None)
                 if not isinstance(src, str) or "dist" not in Path(src).parts:
                     return
 
+                # Verify the file content actually changed to ignore spurious
+                # inotify events generated by Docker bind-mount reads.
+                if not self._content_changed(src):
+                    return
+
+                # Debounce: one restart per second max, regardless of how many
+                # files webpack writes simultaneously.
+                now = time.monotonic()
+                with self._lock:
+                    if now - self._last_trigger < 1.0:
+                        return
+                    self._last_trigger = now
+
                 logger.info(
                     "File change detected in LOCAL_EXTENSIONS: %s", 
event.src_path
                 )
 
-                # Touch superset/__init__.py to trigger Flask's file watcher
-                superset_init = Path("superset/__init__.py")
-                logger.info("Triggering restart by touching %s", superset_init)
-                os.utime(superset_init, (time.time(), time.time()))
+                # Touch the dedicated reload-trigger sentinel file.
+                # Flask watches this via --extra-files; it is never read by 
Python
+                # so Docker VirtioFS will not generate spurious inotify events 
on it.
+                logger.info("Triggering restart by touching %s", 
RELOAD_TRIGGER)
+                os.utime(RELOAD_TRIGGER, (time.time(), time.time()))

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Incorrect FileMovedEvent handling</b></div>
   <div id="fix">
   
   The event handler incorrectly handles file moves: it only checks src_path 
for 'dist' inclusion and requires content changes, but moves into/out of 'dist' 
should trigger restarts regardless of content (matching original behavior). 
This could miss restarts when files are moved into watched directories. The fix 
separates move handling to trigger on path checks alone.
   </div>
   
   
   <details>
   <summary>
   <b>Code suggestion</b>
   </summary>
   <blockquote>Check the AI-generated fix before applying</blockquote>
   <div id="code">
   
   
   ```
    -            def on_any_event(self, event: Any) -> None:
    -                """Handle file system events in the watched directories."""
    -                if event.is_directory:
    -                    return
    -
    -                # Only react to true write events; skip access / close / 
open etc.
    -                if not isinstance(
    -                    event, (FileCreatedEvent, FileModifiedEvent, 
FileMovedEvent)
    -                ):
    -                    return
    -
    -                # Only care about files inside a `dist` directory
    -                src = getattr(event, "src_path", None)
    -                if not isinstance(src, str) or "dist" not in 
Path(src).parts:
    -                    return
    -
    -                # Verify the file content actually changed to ignore 
spurious
    -                # inotify events generated by Docker bind-mount reads.
    -                if not self._content_changed(src):
    -                    return
    -
    -                # Debounce: one restart per second max, regardless of how 
many
    -                # files webpack writes simultaneously.
    -                now = time.monotonic()
    -                with self._lock:
    -                    if now - self._last_trigger < 1.0:
    -                        return
    -                    self._last_trigger = now
    -
    -                logger.info(
    -                    "File change detected in LOCAL_EXTENSIONS: %s", 
event.src_path
    -                )
    -
    -                # Touch the dedicated reload-trigger sentinel file.
    -                # Flask watches this via --extra-files; it is never read 
by Python
    -                # so Docker VirtioFS will not generate spurious inotify 
events on it.
    -                logger.info("Triggering restart by touching %s", 
RELOAD_TRIGGER)
    -                os.utime(RELOAD_TRIGGER, (time.time(), time.time()))
    +            def on_any_event(self, event: Any) -> None:
    +                """Handle file system events in the watched directories."""
    -                if event.is_directory:
    -                    return
    -
    -                # Handle FileMovedEvent separately
    -                if isinstance(event, FileMovedEvent):
    -                    src = getattr(event, "src_path", None)
    -                    dest = getattr(event, "dest_path", None)
    -                    if not ((isinstance(src, str) and "dist" in 
Path(src).parts) or (isinstance(dest, str) and "dist" in Path(dest).parts)):
    -                        return
    -                    # For moves, trigger without content check
    -                elif isinstance(event, (FileCreatedEvent, 
FileModifiedEvent)):
    -                    src = getattr(event, "src_path", None)
    -                    if not isinstance(src, str) or "dist" not in 
Path(src).parts:
    -                        return
    -                    if not self._content_changed(src):
    -                        return
    -                else:
    -                    return
    -
    -                # Debounce: one restart per second max, regardless of how 
many
    -                # files webpack writes simultaneously.
    -                now = time.monotonic()
    -                with self._lock:
    -                    if now - self._last_trigger < 1.0:
    -                        return
    -                    self._last_trigger = now
    -
    -                logger.info(
    -                    "File change detected in LOCAL_EXTENSIONS: %s", 
event.src_path
    -                )
    -
    -                # Touch the dedicated reload-trigger sentinel file.
    -                # Flask watches this via --extra-files; it is never read 
by Python
    -                # so Docker VirtioFS will not generate spurious inotify 
events on it.
    -                logger.info("Triggering restart by touching %s", 
RELOAD_TRIGGER)
    -                os.utime(RELOAD_TRIGGER, (time.time(), time.time()))
   ```
   
   </div>
   </details>
   
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #faeec6</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to