codeant-ai-for-open-source[bot] commented on code in PR #36433:
URL: https://github.com/apache/superset/pull/36433#discussion_r2590596323


##########
scripts/check-dependencies.py:
##########
@@ -0,0 +1,284 @@
+#!/usr/bin/env python3
+"""
+Script to check for unused dependencies in Superset frontend
+"""
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Dict, List, Set, Tuple
+import re
+
+class DependencyChecker:
+    def __init__(self, base_path: str = "."):
+        self.base_path = Path(base_path)
+        self.package_json_path = self.base_path / "package.json"
+        self.package_lock_path = self.base_path / "package-lock.json"
+        self.src_paths = [
+            self.base_path / "src",
+            self.base_path / "packages",
+            self.base_path / "plugins",
+        ]
+        
+    def load_package_json(self) -> Dict:
+        """Load and return package.json content"""
+        with open(self.package_json_path, 'r') as f:
+            return json.load(f)
+    
+    def get_all_dependencies(self) -> Dict[str, str]:
+        """Get all dependencies from package.json"""
+        package_data = self.load_package_json()
+        deps = {}
+        
+        for dep_type in ['dependencies', 'devDependencies', 'peerDependencies']:
+            if dep_type in package_data:
+                deps.update(package_data[dep_type])
+        
+        return deps
+    
+    def search_for_imports(self, package_name: str) -> Tuple[bool, List[str]]:
+        """Search for imports of a package in the codebase"""
+        patterns = [
+            f"from ['\"]{{}}['\"]",
+            f"require\\(['\"]{{}}['\"]",
+            f"import.*['\"]{{}}['\"]",
+            f"from ['\"]{{}}/.+['\"]",  # Subpath imports
+            f"require\\(['\"]{{}}/.+['\"]",  # Subpath requires
+        ]
+        
+        found_files = []
+        
+        for src_path in self.src_paths:
+            if not src_path.exists():
+                continue
+                
+            for pattern in patterns:
+                search_pattern = pattern.format(re.escape(package_name))
+                try:
+                    # Use ripgrep for faster searching
+                    result = subprocess.run(
+                        ["rg", "-l", search_pattern, str(src_path)],
+                        capture_output=True,
+                        text=True
+                    )
+                    if result.returncode == 0 and result.stdout:
+                        files = result.stdout.strip().split('\n')
+                        found_files.extend(files)
+                except FileNotFoundError:
+                    # Fallback to grep if ripgrep not available
+                    result = subprocess.run(
+                        ["grep", "-r", "-l", "-E", search_pattern, str(src_path)],
+                        capture_output=True,
+                        text=True
+                    )
+                    if result.returncode == 0 and result.stdout:
+                        files = result.stdout.strip().split('\n')
+                        found_files.extend(files)
+        
+        # Remove duplicates
+        found_files = list(set(filter(None, found_files)))
+        return len(found_files) > 0, found_files
+    
+    def check_transitive_dependencies(self, package_name: str) -> List[str]:
+        """Check what packages depend on this package"""
+        dependent_packages = []
+        
+        if self.package_lock_path.exists():
+            with open(self.package_lock_path, 'r') as f:
+                lock_data = json.load(f)
+            
+            # Check packages for dependencies
+            if 'packages' in lock_data:
+                for pkg_path, pkg_info in lock_data['packages'].items():
+                    if 'dependencies' in pkg_info:
+                        if package_name in pkg_info['dependencies']:
+                            # Extract package name from path
+                            if pkg_path.startswith('node_modules/'):
+                                pkg_name = pkg_path.replace('node_modules/', '')
+                                dependent_packages.append(pkg_name)
+        
+        return dependent_packages
+    
+    def is_types_package(self, package_name: str) -> bool:
+        """Check if this is a TypeScript types package"""
+        return package_name.startswith('@types/')
+    
+    def check_special_cases(self, package_name: str) -> Dict:
+        """Check for special cases like build tools, polyfills, etc."""
+        special_cases = {
+            'build_tool': False,
+            'in_scripts': False,
+            'config_file': False,
+        }
+        
+        package_data = self.load_package_json()
+        
+        # Check if used in npm scripts
+        if 'scripts' in package_data:
+            scripts_str = json.dumps(package_data['scripts'])
+            if package_name in scripts_str:
+                special_cases['in_scripts'] = True
+        
+        # Check webpack config files
+        config_files = list(self.base_path.glob('webpack.*.js'))
+        config_files.extend(self.base_path.glob('*.config.js'))
+        config_files.extend(self.base_path.glob('*.config.ts'))
+        
+        for config_file in config_files:
+            with open(config_file, 'r') as f:
+                content = f.read()
+                if package_name in content:
+                    special_cases['config_file'] = True
+                    break
+        
+        return special_cases
+    
+    def analyze_dependency(self, package_name: str) -> Dict:
+        """Analyze a single dependency"""
+        all_deps = self.get_all_dependencies()
+        
+        if package_name not in all_deps:
+            return {
+                'package': package_name,
+                'status': 'not_found',
+                'recommendation': 'skip',
+                'notes': 'Package not found in package.json'
+            }
+        
+        version = all_deps[package_name]
+        is_types = self.is_types_package(package_name)
+        found, locations = self.search_for_imports(package_name)
+        dependents = self.check_transitive_dependencies(package_name)
+        special_cases = self.check_special_cases(package_name)
+        
+        # Determine status and recommendation
+        if found:
+            status = 'used'
+            recommendation = 'keep'
+        elif is_types:
+            # Check if the main package exists
+            main_package = package_name.replace('@types/', '')
+            if main_package in all_deps:
+                status = 'types-only'
+                recommendation = 'keep'
+            else:
+                status = 'unused'
+                recommendation = 'remove'
+        elif any(special_cases.values()):
+            status = 'special-use'
+            recommendation = 'investigate'
+        elif dependents:
+            status = 'transitive-only'
+            recommendation = 'remove'  # Can be removed if available through dependents

Review Comment:
   **Suggestion:** Logic error: when other packages depend on this package, the code marks the package as removable. If `dependents` is non-empty, the package is required by other packages and should not be recommended for removal; change the recommendation to keep (or investigate) instead of remove. [logic error]
   
   **Severity Level:** Minor ⚠️
   ```suggestion
               recommendation = 'keep'  # Required by other packages; do not recommend removal automatically
   ```
   <details>
   <summary><b>Why it matters? ⭐ </b></summary>
   
   The current logic flips the intent: if other packages depend on this package (`dependents` is non-empty), the package is required transitively and should not be auto-recommended for removal. Changing the recommendation to 'keep' (or at least 'investigate') fixes a real logic bug that would otherwise produce unsafe or incorrect removal suggestions. A minimal sketch of the corrected branch follows this comment.
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** scripts/check-dependencies.py
   **Line:** 174:174
   **Comment:**
        *Logic Error: When other packages depend on this package, the code marks the package as removable. If `dependents` is non-empty, the package is required by other packages and should not be recommended for removal; change the recommendation to keep (or investigate) instead of remove.
   
   Validate the correctness of the flagged issue. If correct, how can I resolve this? If you propose a fix, implement it and please make it concise.
   ```
   </details>
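
To make the suggested change concrete, here is a minimal, self-contained sketch of the corrected decision branch, assuming the control flow shown in the hunk above. The helper name `decide_recommendation` and its parameter list are hypothetical, introduced only for illustration; only the `dependents` branch differs from the PR code.

```python
# Sketch only: mirrors the status/recommendation logic of analyze_dependency(),
# with the transitive-only branch recommending 'keep' instead of 'remove'.
from typing import Dict, List, Tuple


def decide_recommendation(
    found: bool,                     # package is imported directly in src/packages/plugins
    is_types: bool,                  # package name starts with '@types/'
    main_package_present: bool,      # the runtime package behind @types/* is also declared
    special_cases: Dict[str, bool],  # npm-scripts / config-file usage flags
    dependents: List[str],           # lockfile packages that depend on this one
) -> Tuple[str, str]:
    """Return (status, recommendation) for a single dependency."""
    if found:
        return "used", "keep"
    if is_types:
        return ("types-only", "keep") if main_package_present else ("unused", "remove")
    if any(special_cases.values()):
        return "special-use", "investigate"
    if dependents:
        # Other packages require this one, so do not auto-recommend removal.
        return "transitive-only", "keep"
    return "unused", "remove"


if __name__ == "__main__":
    # Not imported directly, but required by another installed package:
    print(decide_recommendation(False, False, False, {}, ["some-plugin"]))
    # -> ('transitive-only', 'keep')
```

If a weaker signal is preferred, returning 'investigate' in that branch keeps the report conservative without ever auto-suggesting removal of a package that other packages require.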



##########
scripts/check-dependencies.py:
##########
@@ -0,0 +1,284 @@
+#!/usr/bin/env python3
+"""
+Script to check for unused dependencies in Superset frontend
+"""
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Dict, List, Set, Tuple
+import re
+
+class DependencyChecker:
+    def __init__(self, base_path: str = "."):
+        self.base_path = Path(base_path)
+        self.package_json_path = self.base_path / "package.json"
+        self.package_lock_path = self.base_path / "package-lock.json"
+        self.src_paths = [
+            self.base_path / "src",
+            self.base_path / "packages",
+            self.base_path / "plugins",
+        ]
+        
+    def load_package_json(self) -> Dict:
+        """Load and return package.json content"""
+        with open(self.package_json_path, 'r') as f:
+            return json.load(f)
+    
+    def get_all_dependencies(self) -> Dict[str, str]:
+        """Get all dependencies from package.json"""
+        package_data = self.load_package_json()
+        deps = {}
+        
+        for dep_type in ['dependencies', 'devDependencies', 'peerDependencies']:
+            if dep_type in package_data:
+                deps.update(package_data[dep_type])
+        
+        return deps
+    
+    def search_for_imports(self, package_name: str) -> Tuple[bool, List[str]]:
+        """Search for imports of a package in the codebase"""
+        patterns = [
+            f"from ['\"]{{}}['\"]",
+            f"require\\(['\"]{{}}['\"]",
+            f"import.*['\"]{{}}['\"]",
+            f"from ['\"]{{}}/.+['\"]",  # Subpath imports
+            f"require\\(['\"]{{}}/.+['\"]",  # Subpath requires
+        ]
+        
+        found_files = []
+        
+        for src_path in self.src_paths:
+            if not src_path.exists():
+                continue
+                
+            for pattern in patterns:
+                search_pattern = pattern.format(re.escape(package_name))
+                try:
+                    # Use ripgrep for faster searching
+                    result = subprocess.run(
+                        ["rg", "-l", search_pattern, str(src_path)],
+                        capture_output=True,
+                        text=True
+                    )
+                    if result.returncode == 0 and result.stdout:
+                        files = result.stdout.strip().split('\n')
+                        found_files.extend(files)
+                except FileNotFoundError:
+                    # Fallback to grep if ripgrep not available
+                    result = subprocess.run(
+                        ["grep", "-r", "-l", "-E", search_pattern, str(src_path)],
+                        capture_output=True,
+                        text=True
+                    )
+                    if result.returncode == 0 and result.stdout:
+                        files = result.stdout.strip().split('\n')
+                        found_files.extend(files)
+        
+        # Remove duplicates
+        found_files = list(set(filter(None, found_files)))
+        return len(found_files) > 0, found_files
+    
+    def check_transitive_dependencies(self, package_name: str) -> List[str]:
+        """Check what packages depend on this package"""
+        dependent_packages = []
+        
+        if self.package_lock_path.exists():
+            with open(self.package_lock_path, 'r') as f:
+                lock_data = json.load(f)
+            
+            # Check packages for dependencies
+            if 'packages' in lock_data:
+                for pkg_path, pkg_info in lock_data['packages'].items():
+                    if 'dependencies' in pkg_info:
+                        if package_name in pkg_info['dependencies']:
+                            # Extract package name from path
+                            if pkg_path.startswith('node_modules/'):
+                                pkg_name = pkg_path.replace('node_modules/', '')
+                                dependent_packages.append(pkg_name)
+        
+        return dependent_packages
+    
+    def is_types_package(self, package_name: str) -> bool:
+        """Check if this is a TypeScript types package"""
+        return package_name.startswith('@types/')
+    
+    def check_special_cases(self, package_name: str) -> Dict:
+        """Check for special cases like build tools, polyfills, etc."""
+        special_cases = {
+            'build_tool': False,
+            'in_scripts': False,
+            'config_file': False,
+        }
+        
+        package_data = self.load_package_json()
+        
+        # Check if used in npm scripts
+        if 'scripts' in package_data:
+            scripts_str = json.dumps(package_data['scripts'])
+            if package_name in scripts_str:
+                special_cases['in_scripts'] = True
+        
+        # Check webpack config files
+        config_files = list(self.base_path.glob('webpack.*.js'))
+        config_files.extend(self.base_path.glob('*.config.js'))
+        config_files.extend(self.base_path.glob('*.config.ts'))
+        
+        for config_file in config_files:
+            with open(config_file, 'r') as f:
+                content = f.read()
+                if package_name in content:
+                    special_cases['config_file'] = True
+                    break
+        
+        return special_cases
+    
+    def analyze_dependency(self, package_name: str) -> Dict:
+        """Analyze a single dependency"""
+        all_deps = self.get_all_dependencies()
+        
+        if package_name not in all_deps:
+            return {
+                'package': package_name,
+                'status': 'not_found',
+                'recommendation': 'skip',
+                'notes': 'Package not found in package.json'
+            }
+        
+        version = all_deps[package_name]
+        is_types = self.is_types_package(package_name)
+        found, locations = self.search_for_imports(package_name)
+        dependents = self.check_transitive_dependencies(package_name)
+        special_cases = self.check_special_cases(package_name)
+        
+        # Determine status and recommendation
+        if found:
+            status = 'used'
+            recommendation = 'keep'
+        elif is_types:
+            # Check if the main package exists
+            main_package = package_name.replace('@types/', '')
+            if main_package in all_deps:
+                status = 'types-only'
+                recommendation = 'keep'
+            else:
+                status = 'unused'
+                recommendation = 'remove'
+        elif any(special_cases.values()):
+            status = 'special-use'
+            recommendation = 'investigate'
+        elif dependents:
+            status = 'transitive-only'
+            recommendation = 'remove'  # Can be removed if available through dependents
+        else:
+            status = 'unused'
+            recommendation = 'remove'
+        
+        return {
+            'package': package_name,
+            'version': version,
+            'status': status,
+            'direct_usage': {
+                'found': found,
+                'locations': locations[:5],  # Limit to first 5 locations
+                'import_count': len(locations)
+            },
+            'transitive_usage': {
+                'required_by': dependents[:5],  # Limit to first 5 dependents
+                'can_be_removed': len(dependents) > 0

Review Comment:
   **Suggestion:** Incorrect boolean for transitive removal: `can_be_removed` is set to true when there are dependents, which is backwards; if other packages require this package, it should not be marked removable. Use a flag that is true only when there are no dependents. [logic error]
   
   **Severity Level:** Minor ⚠️
   ```suggestion
                   'can_be_removed': len(dependents) == 0
   ```
   <details>
   <summary><b>Why it matters? ⭐ </b></summary>
   
   The boolean is inverted: `can_be_removed` should be true only when there are NO dependents. As written, packages that are required by other packages are incorrectly flagged as removable. Fixing this corrects the transitive-usage reporting and aligns the field with its actual semantics. A small sketch with the corrected flag follows this comment.
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** scripts/check-dependencies.py
   **Line:** 190:190
   **Comment:**
        *Logic Error: Incorrect boolean for transitive removal: `can_be_removed` is set to true when there are dependents, which is backwards; if other packages require this package, it should not be marked removable. Use a flag that reflects whether there are no dependents.
   
   Validate the correctness of the flagged issue. If correct, how can I resolve this? If you propose a fix, implement it and please make it concise.
   ```
   </details>
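
For completeness, a small sketch of the corrected `transitive_usage` field, assuming the report dictionary built at the end of `analyze_dependency()` in the hunk above. The helper name `build_transitive_usage` is hypothetical and only isolates the inverted flag.

```python
# Sketch only: can_be_removed is true only when nothing in the lockfile depends on the package.
from typing import Dict, List


def build_transitive_usage(dependents: List[str]) -> Dict:
    """Summarize transitive usage for the dependency report."""
    return {
        "required_by": dependents[:5],           # limit to first 5 dependents, as in the hunk
        "can_be_removed": len(dependents) == 0,  # inverted from the original `> 0` check
    }


if __name__ == "__main__":
    print(build_transitive_usage(["pkg-a", "pkg-b"]))  # can_be_removed: False
    print(build_transitive_usage([]))                  # can_be_removed: True
```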



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

