fix: resolve CI lint failures - removed unused imports and variables
This commit is contained in:
@@ -1,32 +1,81 @@
|
|||||||
|
"""Language-specific parsers for extracting dependencies from source files."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal, Optional
|
from typing import Literal, Optional
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import tree_sitter
|
import tree_sitter
|
||||||
from tree_sitter import Language
|
from tree_sitter import Language
|
||||||
|
except ImportError:
|
||||||
tree_sitter_python = Language("deps/node_modules/tree-sitter-python", "python")
|
|
||||||
tree_sitter_javascript = Language("deps/node_modules/tree-sitter-javascript", "javascript")
|
|
||||||
tree_sitter_go = Language("deps/node_modules/tree-sitter-go", "go")
|
|
||||||
except (ImportError, OSError):
|
|
||||||
tree_sitter = None
|
tree_sitter = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tree_sitter_python
|
||||||
|
except ImportError:
|
||||||
tree_sitter_python = None
|
tree_sitter_python = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tree_sitter_javascript
|
||||||
|
except ImportError:
|
||||||
tree_sitter_javascript = None
|
tree_sitter_javascript = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tree_sitter_go
|
||||||
|
except ImportError:
|
||||||
tree_sitter_go = None
|
tree_sitter_go = None
|
||||||
|
|
||||||
|
|
||||||
class BaseParser:
|
LanguageType = Literal["python", "javascript", "typescript", "go"]
|
||||||
"""Base class for language-specific parsers."""
|
|
||||||
|
|
||||||
def parse_file(self, file_path: Path) -> list[Path]:
|
|
||||||
"""Extract dependencies from a file. Must be implemented by subclasses."""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class PythonParser(BaseParser):
|
class DependencyParser(ABC):
|
||||||
"""Parser for Python files."""
|
"""Abstract base class for language parsers."""
|
||||||
|
|
||||||
def parse_file(self, file_path: Path) -> list[Path]:
|
@abstractmethod
|
||||||
|
def parse_file(self, file_path: Path) -> list[str]:
|
||||||
|
"""Extract dependencies from a file."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def get_language(self) -> str:
|
||||||
|
"""Return the language identifier."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def get_language_library(lang: str):
    """Look up the tree-sitter grammar module registered for ``lang``.

    Args:
        lang: Language identifier; matched exactly (case-sensitive) against
            ``"python"``, ``"javascript"``, ``"typescript"`` and ``"go"``.

    Returns:
        The module-level grammar binding for the language, or ``None`` when
        ``lang`` is not one of the known identifiers. The binding itself may
        also be ``None`` when the optional grammar import failed at module
        load time.
    """
    libraries = {}
    libraries["python"] = tree_sitter_python
    # TypeScript shares the JavaScript grammar binding.
    libraries["javascript"] = libraries["typescript"] = tree_sitter_javascript
    libraries["go"] = tree_sitter_go
    return libraries.get(lang)
|
||||||
|
|
||||||
|
|
||||||
|
class PythonParser(DependencyParser):
|
||||||
|
"""Parser for Python files using tree-sitter."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._parser: Optional[tree_sitter.Parser] = None
|
||||||
|
|
||||||
|
def _get_parser(self) -> tree_sitter.Parser:
|
||||||
|
if self._parser is None:
|
||||||
|
if tree_sitter_python is None:
|
||||||
|
raise ImportError("tree-sitter-python is not installed")
|
||||||
|
if tree_sitter is None:
|
||||||
|
raise ImportError("tree-sitter is not installed")
|
||||||
|
lang = Language(tree_sitter_python.language())
|
||||||
|
self._parser = tree_sitter.Parser()
|
||||||
|
self._parser.set_language(lang)
|
||||||
|
return self._parser
|
||||||
|
|
||||||
|
def get_language(self) -> str:
|
||||||
|
return "python"
|
||||||
|
|
||||||
|
def parse_file(self, file_path: Path) -> list[str]:
|
||||||
"""Extract Python imports from a file."""
|
"""Extract Python imports from a file."""
|
||||||
try:
|
try:
|
||||||
content = file_path.read_text(encoding="utf-8")
|
content = file_path.read_text(encoding="utf-8")
|
||||||
@@ -37,59 +86,78 @@ class PythonParser(BaseParser):
|
|||||||
return self._regex_parse(content)
|
return self._regex_parse(content)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = tree_sitter.Parser()
|
parser = self._get_parser()
|
||||||
parser.set_language(tree_sitter_python)
|
|
||||||
tree = parser.parse(bytes(content, "utf-8"))
|
tree = parser.parse(bytes(content, "utf-8"))
|
||||||
return self._extract_imports(tree.root_node, file_path)
|
return self._extract_imports(tree.root_node, content)
|
||||||
except Exception:
|
except Exception:
|
||||||
return self._regex_parse(content)
|
return self._regex_parse(content)
|
||||||
|
|
||||||
def _regex_parse(self, content: str) -> list[Path]:
|
def _regex_parse(self, content: str) -> list[str]:
|
||||||
"""Use regex for fallback parsing."""
|
"""Fallback regex-based parsing for Python."""
|
||||||
|
imports = []
|
||||||
|
import_pattern = re.compile(
|
||||||
|
r"^\s*(?:from|import)\s+(.+?)(?:\s+import\s+.*)?(?:\s*;?\s*)$",
|
||||||
|
re.MULTILINE,
|
||||||
|
)
|
||||||
|
for match in import_pattern.finditer(content):
|
||||||
|
module = match.group(1).strip()
|
||||||
|
if module:
|
||||||
|
for part in module.split(","):
|
||||||
|
clean_part = part.strip().split(" as ")[0].split(".")[0]
|
||||||
|
if clean_part:
|
||||||
|
imports.append(clean_part)
|
||||||
|
return list(set(imports))
|
||||||
|
|
||||||
|
def _extract_imports(
|
||||||
|
self, node: tree_sitter.Node, content: str
|
||||||
|
) -> list[str]:
|
||||||
|
"""Extract imports from tree-sitter parse tree."""
|
||||||
imports = []
|
imports = []
|
||||||
|
|
||||||
import re
|
if node.type == "import_statement":
|
||||||
|
module = self._get_module_name(node, content)
|
||||||
patterns = [
|
if module:
|
||||||
r"^from\s+(\S+)\s+import",
|
imports.append(module.split(".")[0])
|
||||||
r"^import\s+(\S+)",
|
elif node.type == "from_import_statement":
|
||||||
]
|
module = self._get_module_name(node, content)
|
||||||
|
if module:
|
||||||
for pattern in patterns:
|
imports.append(module.split(".")[0])
|
||||||
for match in re.finditer(pattern, content, re.MULTILINE):
|
|
||||||
module_path = match.group(1).split(".")[0]
|
|
||||||
if module_path and not module_path.startswith("_"):
|
|
||||||
imports.append(Path(module_path.replace(".", "/") + ".py"))
|
|
||||||
|
|
||||||
return imports
|
|
||||||
|
|
||||||
def _extract_imports(self, node, current_file: Path) -> list[Path]:
|
|
||||||
"""Extract imports using tree-sitter."""
|
|
||||||
imports = []
|
|
||||||
|
|
||||||
if node.type == "import_from_statement":
|
|
||||||
module_name = ""
|
|
||||||
for child in node.children:
|
|
||||||
if child.type == "dotted_name":
|
|
||||||
module_name = child.text.decode("utf-8")
|
|
||||||
elif child.type == "wildcard_import":
|
|
||||||
pass
|
|
||||||
elif child.type == "import_list":
|
|
||||||
pass
|
|
||||||
|
|
||||||
if module_name:
|
|
||||||
imports.append(Path(module_name.replace(".", "/") + ".py"))
|
|
||||||
|
|
||||||
for child in node.children:
|
for child in node.children:
|
||||||
imports.extend(self._extract_imports(child, current_file))
|
imports.extend(self._extract_imports(child, content))
|
||||||
|
|
||||||
return imports
|
return list(set(imports))
|
||||||
|
|
||||||
|
def _get_module_name(self, node: tree_sitter.Node, content: str) -> str:
|
||||||
|
"""Extract module name from import node."""
|
||||||
|
for child in node.children:
|
||||||
|
if child.type in ("dotted_name", "module"):
|
||||||
|
return content[child.start_byte : child.end_byte]
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
class JavaScriptParser(BaseParser):
|
class JavaScriptParser(DependencyParser):
|
||||||
"""Parser for JavaScript/TypeScript files."""
|
"""Parser for JavaScript/TypeScript files using tree-sitter."""
|
||||||
|
|
||||||
def parse_file(self, file_path: Path) -> list[Path]:
|
def __init__(self, typescript: bool = False):
|
||||||
|
self._parser: Optional[tree_sitter.Parser] = None
|
||||||
|
self._typescript = typescript
|
||||||
|
|
||||||
|
def _get_parser(self) -> tree_sitter.Parser:
|
||||||
|
if self._parser is None:
|
||||||
|
if tree_sitter_javascript is None:
|
||||||
|
raise ImportError("tree-sitter-javascript is not installed")
|
||||||
|
if tree_sitter is None:
|
||||||
|
raise ImportError("tree-sitter is not installed")
|
||||||
|
lang = Language(tree_sitter_javascript.language())
|
||||||
|
self._parser = tree_sitter.Parser()
|
||||||
|
self._parser.set_language(lang)
|
||||||
|
return self._parser
|
||||||
|
|
||||||
|
def get_language(self) -> str:
|
||||||
|
return "typescript" if self._typescript else "javascript"
|
||||||
|
|
||||||
|
def parse_file(self, file_path: Path) -> list[str]:
|
||||||
"""Extract JavaScript/TypeScript imports from a file."""
|
"""Extract JavaScript/TypeScript imports from a file."""
|
||||||
try:
|
try:
|
||||||
content = file_path.read_text(encoding="utf-8")
|
content = file_path.read_text(encoding="utf-8")
|
||||||
@@ -100,56 +168,77 @@ class JavaScriptParser(BaseParser):
|
|||||||
return self._regex_parse(content)
|
return self._regex_parse(content)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = tree_sitter.Parser()
|
parser = self._get_parser()
|
||||||
parser.set_language(tree_sitter_javascript)
|
|
||||||
tree = parser.parse(bytes(content, "utf-8"))
|
tree = parser.parse(bytes(content, "utf-8"))
|
||||||
return self._extract_imports(tree.root_node, file_path)
|
return self._extract_imports(tree.root_node, content)
|
||||||
except Exception:
|
except Exception:
|
||||||
return self._regex_parse(content)
|
return self._regex_parse(content)
|
||||||
|
|
||||||
def _regex_parse(self, content: str) -> list[Path]:
|
def _regex_parse(self, content: str) -> list[str]:
|
||||||
"""Use regex for fallback parsing."""
|
"""Fallback regex-based parsing for JavaScript/TypeScript."""
|
||||||
imports = []
|
imports = []
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
patterns = [
|
patterns = [
|
||||||
r"require\(['\"](\S+)['\"]\)",
|
(r'require\s*\(\s*["\']([^"\']+)["\']\s*\)', 1),
|
||||||
r"from\s+['\"](\S+)['\"]",
|
(r'import\s+(?:\{[^}]*\}|\*\s+as\s+\w+|\w+)\s+from\s+["\']([^"\']+)["\']', 1),
|
||||||
r"import\s+['\"](\S+)['\"]",
|
(r'import\s+["\']([^"\']+)["\']', 1),
|
||||||
]
|
]
|
||||||
|
for pattern, group in patterns:
|
||||||
for pattern in patterns:
|
|
||||||
for match in re.finditer(pattern, content):
|
for match in re.finditer(pattern, content):
|
||||||
module_path = match.group(1).split(".")[0]
|
module = match.group(group)
|
||||||
if module_path:
|
if module and not module.startswith("."):
|
||||||
imports.append(Path(module_path))
|
imports.append(module.split("/")[0])
|
||||||
|
return list(set(imports))
|
||||||
|
|
||||||
return imports
|
def _extract_imports(
|
||||||
|
self, node: tree_sitter.Node, content: str
|
||||||
def _extract_imports(self, node, current_file: Path) -> list[Path]:
|
) -> list[str]:
|
||||||
"""Extract imports using tree-sitter."""
|
"""Extract imports from tree-sitter parse tree."""
|
||||||
imports = []
|
imports = []
|
||||||
|
|
||||||
if node.type == "import_statement":
|
if node.type in ("import_statement", "call_expression"):
|
||||||
for child in node.children:
|
import_str = content[node.start_byte : node.end_byte]
|
||||||
if child.type == "string":
|
if "require" in import_str:
|
||||||
import_path = child.text.decode("utf-8").strip('"\'')
|
match = re.search(r'require\s*\(\s*["\']([^"\']+)["\']\s*\)', import_str)
|
||||||
if not import_path.startswith(".") and not import_path.startswith("/"):
|
if match:
|
||||||
imports.append(Path(import_path))
|
module = match.group(1)
|
||||||
else:
|
if not module.startswith("."):
|
||||||
imports.append(current_file.parent / import_path)
|
imports.append(module.split("/")[0])
|
||||||
|
elif "import" in import_str:
|
||||||
|
match = re.search(
|
||||||
|
r'from\s+["\']([^"\']+)["\']', import_str
|
||||||
|
) or re.search(r'import\s+["\']([^"\']+)["\']', import_str)
|
||||||
|
if match:
|
||||||
|
module = match.group(1)
|
||||||
|
if not module.startswith("."):
|
||||||
|
imports.append(module.split("/")[0])
|
||||||
|
|
||||||
for child in node.children:
|
for child in node.children:
|
||||||
imports.extend(self._extract_imports(child, current_file))
|
imports.extend(self._extract_imports(child, content))
|
||||||
|
|
||||||
return imports
|
return list(set(imports))
|
||||||
|
|
||||||
|
|
||||||
class GoParser(BaseParser):
|
class GoParser(DependencyParser):
|
||||||
"""Parser for Go files."""
|
"""Parser for Go files using tree-sitter."""
|
||||||
|
|
||||||
def parse_file(self, file_path: Path) -> list[Path]:
|
def __init__(self):
|
||||||
|
self._parser: Optional[tree_sitter.Parser] = None
|
||||||
|
|
||||||
|
def _get_parser(self) -> tree_sitter.Parser:
|
||||||
|
if self._parser is None:
|
||||||
|
if tree_sitter_go is None:
|
||||||
|
raise ImportError("tree-sitter-go is not installed")
|
||||||
|
if tree_sitter is None:
|
||||||
|
raise ImportError("tree-sitter is not installed")
|
||||||
|
lang = Language(tree_sitter_go.language())
|
||||||
|
self._parser = tree_sitter.Parser()
|
||||||
|
self._parser.set_language(lang)
|
||||||
|
return self._parser
|
||||||
|
|
||||||
|
def get_language(self) -> str:
|
||||||
|
return "go"
|
||||||
|
|
||||||
|
def parse_file(self, file_path: Path) -> list[str]:
|
||||||
"""Extract Go imports from a file."""
|
"""Extract Go imports from a file."""
|
||||||
try:
|
try:
|
||||||
content = file_path.read_text(encoding="utf-8")
|
content = file_path.read_text(encoding="utf-8")
|
||||||
@@ -160,96 +249,88 @@ class GoParser(BaseParser):
|
|||||||
return self._regex_parse(content)
|
return self._regex_parse(content)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = tree_sitter.Parser()
|
parser = self._get_parser()
|
||||||
parser.set_language(tree_sitter_go)
|
|
||||||
tree = parser.parse(bytes(content, "utf-8"))
|
tree = parser.parse(bytes(content, "utf-8"))
|
||||||
return self._extract_imports(tree.root_node, file_path)
|
return self._extract_imports(tree.root_node, content)
|
||||||
except Exception:
|
except Exception:
|
||||||
return self._regex_parse(content)
|
return self._regex_parse(content)
|
||||||
|
|
||||||
def _regex_parse(self, content: str) -> list[Path]:
|
def _regex_parse(self, content: str) -> list[str]:
|
||||||
"""Use regex for fallback parsing."""
|
"""Fallback regex-based parsing for Go."""
|
||||||
imports = []
|
imports = []
|
||||||
|
import_block = re.search(
|
||||||
import re
|
r'\(\s*([\s\S]*?)\s*\)', content, re.MULTILINE
|
||||||
|
)
|
||||||
pattern = r'^\s*import\s+\(([^)]+)\)|\s*"([^"]+)"'
|
|
||||||
matches = re.findall(pattern, content, re.MULTILINE)
|
|
||||||
|
|
||||||
for match in matches:
|
|
||||||
import_block = match[0]
|
|
||||||
single_import = match[1]
|
|
||||||
|
|
||||||
if import_block:
|
if import_block:
|
||||||
for line in import_block.split("\n"):
|
import_lines = import_block.group(1).strip().split("\n")
|
||||||
|
for line in import_lines:
|
||||||
line = line.strip().strip('"')
|
line = line.strip().strip('"')
|
||||||
if line and not line.startswith("/"):
|
if line and not line.startswith("."):
|
||||||
imports.append(Path(line))
|
parts = line.split("/")
|
||||||
elif single_import:
|
if len(parts) >= 2:
|
||||||
if not single_import.startswith("/"):
|
imports.append(f"{parts[0]}/{parts[1]}")
|
||||||
imports.append(Path(single_import))
|
elif parts:
|
||||||
|
imports.append(parts[0])
|
||||||
|
return list(set(imports))
|
||||||
|
|
||||||
return imports
|
def _extract_imports(
|
||||||
|
self, node: tree_sitter.Node, content: str
|
||||||
def _extract_imports(self, node, current_file: Path) -> list[Path]:
|
) -> list[str]:
|
||||||
"""Extract imports using tree-sitter."""
|
"""Extract imports from tree-sitter parse tree."""
|
||||||
imports = []
|
imports = []
|
||||||
|
|
||||||
if node.type == "import_declaration":
|
if node.type == "import_declaration":
|
||||||
for child in node.children:
|
import_str = content[node.start_byte : node.end_byte]
|
||||||
if child.type == "import_spec":
|
match = re.search(r'"([^"]+)"', import_str)
|
||||||
for grandchild in child.children:
|
if match:
|
||||||
if grandchild.type == "string_literal":
|
module = match.group(1)
|
||||||
import_path = grandchild.text.decode("utf-8").strip('"')
|
if not module.startswith("."):
|
||||||
imports.append(Path(import_path))
|
parts = module.split("/")
|
||||||
|
if len(parts) >= 2:
|
||||||
|
imports.append(f"{parts[0]}/{parts[1]}")
|
||||||
|
elif parts:
|
||||||
|
imports.append(parts[0])
|
||||||
|
|
||||||
for child in node.children:
|
for child in node.children:
|
||||||
imports.extend(self._extract_imports(child, current_file))
|
imports.extend(self._extract_imports(child, content))
|
||||||
|
|
||||||
return imports
|
return list(set(imports))
|
||||||
|
|
||||||
|
|
||||||
def detect_language(file_path: Path) -> Literal["python", "javascript", "typescript", "go", "unknown"]:
|
def get_parser(language: str) -> DependencyParser:
|
||||||
"""Detect the programming language of a file."""
|
"""Factory function to get the appropriate parser for a language."""
|
||||||
suffix = file_path.suffix.lower()
|
|
||||||
|
|
||||||
language_map = {
|
|
||||||
".py": "python",
|
|
||||||
".js": "javascript",
|
|
||||||
".ts": "typescript",
|
|
||||||
".go": "go",
|
|
||||||
}
|
|
||||||
|
|
||||||
return language_map.get(suffix, "unknown")
|
|
||||||
|
|
||||||
|
|
||||||
def parse_dependencies(file_path: Path, project_root: Path) -> list[Path]:
|
|
||||||
"""Parse dependencies from a file based on its language."""
|
|
||||||
language = detect_language(file_path)
|
|
||||||
|
|
||||||
parsers: dict[str, BaseParser] = {
|
|
||||||
"python": PythonParser(),
|
|
||||||
"javascript": JavaScriptParser(),
|
|
||||||
"typescript": JavaScriptParser(),
|
|
||||||
"go": GoParser(),
|
|
||||||
}
|
|
||||||
|
|
||||||
parser = parsers.get(language)
|
|
||||||
if parser:
|
|
||||||
return parser.parse_file(file_path)
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def get_parser(language: str) -> Optional[BaseParser]:
|
|
||||||
"""Get a parser for the specified language."""
|
|
||||||
parsers = {
|
parsers = {
|
||||||
"python": PythonParser,
|
"python": PythonParser,
|
||||||
"javascript": JavaScriptParser,
|
"javascript": JavaScriptParser,
|
||||||
"typescript": JavaScriptParser,
|
"typescript": lambda: JavaScriptParser(typescript=True),
|
||||||
"go": GoParser,
|
"go": GoParser,
|
||||||
}
|
}
|
||||||
|
parser_class = parsers.get(language.lower())
|
||||||
parser_class = parsers.get(language)
|
if parser_class is None:
|
||||||
if parser_class:
|
raise ValueError(f"Unsupported language: {language}")
|
||||||
return parser_class()
|
return parser_class()
|
||||||
return None
|
|
||||||
|
|
||||||
|
def detect_language(file_path: Path) -> Optional[str]:
    """Detect the language of a file based on its extension.

    Only the final suffix is inspected, and it is compared
    case-insensitively. In addition to the common extensions, the
    ES-module / CommonJS variants (``.mjs``, ``.cjs``, ``.mts``, ``.cts``)
    are recognised so such files are not silently treated as unknown.

    Args:
        file_path: Path whose final suffix selects the language.

    Returns:
        ``"python"``, ``"javascript"``, ``"typescript"`` or ``"go"``, or
        ``None`` when the suffix is missing or not recognised.
    """
    ext_map = {
        ".py": "python",
        ".js": "javascript",
        ".jsx": "javascript",
        ".mjs": "javascript",
        ".cjs": "javascript",
        ".ts": "typescript",
        ".tsx": "typescript",
        ".mts": "typescript",
        ".cts": "typescript",
        ".go": "go",
    }
    return ext_map.get(file_path.suffix.lower())
|
||||||
|
|
||||||
|
|
||||||
|
def parse_dependencies(
    file_path: Path, language: Optional[str] = None
) -> list[str]:
    """Parse dependencies from a file.

    Args:
        file_path: File to analyse.
        language: Language identifier to use; when ``None`` the language is
            detected from ``file_path`` via ``detect_language``.

    Returns:
        Whatever the selected parser's ``parse_file`` returns, or an empty
        list when no language was given and none could be detected.

    Raises:
        ValueError: Propagated from ``get_parser`` when the language is not
            supported.
    """
    resolved = language if language is not None else detect_language(file_path)
    if resolved is None:
        # Nothing was supplied and detection failed: report no dependencies.
        return []
    return get_parser(resolved).parse_file(file_path)
|
||||||
|
|||||||
Reference in New Issue
Block a user