fix: resolve CI type checking and lint failures
Some checks failed
CI / test (push) Failing after 5s
CI / build (push) Has been skipped

This commit is contained in:
2026-01-30 17:19:21 +00:00
parent f78dd52382
commit c2deb7fd58

View File

@@ -0,0 +1,336 @@
"""Language-specific parsers for extracting dependencies from source files."""
import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable, Literal, Optional
try:
import tree_sitter
from tree_sitter import Language
except ImportError:
tree_sitter = None # type: ignore[assignment]
try:
import tree_sitter_python
except ImportError:
tree_sitter_python = None # type: ignore[assignment]
try:
import tree_sitter_javascript
except ImportError:
tree_sitter_javascript = None # type: ignore[assignment]
try:
import tree_sitter_go
except ImportError:
tree_sitter_go = None # type: ignore[assignment]
# Closed set of language identifiers accepted by the parser factory below.
LanguageType = Literal["python", "javascript", "typescript", "go"]
class DependencyParser(ABC):
    """Common interface implemented by every language-specific parser."""

    @abstractmethod
    def parse_file(self, file_path: Path) -> list[str]:
        """Return the dependencies found in the file at ``file_path``."""

    @abstractmethod
    def get_language(self) -> str:
        """Report the identifier of the language this parser handles."""
def get_language_library(lang: str):
    """Look up the tree-sitter grammar module registered for ``lang``.

    Returns the module object bound at import time (which is ``None`` when
    the optional grammar package could not be imported), or ``None`` for an
    unknown language name. TypeScript shares the JavaScript grammar module.
    """
    libraries = dict(
        python=tree_sitter_python,
        javascript=tree_sitter_javascript,
        typescript=tree_sitter_javascript,
        go=tree_sitter_go,
    )
    try:
        return libraries[lang]
    except KeyError:
        return None
class PythonParser(DependencyParser):
    """Parser for Python files using tree-sitter, with a regex fallback.

    Only the top-level package of each absolute import is reported
    (``import os.path`` yields ``"os"``); relative imports are skipped.
    Results are de-duplicated and sorted so the output is deterministic.
    """

    # One ``from X import`` or ``import a, b as c`` statement, located at the
    # start of a line or right after a ``;`` statement separator. The
    # ``names`` group stops at ``#`` and ``;`` so trailing comments and
    # follow-up statements never leak into a module name.
    _IMPORT_RE = re.compile(
        r"(?:^|;)[ \t]*"
        r"(?:from[ \t]+(?P<module>[\w.]+)[ \t]+import\b"
        r"|import[ \t]+(?P<names>[^#;\n]+))",
        re.MULTILINE,
    )
    # A single ``pkg.mod`` or ``pkg.mod as alias`` item of an import list.
    _IMPORT_ITEM_RE = re.compile(r"([\w.]+)(?:\s+as\s+\w+)?")

    def __init__(self):
        self._parser = None  # built lazily by _get_parser()

    def _get_parser(self):
        """Return the cached tree-sitter parser, creating it on first use.

        Raises:
            ImportError: if ``tree_sitter`` or ``tree_sitter_python`` could
                not be imported.
        """
        if self._parser is None:
            if tree_sitter_python is None:
                raise ImportError("tree-sitter-python is not installed")
            if tree_sitter is None:
                raise ImportError("tree-sitter is not installed")
            lang = Language(tree_sitter_python.language())  # type: ignore[arg-type]
            try:
                # Newer py-tree-sitter releases take the language in the
                # constructor and no longer provide ``set_language``.
                parser = tree_sitter.Parser(lang)
            except TypeError:
                # Older releases: argument-less constructor plus setter.
                parser = tree_sitter.Parser()
                parser.set_language(lang)
            self._parser = parser
        return self._parser

    def get_language(self) -> str:
        """Return the language identifier, ``"python"``."""
        return "python"

    def parse_file(self, file_path: Path) -> list[str]:
        """Extract the top-level packages imported by a Python file.

        Returns an empty list when the file cannot be read or decoded as
        UTF-8. Uses tree-sitter when available and falls back to the regex
        scanner when it is missing or fails.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            return []
        if tree_sitter is None or tree_sitter_python is None:
            return self._regex_parse(content)
        try:
            parser = self._get_parser()
            tree = parser.parse(content.encode("utf-8"))
            return self._extract_imports(tree.root_node, content)
        except Exception:
            # Deliberate best-effort: any tree-sitter failure (API mismatch,
            # grammar error, ...) degrades to the regex scanner.
            return self._regex_parse(content)

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for Python.

        This is a line-oriented heuristic: import-looking lines inside
        strings or docstrings are also picked up.
        """
        found: set[str] = set()
        for match in self._IMPORT_RE.finditer(content):
            module = match.group("module")
            if module is not None:
                dotted_names = [module]
            else:
                items = (
                    self._IMPORT_ITEM_RE.fullmatch(item.strip())
                    for item in match.group("names").split(",")
                )
                dotted_names = [item.group(1) for item in items if item]
            for dotted in dotted_names:
                # A relative import (".pkg") has an empty first segment.
                root = dotted.split(".")[0]
                if root:
                    found.add(root)
        return sorted(found)

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from the tree-sitter parse tree rooted at ``node``.

        ``content`` is the source text the tree was parsed from. The
        annotation is a string so that this class can still be defined when
        ``tree_sitter`` is unavailable.
        """
        # tree-sitter reports byte offsets, so slice the encoded source.
        source = content.encode("utf-8")
        found: set[str] = set()
        pending = [node]
        # Iterative walk: avoids hitting the recursion limit on deep trees.
        while pending:
            current = pending.pop()
            if current.type in ("import_statement", "import_from_statement"):
                for dotted in self._module_names(current, source):
                    root = dotted.split(".")[0]
                    if root:
                        found.add(root)
            else:
                pending.extend(current.children)
        return sorted(found)

    def _module_names(self, node: "tree_sitter.Node", source: bytes) -> list[str]:
        """Return the dotted module names referenced by one import node."""

        def text_of(target) -> str:
            return source[target.start_byte : target.end_byte].decode("utf-8")

        if node.type == "import_statement":
            names = []
            for child in node.children:
                target = child
                if child.type == "aliased_import":
                    # ``import a.b as c``: the module is the nested dotted name.
                    target = next(
                        (sub for sub in child.children if sub.type == "dotted_name"),
                        None,
                    )
                if target is not None and target.type == "dotted_name":
                    names.append(text_of(target))
            return names
        # ``from X import ...``: the module comes first; a relative module
        # refers to the local package and is not an external dependency.
        for child in node.children:
            if child.type == "relative_import":
                return []
            if child.type == "dotted_name":
                return [text_of(child)]
        return []

    def _get_module_name(self, node: "tree_sitter.Node", content: str) -> str:
        """Return the first module named by an import node, or ``""``."""
        names = self._module_names(node, content.encode("utf-8"))
        return names[0] if names else ""
class JavaScriptParser(DependencyParser):
    """Parser for JavaScript/TypeScript files using tree-sitter.

    Reports the package part of every non-relative module specifier
    (``lodash/fp`` yields ``"lodash"``, ``@scope/pkg/sub`` yields
    ``"@scope/pkg"``). Results are de-duplicated and sorted.
    """

    # ``import ... from "x"`` and the bare side-effect form ``import "x"``.
    _IMPORT_RE = re.compile(
        r"""\bimport\b\s*(?:[\w$*\s{},]*?\bfrom\s*)?["']([^"']+)["']"""
    )
    # Re-exports: ``export * from "x"`` / ``export { a } from "x"``.
    _EXPORT_RE = re.compile(
        r"""\bexport\b\s*[\w$*\s{},]*?\bfrom\s*["']([^"']+)["']"""
    )
    # ``require("x")`` and dynamic ``import("x")`` with a literal argument.
    _CALL_RE = re.compile(
        r"""\b(?:require|import)\s*\(\s*["']([^"']+)["']\s*\)"""
    )

    def __init__(self, typescript: bool = False):
        self._parser = None  # built lazily by _get_parser()
        self._typescript = typescript

    def _get_parser(self):
        """Return the cached tree-sitter parser, creating it on first use.

        Raises:
            ImportError: if ``tree_sitter`` or ``tree_sitter_javascript``
                could not be imported.
        """
        if self._parser is None:
            if tree_sitter_javascript is None:
                raise ImportError("tree-sitter-javascript is not installed")
            if tree_sitter is None:
                raise ImportError("tree-sitter is not installed")
            lang = Language(tree_sitter_javascript.language())  # type: ignore[arg-type]
            try:
                # Newer py-tree-sitter releases take the language in the
                # constructor and no longer provide ``set_language``.
                parser = tree_sitter.Parser(lang)
            except TypeError:
                # Older releases: argument-less constructor plus setter.
                parser = tree_sitter.Parser()
                parser.set_language(lang)
            self._parser = parser
        return self._parser

    def get_language(self) -> str:
        """Return ``"typescript"`` or ``"javascript"`` per the constructor flag."""
        return "typescript" if self._typescript else "javascript"

    def parse_file(self, file_path: Path) -> list[str]:
        """Extract the packages imported by a JavaScript/TypeScript file.

        Returns an empty list when the file cannot be read or decoded as
        UTF-8. Uses tree-sitter when available and falls back to the regex
        scanner when it is missing or fails.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            return []
        if tree_sitter is None or tree_sitter_javascript is None:
            return self._regex_parse(content)
        try:
            parser = self._get_parser()
            tree = parser.parse(content.encode("utf-8"))
            found = set(self._extract_imports(tree.root_node, content))
            # The JavaScript grammar is used for TypeScript too, so
            # TypeScript-only syntax may leave error nodes that hide import
            # statements; merge in the regex results in that case.
            if getattr(tree.root_node, "has_error", False):
                found.update(self._regex_parse(content))
            return sorted(found)
        except Exception:
            # Deliberate best-effort: any tree-sitter failure degrades to
            # the regex scanner.
            return self._regex_parse(content)

    @staticmethod
    def _package_name(specifier: str) -> Optional[str]:
        """Map a module specifier to its package name, or ``None`` if local.

        Specifiers starting with ``.`` or ``/`` are file paths, not packages.
        Scoped packages keep both the scope and the name.
        """
        if not specifier or specifier.startswith((".", "/")):
            return None
        parts = specifier.split("/")
        if specifier.startswith("@") and len(parts) >= 2:
            return "/".join(parts[:2])
        return parts[0]

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for JavaScript/TypeScript.

        This is a text heuristic: matches inside comments or string
        literals are also picked up.
        """
        found: set[str] = set()
        for pattern in (self._CALL_RE, self._IMPORT_RE, self._EXPORT_RE):
            for match in pattern.finditer(content):
                package = self._package_name(match.group(1))
                if package:
                    found.add(package)
        return sorted(found)

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from the tree-sitter parse tree rooted at ``node``.

        ``content`` is the source text the tree was parsed from. The
        annotation is a string so that this class can still be defined when
        ``tree_sitter`` is unavailable.
        """
        # tree-sitter reports byte offsets, so slice the encoded source.
        source = content.encode("utf-8")
        # Dispatch on node type so that an import whose text merely contains
        # the word "require" is still handled as an import.
        matchers = {
            "import_statement": self._IMPORT_RE,
            "export_statement": self._EXPORT_RE,
            "call_expression": self._CALL_RE,
        }
        found: set[str] = set()
        pending = [node]
        # Iterative walk: avoids hitting the recursion limit on deep trees.
        while pending:
            current = pending.pop()
            pattern = matchers.get(current.type)
            if pattern is not None:
                text = source[current.start_byte : current.end_byte].decode(
                    "utf-8", errors="replace"
                )
                # Anchor at the node start so only this node's own
                # import/require is reported; nested ones are visited below.
                match = pattern.match(text)
                if match:
                    package = self._package_name(match.group(1))
                    if package:
                        found.add(package)
            pending.extend(current.children)
        return sorted(found)
class GoParser(DependencyParser):
    """Parser for Go files using tree-sitter, with a regex fallback.

    Each import path is reduced to at most its first two ``/``-separated
    segments (``github.com/user/repo/pkg`` is reported as
    ``github.com/user``, ``fmt`` as ``fmt``). Paths starting with ``.`` are
    skipped. Results are de-duplicated and sorted.
    """

    # An import declaration: either a parenthesised block or a single spec
    # on the rest of the line.
    _IMPORT_DECL_RE = re.compile(
        r"^[ \t]*import\b\s*(?:\((?P<block>[^)]*)\)|(?P<single>[^\n]*))",
        re.MULTILINE,
    )
    # An interpreted ("...") or raw (`...`) string literal holding a path.
    _QUOTED_RE = re.compile(r'"([^"\n]+)"|`([^`\n]+)`')

    def __init__(self):
        self._parser = None  # built lazily by _get_parser()

    def _get_parser(self):
        """Return the cached tree-sitter parser, creating it on first use.

        Raises:
            ImportError: if ``tree_sitter`` or ``tree_sitter_go`` could not
                be imported.
        """
        if self._parser is None:
            if tree_sitter_go is None:
                raise ImportError("tree-sitter-go is not installed")
            if tree_sitter is None:
                raise ImportError("tree-sitter is not installed")
            lang = Language(tree_sitter_go.language())  # type: ignore[arg-type]
            try:
                # Newer py-tree-sitter releases take the language in the
                # constructor and no longer provide ``set_language``.
                parser = tree_sitter.Parser(lang)
            except TypeError:
                # Older releases: argument-less constructor plus setter.
                parser = tree_sitter.Parser()
                parser.set_language(lang)
            self._parser = parser
        return self._parser

    def get_language(self) -> str:
        """Return the language identifier, ``"go"``."""
        return "go"

    def parse_file(self, file_path: Path) -> list[str]:
        """Extract the packages imported by a Go file.

        Returns an empty list when the file cannot be read or decoded as
        UTF-8. Uses tree-sitter when available and falls back to the regex
        scanner when it is missing or fails.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            return []
        if tree_sitter is None or tree_sitter_go is None:
            return self._regex_parse(content)
        try:
            parser = self._get_parser()
            tree = parser.parse(content.encode("utf-8"))
            return self._extract_imports(tree.root_node, content)
        except Exception:
            # Deliberate best-effort: any tree-sitter failure degrades to
            # the regex scanner.
            return self._regex_parse(content)

    @staticmethod
    def _package_name(path: str) -> Optional[str]:
        """Reduce an import path to its first two segments, or ``None``."""
        if not path or path.startswith("."):
            return None
        return "/".join(path.split("/")[:2])

    def _packages_in(self, text: str) -> set[str]:
        """Collect package names from the text of one import declaration."""
        found: set[str] = set()
        for line in text.splitlines():
            # Drop line comments so quoted text inside them is ignored.
            code = line.split("//", 1)[0]
            for interpreted, raw in self._QUOTED_RE.findall(code):
                package = self._package_name(interpreted or raw)
                if package:
                    found.add(package)
        return found

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for Go.

        Handles both ``import "x"`` and ``import ( ... )`` declarations,
        including aliased, dot and blank imports. This is a text heuristic
        and does not understand block comments or string literals.
        """
        found: set[str] = set()
        for match in self._IMPORT_DECL_RE.finditer(content):
            block = match.group("block")
            body = block if block is not None else match.group("single")
            found.update(self._packages_in(body))
        return sorted(found)

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from the tree-sitter parse tree rooted at ``node``.

        ``content`` is the source text the tree was parsed from. The
        annotation is a string so that this class can still be defined when
        ``tree_sitter`` is unavailable.
        """
        # tree-sitter reports byte offsets, so slice the encoded source.
        source = content.encode("utf-8")
        found: set[str] = set()
        pending = [node]
        # Iterative walk: avoids hitting the recursion limit on deep trees.
        while pending:
            current = pending.pop()
            if current.type == "import_declaration":
                text = source[current.start_byte : current.end_byte].decode(
                    "utf-8", errors="replace"
                )
                # A grouped declaration lists several paths; take all of them.
                found.update(self._packages_in(text))
            else:
                pending.extend(current.children)
        return sorted(found)
def get_parser(language: str) -> DependencyParser:
    """Build a parser instance for ``language`` (matched case-insensitively).

    Raises:
        ValueError: if no parser is registered for the language.
    """
    # Every value is a zero-argument callable producing a parser.
    factories: dict[str, Callable[[], DependencyParser]] = {
        "python": PythonParser,
        "javascript": JavaScriptParser,
        "typescript": lambda: JavaScriptParser(typescript=True),
        "go": GoParser,
    }
    key = language.lower()
    if key not in factories:
        raise ValueError(f"Unsupported language: {language}")
    return factories[key]()
def detect_language(file_path: Path) -> Optional[str]:
    """Map a file's extension (case-insensitive) to a language identifier.

    Returns ``None`` when the extension is not one of the known ones.
    """
    extensions_by_language = {
        "python": (".py",),
        "javascript": (".js", ".jsx"),
        "typescript": (".ts", ".tsx"),
        "go": (".go",),
    }
    suffix = file_path.suffix.lower()
    for language, extensions in extensions_by_language.items():
        if suffix in extensions:
            return language
    return None
def parse_dependencies(
    file_path: Path, language: Optional[str] = None
) -> list[str]:
    """Return the dependencies of ``file_path``.

    When ``language`` is omitted it is inferred from the file extension; an
    unrecognised extension produces an empty list. An explicitly supplied
    language is passed to ``get_parser`` as given.
    """
    resolved = detect_language(file_path) if language is None else language
    if resolved is None:
        return []
    return get_parser(resolved).parse_file(file_path)