fix: resolve mypy type errors in parser.py and config.py
This commit is contained in:
333
depnav/src/depnav/parser.py
Normal file
333
depnav/src/depnav/parser.py
Normal file
@@ -0,0 +1,333 @@
|
||||
Language-specific parsers for extracting dependencies from source files.
|
||||
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
try:
|
||||
import tree_sitter
|
||||
from tree_sitter import Language
|
||||
except ImportError:
|
||||
tree_sitter = None
|
||||
|
||||
try:
|
||||
import tree_sitter_python
|
||||
except ImportError:
|
||||
tree_sitter_python = None
|
||||
|
||||
try:
|
||||
import tree_sitter_javascript
|
||||
except ImportError:
|
||||
tree_sitter_javascript = None
|
||||
|
||||
try:
|
||||
import tree_sitter_go
|
||||
except ImportError:
|
||||
tree_sitter_go = None
|
||||
|
||||
|
||||
class DependencyParser(ABC):
    """Common interface that every language-specific parser implements."""

    @abstractmethod
    def get_language(self) -> str:
        """Return the identifier of the language this parser handles."""

    @abstractmethod
    def parse_file(self, file_path: Path) -> list[str]:
        """Return the dependencies found in the file at ``file_path``."""
|
||||
|
||||
|
||||
def get_language_library(lang: str):
    """Return the tree-sitter grammar module registered for ``lang``.

    The result is whatever the module-level optional import bound (the
    grammar module, or ``None`` when that package is not installed).
    ``None`` is also returned for a language that is not listed here.
    """
    if lang == "python":
        return tree_sitter_python
    # TypeScript is served by the JavaScript grammar module in this file.
    if lang in ("javascript", "typescript"):
        return tree_sitter_javascript
    if lang == "go":
        return tree_sitter_go
    return None
|
||||
|
||||
|
||||
class PythonParser(DependencyParser):
    """Parser for Python files using tree-sitter, with a regex fallback.

    ``parse_file`` returns the sorted, de-duplicated top-level package names
    imported by a file: ``import a.b`` and ``from a.b import c`` both yield
    ``"a"``. Relative imports (``from . import x``) are skipped.

    All ``tree_sitter`` annotations are quoted on purpose: the module binds
    ``tree_sitter = None`` when the package is missing, and an unquoted
    ``tree_sitter.Parser`` annotation would raise ``AttributeError`` while
    the class is being defined, making the regex fallback unreachable.
    """

    # Fallback pattern: an ``import ...`` / ``from ... import ...`` line.
    _IMPORT_RE = re.compile(
        r"^\s*(?:from|import)\s+(.+?)(?:\s+import\s+.*)?(?:\s*;?\s*)$",
        re.MULTILINE,
    )
    # Leading identifier of a module path; fails on ``.``-prefixed (relative)
    # paths and drops trailing ``.sub``, `` as alias`` or ``;`` residue.
    _TOP_LEVEL_RE = re.compile(r"[^\W\d]\w*")

    def __init__(self) -> None:
        # Created lazily by _get_parser() so construction never needs tree-sitter.
        self._parser: Optional["tree_sitter.Parser"] = None

    def _get_parser(self) -> "tree_sitter.Parser":
        """Return the cached tree-sitter parser, creating it on first use.

        Raises:
            ImportError: if ``tree-sitter`` or ``tree-sitter-python`` is
                not installed.
        """
        if self._parser is None:
            if tree_sitter_python is None:
                raise ImportError("tree-sitter-python is not installed")
            if tree_sitter is None:
                raise ImportError("tree-sitter is not installed")
            lang = Language(tree_sitter_python.language())
            parser = tree_sitter.Parser()
            try:
                # Current py-tree-sitter exposes the language as a property.
                parser.language = lang
            except AttributeError:
                # Older releases only provide the setter method.
                parser.set_language(lang)
            # Cache only a fully configured parser.
            self._parser = parser
        return self._parser

    def get_language(self) -> str:
        """Return the language identifier, ``"python"``."""
        return "python"

    def parse_file(self, file_path: Path) -> list[str]:
        """Extract Python imports from a file.

        Returns an empty list when the file cannot be read or decoded as
        UTF-8. Uses tree-sitter when available and falls back to regex
        parsing otherwise, or when tree-sitter parsing fails.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            return []

        if tree_sitter is None or tree_sitter_python is None:
            return self._regex_parse(content)

        try:
            parser = self._get_parser()
            tree = parser.parse(content.encode("utf-8"))
            return self._extract_imports(tree.root_node, content)
        except Exception:
            # Deliberate best-effort: any tree-sitter failure degrades to regex.
            return self._regex_parse(content)

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for Python.

        Line-oriented and unaware of strings/docstrings, so text that looks
        like an import statement inside a string literal is also reported.
        """
        found: set[str] = set()
        for match in self._IMPORT_RE.finditer(content):
            # Drop a trailing ``# comment`` captured with a plain ``import``.
            clause = match.group(1).split("#", 1)[0]
            for part in clause.split(","):
                top_level = self._TOP_LEVEL_RE.match(part.strip())
                if top_level:
                    found.add(top_level.group())
        return sorted(found)

    @staticmethod
    def _node_text(node: "tree_sitter.Node", content: str) -> str:
        """Return the source text of ``node``.

        tree-sitter offsets are byte offsets into the UTF-8 buffer, so they
        must not be used to slice ``content`` (a ``str``) directly.
        """
        raw = getattr(node, "text", None)
        if raw is None:
            raw = content.encode("utf-8")[node.start_byte : node.end_byte]
        return raw.decode("utf-8")

    def _module_names(self, node: "tree_sitter.Node", content: str) -> list[str]:
        """Return the dotted module paths named by one import statement node."""
        if node.type == "from_import_statement":
            # The module is the first dotted name; a relative import in that
            # position refers to the current package, not a dependency.
            for child in node.children:
                if child.type == "relative_import":
                    return []
                if child.type == "dotted_name":
                    return [self._node_text(child, content)]
            return []

        names = []
        for child in node.children:
            target = child
            if child.type == "aliased_import":
                # ``import a.b as c``: the module is the nested dotted name.
                target = next(
                    (sub for sub in child.children if sub.type == "dotted_name"),
                    None,
                )
            if target is not None and target.type == "dotted_name":
                names.append(self._node_text(target, content))
        return names

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from tree-sitter parse tree.

        Walks the subtree iteratively (no recursion-depth limit) and returns
        the sorted unique top-level package names.
        """
        found: set[str] = set()
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type in ("import_statement", "from_import_statement"):
                for module in self._module_names(current, content):
                    top_level = module.split(".")[0]
                    if top_level:
                        found.add(top_level)
            else:
                pending.extend(current.children)
        return sorted(found)

    def _get_module_name(self, node: "tree_sitter.Node", content: str) -> str:
        """Extract the first module name from an import node, or ``""``."""
        names = self._module_names(node, content)
        return names[0] if names else ""
|
||||
|
||||
|
||||
class JavaScriptParser(DependencyParser):
    """Parser for JavaScript/TypeScript files using tree-sitter.

    ``parse_file`` returns the sorted, de-duplicated package names a file
    imports via ``import`` statements or ``require(...)`` calls. Relative
    specifiers (starting with ``.``) are skipped and sub-paths are reduced
    to the package name (``lodash/fp`` -> ``lodash``,
    ``@scope/pkg/sub`` -> ``@scope/pkg``).

    All ``tree_sitter`` annotations are quoted on purpose: the module binds
    ``tree_sitter = None`` when the package is missing, and an unquoted
    ``tree_sitter.Parser`` annotation would raise ``AttributeError`` while
    the class is being defined, making the regex fallback unreachable.
    """

    # ``require("x")`` anywhere in text (regex fallback).
    _REQUIRE_RE = re.compile(r'require\s*\(\s*["\']([^"\']+)["\']\s*\)')
    # A call whose callee is ``require`` or ``something.require``; used with
    # ``match`` so only the call node itself qualifies, not text inside it.
    _REQUIRE_CALL_RE = re.compile(
        r'(?:[\w$]+\.)*require\s*\(\s*["\']([^"\']+)["\']\s*\)'
    )
    # ``import <bindings> from "x"``, including ``import A, { b } from "x"``.
    _IMPORT_FROM_RE = re.compile(
        r'\bimport\s+[\w$*\s,{}]+?\bfrom\s*["\']([^"\']+)["\']'
    )
    # Side-effect import: ``import "x"``.
    _IMPORT_BARE_RE = re.compile(r'\bimport\s+["\']([^"\']+)["\']')
    # The ``from "x"`` clause inside a single import statement's text.
    _FROM_CLAUSE_RE = re.compile(r'\bfrom\s*["\']([^"\']+)["\']')

    def __init__(self, typescript: bool = False) -> None:
        # Created lazily by _get_parser() so construction never needs tree-sitter.
        self._parser: Optional["tree_sitter.Parser"] = None
        self._typescript = typescript

    def _get_parser(self) -> "tree_sitter.Parser":
        """Return the cached tree-sitter parser, creating it on first use.

        Raises:
            ImportError: if ``tree-sitter`` or ``tree-sitter-javascript``
                is not installed.
        """
        if self._parser is None:
            if tree_sitter_javascript is None:
                raise ImportError("tree-sitter-javascript is not installed")
            if tree_sitter is None:
                raise ImportError("tree-sitter is not installed")
            lang = Language(tree_sitter_javascript.language())
            parser = tree_sitter.Parser()
            try:
                # Current py-tree-sitter exposes the language as a property.
                parser.language = lang
            except AttributeError:
                # Older releases only provide the setter method.
                parser.set_language(lang)
            # Cache only a fully configured parser.
            self._parser = parser
        return self._parser

    def get_language(self) -> str:
        """Return ``"typescript"`` or ``"javascript"`` per the constructor flag."""
        return "typescript" if self._typescript else "javascript"

    def parse_file(self, file_path: Path) -> list[str]:
        """Extract JavaScript/TypeScript imports from a file.

        Returns an empty list when the file cannot be read or decoded as
        UTF-8. Uses tree-sitter when available and falls back to regex
        parsing otherwise, or when tree-sitter parsing fails.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            return []

        if tree_sitter is None or tree_sitter_javascript is None:
            return self._regex_parse(content)

        try:
            parser = self._get_parser()
            tree = parser.parse(content.encode("utf-8"))
            return self._extract_imports(tree.root_node, content)
        except Exception:
            # Deliberate best-effort: any tree-sitter failure degrades to regex.
            return self._regex_parse(content)

    @staticmethod
    def _package_name(module: str) -> Optional[str]:
        """Reduce an import specifier to its package name.

        Returns ``None`` for relative specifiers and for specifiers with an
        empty first segment (e.g. absolute paths).
        """
        if not module or module.startswith("."):
            return None
        parts = module.split("/")
        if module.startswith("@") and len(parts) >= 2:
            # Scoped packages are named ``@scope/name``.
            return "/".join(parts[:2])
        return parts[0] or None

    @staticmethod
    def _node_text(node: "tree_sitter.Node", content: str) -> str:
        """Return the source text of ``node``.

        tree-sitter offsets are byte offsets into the UTF-8 buffer, so they
        must not be used to slice ``content`` (a ``str``) directly.
        """
        raw = getattr(node, "text", None)
        if raw is None:
            raw = content.encode("utf-8")[node.start_byte : node.end_byte]
        return raw.decode("utf-8")

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for JavaScript/TypeScript.

        Purely textual, so import-like text inside strings or comments is
        also reported.
        """
        found: set[str] = set()
        patterns = (self._REQUIRE_RE, self._IMPORT_FROM_RE, self._IMPORT_BARE_RE)
        for pattern in patterns:
            for match in pattern.finditer(content):
                package = self._package_name(match.group(1))
                if package:
                    found.add(package)
        return sorted(found)

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from tree-sitter parse tree.

        Walks the subtree iteratively and dispatches on node type, so an
        import statement is never mistaken for a ``require`` call merely
        because its text contains the word "require" (and vice versa).
        """
        found: set[str] = set()
        pending = [node]
        while pending:
            current = pending.pop()
            match = None
            if current.type == "import_statement":
                text = self._node_text(current, content)
                match = self._FROM_CLAUSE_RE.search(
                    text
                ) or self._IMPORT_BARE_RE.search(text)
            elif current.type == "call_expression":
                # Anchored at the start: only a call to ``require`` itself.
                match = self._REQUIRE_CALL_RE.match(
                    self._node_text(current, content)
                )
            if match:
                package = self._package_name(match.group(1))
                if package:
                    found.add(package)
            # Nested calls such as ``foo(require("x"))`` are reached here.
            pending.extend(current.children)
        return sorted(found)
|
||||
|
||||
|
||||
class GoParser(DependencyParser):
    """Parser for Go files using tree-sitter, with a regex fallback.

    ``parse_file`` returns the sorted, de-duplicated import paths of a file,
    each truncated to its first two ``/``-separated segments
    (``github.com/user/repo/pkg`` -> ``github.com/user``, ``fmt`` -> ``fmt``).
    Paths starting with ``.`` are skipped.

    All ``tree_sitter`` annotations are quoted on purpose: the module binds
    ``tree_sitter = None`` when the package is missing, and an unquoted
    ``tree_sitter.Parser`` annotation would raise ``AttributeError`` while
    the class is being defined, making the regex fallback unreachable.
    """

    # Grouped form: ``import ( ... )``; group 1 is the block body.
    _IMPORT_BLOCK_RE = re.compile(r"^[ \t]*import\s*\(([^)]*)\)", re.MULTILINE)
    # Single form: ``import "x"`` with an optional alias, ``_`` or ``.``.
    _IMPORT_SINGLE_RE = re.compile(
        r'^[ \t]*import\s+(?:[\w.]+\s+)?["`]([^"`]+)["`]', re.MULTILINE
    )
    # A quoted (interpreted or raw) import path.
    _PATH_RE = re.compile(r'["`]([^"`]+)["`]')

    def __init__(self) -> None:
        # Created lazily by _get_parser() so construction never needs tree-sitter.
        self._parser: Optional["tree_sitter.Parser"] = None

    def _get_parser(self) -> "tree_sitter.Parser":
        """Return the cached tree-sitter parser, creating it on first use.

        Raises:
            ImportError: if ``tree-sitter`` or ``tree-sitter-go`` is not
                installed.
        """
        if self._parser is None:
            if tree_sitter_go is None:
                raise ImportError("tree-sitter-go is not installed")
            if tree_sitter is None:
                raise ImportError("tree-sitter is not installed")
            lang = Language(tree_sitter_go.language())
            parser = tree_sitter.Parser()
            try:
                # Current py-tree-sitter exposes the language as a property.
                parser.language = lang
            except AttributeError:
                # Older releases only provide the setter method.
                parser.set_language(lang)
            # Cache only a fully configured parser.
            self._parser = parser
        return self._parser

    def get_language(self) -> str:
        """Return the language identifier, ``"go"``."""
        return "go"

    def parse_file(self, file_path: Path) -> list[str]:
        """Extract Go imports from a file.

        Returns an empty list when the file cannot be read or decoded as
        UTF-8. Uses tree-sitter when available and falls back to regex
        parsing otherwise, or when tree-sitter parsing fails.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            return []

        if tree_sitter is None or tree_sitter_go is None:
            return self._regex_parse(content)

        try:
            parser = self._get_parser()
            tree = parser.parse(content.encode("utf-8"))
            return self._extract_imports(tree.root_node, content)
        except Exception:
            # Deliberate best-effort: any tree-sitter failure degrades to regex.
            return self._regex_parse(content)

    @staticmethod
    def _normalize(module: str) -> Optional[str]:
        """Truncate an import path to at most two segments.

        Returns ``None`` for empty paths and paths starting with ``.``.
        """
        if not module or module.startswith("."):
            return None
        return "/".join(module.split("/")[:2])

    @staticmethod
    def _node_text(node: "tree_sitter.Node", content: str) -> str:
        """Return the source text of ``node``.

        tree-sitter offsets are byte offsets into the UTF-8 buffer, so they
        must not be used to slice ``content`` (a ``str``) directly.
        """
        raw = getattr(node, "text", None)
        if raw is None:
            raw = content.encode("utf-8")[node.start_byte : node.end_byte]
        return raw.decode("utf-8")

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for Go.

        Handles both ``import "x"`` and grouped ``import ( ... )`` forms,
        including aliased entries and trailing ``//`` comments. Only spans
        introduced by the ``import`` keyword are considered.
        """
        paths = list(self._IMPORT_SINGLE_RE.findall(content))
        for block in self._IMPORT_BLOCK_RE.findall(content):
            for line in block.splitlines():
                # Ignore anything after a line comment before looking for
                # the quoted path.
                spec = self._PATH_RE.search(line.split("//", 1)[0])
                if spec:
                    paths.append(spec.group(1))

        found: set[str] = set()
        for path in paths:
            package = self._normalize(path)
            if package:
                found.add(package)
        return sorted(found)

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from tree-sitter parse tree.

        Collects every ``import_spec`` node, so each entry of a grouped
        ``import ( ... )`` declaration is reported rather than only the
        first. The walk is iterative to avoid recursion-depth limits.
        """
        found: set[str] = set()
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type == "import_spec":
                spec = self._PATH_RE.search(self._node_text(current, content))
                if spec:
                    package = self._normalize(spec.group(1))
                    if package:
                        found.add(package)
            else:
                pending.extend(current.children)
        return sorted(found)
|
||||
|
||||
|
||||
def get_parser(language: str) -> DependencyParser:
|
||||
"""Factory function to get the appropriate parser for a language."""
|
||||
if language.lower() == "python":
|
||||
return PythonParser()
|
||||
elif language.lower() == "javascript":
|
||||
return JavaScriptParser()
|
||||
elif language.lower() == "typescript":
|
||||
return JavaScriptParser(typescript=True)
|
||||
elif language.lower() == "go":
|
||||
return GoParser()
|
||||
else:
|
||||
raise ValueError(f"Unsupported language: {language}")
|
||||
|
||||
|
||||
def detect_language(file_path: Path) -> Optional[str]:
    """Map a file's extension (case-insensitive) to a language identifier.

    Returns ``None`` when the extension is not recognised.
    """
    suffix = file_path.suffix.lower()
    suffixes_by_language = (
        ("python", (".py",)),
        ("javascript", (".js", ".jsx")),
        ("typescript", (".ts", ".tsx")),
        ("go", (".go",)),
    )
    for language, suffixes in suffixes_by_language:
        if suffix in suffixes:
            return language
    return None
|
||||
|
||||
|
||||
def parse_dependencies(
    file_path: Path, language: Optional[str] = None
) -> list[str]:
    """Return the dependencies of ``file_path``.

    When ``language`` is omitted it is detected from the file extension; an
    undetectable language yields an empty list. An explicitly supplied but
    unsupported language propagates the ``ValueError`` from ``get_parser``.
    """
    resolved = language if language is not None else detect_language(file_path)
    if resolved is None:
        return []
    return get_parser(resolved).parse_file(file_path)
|
||||
Reference in New Issue
Block a user