# Recovered from a whitespace-collapsed git patch:
#   commit c2deb7fd58278e8cce6b77229533d1bad18c3a7e
#   author 7000pctAUTO, Fri, 30 Jan 2026 17:19:21 +0000
#   subject "fix: resolve CI type checking and lint failures"
#   target  app/depnav/src/depnav/parser.py
"""Language-specific parsers for extracting dependencies from source files.

Each parser prefers a tree-sitter parse when both ``tree_sitter`` and the
matching grammar package are importable, and falls back to regular
expressions otherwise.  All parsers return a sorted list of unique
top-level dependency names.
"""

import logging
import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable, Iterator, Literal, Optional

try:
    import tree_sitter
    from tree_sitter import Language
except ImportError:
    tree_sitter = None  # type: ignore[assignment]
    Language = None  # type: ignore[assignment,misc]

try:
    import tree_sitter_python
except ImportError:
    tree_sitter_python = None  # type: ignore[assignment]

try:
    import tree_sitter_javascript
except ImportError:
    tree_sitter_javascript = None  # type: ignore[assignment]

try:
    import tree_sitter_go
except ImportError:
    tree_sitter_go = None  # type: ignore[assignment]


logger = logging.getLogger(__name__)

LanguageType = Literal["python", "javascript", "typescript", "go"]

# File suffix (lower-case) -> language identifier understood by get_parser().
_EXTENSION_LANGUAGES = {
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
}


class DependencyParser(ABC):
    """Abstract base class for language parsers."""

    @abstractmethod
    def parse_file(self, file_path: Path) -> list[str]:
        """Extract dependencies from a file."""

    @abstractmethod
    def get_language(self) -> str:
        """Return the language identifier."""


def get_language_library(lang: str):
    """Return the tree-sitter grammar module for *lang*.

    Returns ``None`` when the language is unknown or its grammar package
    could not be imported.  TypeScript maps to the JavaScript grammar.
    """
    lang_map = {
        "python": tree_sitter_python,
        "javascript": tree_sitter_javascript,
        "typescript": tree_sitter_javascript,
        "go": tree_sitter_go,
    }
    return lang_map.get(lang)


def _node_text(node: "tree_sitter.Node", content: str) -> str:
    """Return the source text covered by *node*.

    tree-sitter reports *byte* offsets, so slicing the decoded ``str``
    directly is wrong as soon as the file contains non-ASCII characters.
    """
    raw = getattr(node, "text", None)
    if raw is None:
        raw = content.encode("utf-8")[node.start_byte : node.end_byte]
    return raw.decode("utf-8", errors="replace")


def _walk(root: "tree_sitter.Node") -> Iterator["tree_sitter.Node"]:
    """Yield *root* and all of its descendants depth-first, without recursion."""
    stack = [root]
    while stack:
        node = stack.pop()
        yield node
        stack.extend(reversed(node.children))


class _TreeSitterParser(DependencyParser):
    """Shared tree-sitter-with-regex-fallback machinery for all parsers."""

    # Key passed to get_language_library(); also used in error messages.
    _grammar_name = ""

    def __init__(self) -> None:
        self._parser = None  # lazily created by _get_parser()

    def _get_parser(self):
        """Return the cached tree-sitter parser, creating it on first use.

        Raises:
            ImportError: if tree-sitter or the grammar package is missing.
        """
        if self._parser is None:
            grammar = get_language_library(self._grammar_name)
            if grammar is None:
                raise ImportError(
                    f"tree-sitter-{self._grammar_name} is not installed"
                )
            if tree_sitter is None:
                raise ImportError("tree-sitter is not installed")
            lang = Language(grammar.language())  # type: ignore[arg-type]
            try:
                # py-tree-sitter >= 0.22: language is a constructor argument.
                parser = tree_sitter.Parser(lang)
            except TypeError:
                # Older releases: Parser() takes no arguments.
                parser = tree_sitter.Parser()
                parser.set_language(lang)
            self._parser = parser
        return self._parser

    def parse_file(self, file_path: Path) -> list[str]:
        """Extract dependencies from *file_path*.

        Returns an empty list when the file cannot be read or decoded as
        UTF-8.  Any failure in the tree-sitter path falls back to regex
        parsing (deliberate best-effort behaviour).
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            return []

        if tree_sitter is None or get_language_library(self._grammar_name) is None:
            return self._regex_parse(content)

        try:
            tree = self._get_parser().parse(content.encode("utf-8"))
            return self._extract_imports(tree.root_node, content)
        except Exception:
            logger.debug(
                "tree-sitter parse of %s failed; using regex fallback",
                file_path,
                exc_info=True,
            )
            return self._regex_parse(content)

    @abstractmethod
    def _regex_parse(self, content: str) -> list[str]:
        """Extract dependencies from *content* using regular expressions."""

    @abstractmethod
    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract dependencies from the tree-sitter subtree rooted at *node*."""


class PythonParser(_TreeSitterParser):
    """Parser for Python files using tree-sitter."""

    _grammar_name = "python"
    _FROM_RE = re.compile(r"from\s+([\w.]+)\s+import\b")
    _IMPORT_RE = re.compile(r"import\s+(.+)")

    def get_language(self) -> str:
        return "python"

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for Python.

        Returns the sorted top-level package names of absolute imports;
        relative imports (``from . import x``) are ignored.
        """
        found = set()
        for line in content.splitlines():
            code = line.split("#", 1)[0]  # drop trailing comments
            for statement in code.split(";"):
                statement = statement.strip()
                from_match = self._FROM_RE.match(statement)
                if from_match:
                    # A leading "." (relative import) yields "" here.
                    found.add(from_match.group(1).split(".")[0])
                    continue
                import_match = self._IMPORT_RE.match(statement)
                if not import_match:
                    continue
                for clause in import_match.group(1).split(","):
                    tokens = clause.split()
                    # Accept only "pkg" or "pkg as alias"; this also rejects
                    # prose such as "import the data" inside docstrings.
                    if len(tokens) == 1 or (
                        len(tokens) == 3 and tokens[1] == "as"
                    ):
                        found.add(tokens[0].split(".")[0])
        return sorted(name for name in found if name.isidentifier())

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from tree-sitter parse tree."""
        found = set()
        for current in _walk(node):
            if current.type == "import_statement":
                # Children are dotted_name or aliased_import, one per target.
                for child in current.children:
                    target = child
                    if child.type == "aliased_import":
                        target = child.child_by_field_name("name")
                    if target is not None and target.type == "dotted_name":
                        found.add(_node_text(target, content).split(".")[0])
            elif current.type in ("import_from_statement", "from_import_statement"):
                module = current.child_by_field_name("module_name")
                # relative_import modules are local, not dependencies.
                if module is not None and module.type == "dotted_name":
                    found.add(_node_text(module, content).split(".")[0])
            elif current.type == "future_import_statement":
                found.add("__future__")  # matches the regex fallback
        found.discard("")
        return sorted(found)

    def _get_module_name(self, node: "tree_sitter.Node", content: str) -> str:
        """Return the text of the first dotted-name child of *node*, or ``""``.

        Retained for backward compatibility; ``_extract_imports`` no longer
        relies on it because it cannot see aliased or multiple imports.
        """
        for child in node.children:
            if child.type in ("dotted_name", "module"):
                return _node_text(child, content)
        return ""


class JavaScriptParser(_TreeSitterParser):
    """Parser for JavaScript/TypeScript files using tree-sitter.

    NOTE(review): TypeScript is parsed with the JavaScript grammar, so
    TS-only syntax may produce error nodes in the tree-sitter path.
    """

    _grammar_name = "javascript"
    _REQUIRE_RE = re.compile(r"""\brequire\s*\(\s*["']([^"']+)["']\s*\)""")
    # Covers `import "x"`, `import a from "x"`, `import a, {b} from "x"`,
    # `import * as a from "x"` and multi-line named imports.
    _IMPORT_RE = re.compile(
        r"""\bimport\b\s*(?:[^"';()]*?\bfrom\b\s*)?["']([^"']+)["']"""
    )

    def __init__(self, typescript: bool = False) -> None:
        super().__init__()
        self._typescript = typescript

    def get_language(self) -> str:
        return "typescript" if self._typescript else "javascript"

    @staticmethod
    def _package_name(specifier: str) -> str:
        """Map a module specifier to its package name.

        Returns ``""`` for relative or absolute paths.  Scoped packages
        keep their scope (``@scope/pkg/sub`` -> ``@scope/pkg``).
        """
        if not specifier or specifier.startswith((".", "/")):
            return ""
        parts = specifier.split("/")
        if specifier.startswith("@") and len(parts) >= 2:
            return "/".join(parts[:2])
        return parts[0]

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for JavaScript/TypeScript."""
        found = set()
        for pattern in (self._REQUIRE_RE, self._IMPORT_RE):
            for match in pattern.finditer(content):
                found.add(self._package_name(match.group(1)))
        found.discard("")
        return sorted(found)

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from tree-sitter parse tree."""
        found = set()
        for current in _walk(node):
            if current.type == "import_statement":
                match = self._IMPORT_RE.search(_node_text(current, content))
            elif current.type == "call_expression":
                # Anchored match: only a call that itself starts with
                # require(...) counts; nested calls are visited on their own.
                match = self._REQUIRE_RE.match(_node_text(current, content))
            else:
                continue
            if match:
                found.add(self._package_name(match.group(1)))
        found.discard("")
        return sorted(found)


class GoParser(_TreeSitterParser):
    """Parser for Go files using tree-sitter."""

    _grammar_name = "go"
    _IMPORT_BLOCK_RE = re.compile(r"^[ \t]*import[ \t]*\(([^)]*)\)", re.MULTILINE)
    _IMPORT_LINE_RE = re.compile(r"^[ \t]*import[ \t]+([^(\n]+)$", re.MULTILINE)
    _PATH_RE = re.compile(r'["`]([^"`\n]+)["`]')

    def get_language(self) -> str:
        return "go"

    @staticmethod
    def _module_root(path: str) -> str:
        """Reduce an import path to at most its first two segments.

        Returns ``""`` for empty or dot-prefixed (relative) paths.
        """
        if not path or path.startswith("."):
            return ""
        return "/".join(path.split("/")[:2])

    def _paths_in(self, text: str) -> Iterator[str]:
        """Yield every quoted import path in *text*, ignoring ``//`` comments."""
        for line in text.splitlines():
            for match in self._PATH_RE.finditer(line.split("//", 1)[0]):
                yield match.group(1)

    def _regex_parse(self, content: str) -> list[str]:
        """Fallback regex-based parsing for Go.

        Handles both grouped ``import ( ... )`` blocks and single-line
        ``import "path"`` declarations, with or without an alias.
        """
        found = set()
        for pattern in (self._IMPORT_BLOCK_RE, self._IMPORT_LINE_RE):
            for match in pattern.finditer(content):
                for path in self._paths_in(match.group(1)):
                    found.add(self._module_root(path))
        found.discard("")
        return sorted(found)

    def _extract_imports(
        self, node: "tree_sitter.Node", content: str
    ) -> list[str]:
        """Extract imports from tree-sitter parse tree."""
        found = set()
        for current in _walk(node):
            if current.type != "import_declaration":
                continue
            # A grouped declaration holds several paths; collect all of them.
            for path in self._paths_in(_node_text(current, content)):
                found.add(self._module_root(path))
        found.discard("")
        return sorted(found)


def get_parser(language: str) -> DependencyParser:
    """Factory function to get the appropriate parser for a language.

    Args:
        language: Case-insensitive language identifier.

    Raises:
        ValueError: if *language* is not supported.
    """
    factories: dict[str, Callable[[], DependencyParser]] = {
        "python": PythonParser,
        "javascript": JavaScriptParser,
        "typescript": lambda: JavaScriptParser(typescript=True),
        "go": GoParser,
    }
    factory = factories.get(language.lower())
    if factory is None:
        raise ValueError(f"Unsupported language: {language}")
    return factory()


def detect_language(file_path: Path) -> Optional[str]:
    """Detect the language of a file based on its extension.

    Returns ``None`` when the (case-insensitive) suffix is not recognised.
    """
    return _EXTENSION_LANGUAGES.get(file_path.suffix.lower())


def parse_dependencies(
    file_path: Path, language: Optional[str] = None
) -> list[str]:
    """Parse dependencies from a file.

    Args:
        file_path: File to analyse.
        language: Language identifier; detected from the suffix when omitted.

    Returns:
        Sorted unique dependency names, or ``[]`` when the language cannot
        be detected.

    Raises:
        ValueError: if an explicit *language* is not supported.
    """
    if language is None:
        language = detect_language(file_path)
    if language is None:
        return []
    return get_parser(language).parse_file(file_path)