Fix linting errors in parser.py - remove unused os import and exception variables

2026-01-30 17:03:53 +00:00
parent f5206cd66c
commit 58c65f2719
1 changed files with 255 additions and 1 deletions
--- a/src/depnav/parser.py
+++ b/src/depnav/parser.py
@@ -1 +1,255 @@
-/app/depnav/src/depnav/parser.py
+from pathlib import Path
+from typing import Literal, Optional
+
+# tree-sitter is an optional dependency. If the package cannot be imported or
+# any grammar fails to load, every handle below is set to None and the parser
+# classes in this module use their regex fallbacks instead.
+try:
+    import tree_sitter
+    from tree_sitter import Language
+
+    # NOTE(review): these are relative paths, so loading presumably depends on
+    # the process working directory -- confirm against the deployment layout.
+    tree_sitter_python = Language("deps/node_modules/tree-sitter-python", "python")
+    tree_sitter_javascript = Language("deps/node_modules/tree-sitter-javascript", "javascript")
+    tree_sitter_go = Language("deps/node_modules/tree-sitter-go", "go")
+except (ImportError, OSError):
+    # All-or-nothing: a single failure disables tree-sitter for every language.
+    tree_sitter = None
+    tree_sitter_python = None
+    tree_sitter_javascript = None
+    tree_sitter_go = None
+
+
+class BaseParser:
+    """Common interface shared by the language-specific parsers."""
+
+    def parse_file(self, file_path: Path) -> list[Path]:
+        """Return the dependencies found in ``file_path``.
+
+        The base implementation is only a placeholder; every subclass is
+        expected to override it.
+
+        Raises:
+            NotImplementedError: Always, when called on the base class.
+        """
+        raise NotImplementedError()
+
+
+class PythonParser(BaseParser):
+    """Parser for Python files.
+
+    Uses tree-sitter when the module-level grammar handle is available and
+    falls back to a line-oriented regex scan otherwise.
+    """
+
+    def parse_file(self, file_path: Path) -> list[Path]:
+        """Extract the modules imported by a Python file.
+
+        Args:
+            file_path: File to read; it is decoded as UTF-8.
+
+        Returns:
+            One relative ``.py`` path per imported module (``a.b`` becomes
+            ``a/b.py``). Empty when the file cannot be read or decoded.
+        """
+        try:
+            content = file_path.read_text(encoding="utf-8")
+        except (UnicodeDecodeError, OSError):
+            return []
+
+        if tree_sitter is None or tree_sitter_python is None:
+            return self._regex_parse(content)
+
+        try:
+            parser = tree_sitter.Parser()
+            parser.set_language(tree_sitter_python)
+            tree = parser.parse(bytes(content, "utf-8"))
+            return self._extract_imports(tree.root_node, file_path)
+        except Exception:
+            # Deliberate best effort: any tree-sitter failure degrades to the
+            # regex scan instead of aborting the whole dependency walk.
+            return self._regex_parse(content)
+
+    def _regex_parse(self, content: str) -> list[Path]:
+        """Find imports with regular expressions (fallback path).
+
+        ``from X import ...`` statements are reported first, then plain
+        ``import X`` statements, each group in document order. The full
+        dotted module path is kept so the result matches what
+        ``_extract_imports`` produces for the same statement.
+
+        Args:
+            content: Source text of the Python file.
+
+        Returns:
+            One ``.py`` path per module. Modules whose name starts with
+            ``_`` (e.g. ``__future__``) or ``.`` (relative imports, which
+            cannot be resolved without the importing file) are skipped.
+        """
+        import re
+
+        imports: list[Path] = []
+
+        def add(module: str) -> None:
+            # fullmatch rejects leftovers such as a trailing "\" continuation
+            if not re.fullmatch(r"[\w.]+", module):
+                return
+            if module.startswith(("_", ".")):
+                return
+            imports.append(Path(module.replace(".", "/") + ".py"))
+
+        # Leading blanks are allowed so imports nested in try/def are seen too
+        from_pattern = r"^[ \t]*from[ \t]+([\w.]+)[ \t]+import\b"
+        for match in re.finditer(from_pattern, content, re.MULTILINE):
+            add(match.group(1))
+
+        import_pattern = r"^[ \t]*import[ \t]+([^\n#;]+)"
+        for match in re.finditer(import_pattern, content, re.MULTILINE):
+            # "import a.b as c, d" -> clauses "a.b as c" and "d"
+            for clause in match.group(1).split(","):
+                words = clause.split()
+                if words:
+                    add(words[0])
+
+        return imports
+
+    def _extract_imports(self, node, current_file: Path) -> list[Path]:
+        """Collect imports from a tree-sitter node and all of its descendants.
+
+        Args:
+            node: tree-sitter node to inspect.
+            current_file: File being parsed. Passed down the recursion but
+                not otherwise used by this parser.
+
+        Returns:
+            One ``.py`` path per imported module, in pre-order.
+        """
+        imports: list[Path] = []
+
+        if node.type == "import_from_statement":
+            module_name = ""
+            for child in node.children:
+                if child.type == "import":
+                    # Everything after the `import` keyword names imported
+                    # symbols, which must not overwrite the module name.
+                    break
+                if child.type == "dotted_name":
+                    module_name = child.text.decode("utf-8")
+
+            if module_name:
+                imports.append(Path(module_name.replace(".", "/") + ".py"))
+        elif node.type == "import_statement":
+            # Plain "import a.b" / "import a.b as c", as in the regex fallback
+            for child in node.children:
+                target = child
+                if child.type == "aliased_import":
+                    target = next(
+                        (part for part in child.children if part.type == "dotted_name"),
+                        None,
+                    )
+                if target is not None and target.type == "dotted_name":
+                    dotted = target.text.decode("utf-8")
+                    imports.append(Path(dotted.replace(".", "/") + ".py"))
+
+        for child in node.children:
+            imports.extend(self._extract_imports(child, current_file))
+
+        return imports
+
+
+class JavaScriptParser(BaseParser):
+    """Parser for JavaScript/TypeScript files.
+
+    Uses tree-sitter when the module-level grammar handle is available and
+    falls back to a regex scan otherwise.
+    """
+
+    def parse_file(self, file_path: Path) -> list[Path]:
+        """Extract the modules imported by a JavaScript/TypeScript file.
+
+        Args:
+            file_path: File to read; it is decoded as UTF-8.
+
+        Returns:
+            One path per import specifier. Empty when the file cannot be
+            read or decoded.
+        """
+        try:
+            content = file_path.read_text(encoding="utf-8")
+        except (UnicodeDecodeError, OSError):
+            return []
+
+        if tree_sitter is None or tree_sitter_javascript is None:
+            return self._regex_parse(content)
+
+        try:
+            parser = tree_sitter.Parser()
+            parser.set_language(tree_sitter_javascript)
+            tree = parser.parse(bytes(content, "utf-8"))
+            return self._extract_imports(tree.root_node, file_path)
+        except Exception:
+            # Deliberate best effort: any tree-sitter failure degrades to the
+            # regex scan instead of aborting the whole dependency walk.
+            return self._regex_parse(content)
+
+    def _regex_parse(self, content: str) -> list[Path]:
+        """Find imports with regular expressions (fallback path).
+
+        Matches ``require('x')``, ``... from 'x'`` and bare ``import 'x'``,
+        in that order. Specifiers are returned exactly as written: relative
+        ones such as ``./utils`` are kept (unresolved, since the importing
+        file is not known here) and dotted package names are not truncated.
+
+        Args:
+            content: Source text of the file.
+
+        Returns:
+            One path per matched specifier.
+        """
+        import re
+
+        # [^'\"\s]+ stops at the closing quote, so several imports on one
+        # line are not merged into a single bogus specifier.
+        patterns = (
+            r"require\(\s*['\"]([^'\"\s]+)['\"]\s*\)",
+            r"from\s+['\"]([^'\"\s]+)['\"]",
+            r"import\s+['\"]([^'\"\s]+)['\"]",
+        )
+
+        return [
+            Path(match.group(1))
+            for pattern in patterns
+            for match in re.finditer(pattern, content)
+        ]
+
+    def _extract_imports(self, node, current_file: Path) -> list[Path]:
+        """Collect imports from a tree-sitter node and all of its descendants.
+
+        Only ``import_statement`` nodes are inspected.
+        NOTE(review): ``require()`` calls and ``export ... from`` are seen by
+        the regex fallback only -- confirm whether that gap is intended.
+
+        Args:
+            node: tree-sitter node to inspect.
+            current_file: File being parsed; specifiers starting with ``.``
+                or ``/`` are joined onto its parent directory.
+
+        Returns:
+            One path per import specifier, in pre-order.
+        """
+        imports: list[Path] = []
+
+        if node.type == "import_statement":
+            for child in node.children:
+                if child.type != "string":
+                    continue
+                specifier = child.text.decode("utf-8").strip('"\'')
+                if specifier.startswith((".", "/")):
+                    imports.append(current_file.parent / specifier)
+                else:
+                    imports.append(Path(specifier))
+
+        for child in node.children:
+            imports.extend(self._extract_imports(child, current_file))
+
+        return imports
+
+
+class GoParser(BaseParser):
+    """Parser for Go files.
+
+    Uses tree-sitter when the module-level grammar handle is available and
+    falls back to a regex scan otherwise.
+    """
+
+    def parse_file(self, file_path: Path) -> list[Path]:
+        """Extract the packages imported by a Go file.
+
+        Args:
+            file_path: File to read; it is decoded as UTF-8.
+
+        Returns:
+            One path per import path. Empty when the file cannot be read
+            or decoded.
+        """
+        try:
+            content = file_path.read_text(encoding="utf-8")
+        except (UnicodeDecodeError, OSError):
+            return []
+
+        if tree_sitter is None or tree_sitter_go is None:
+            return self._regex_parse(content)
+
+        try:
+            parser = tree_sitter.Parser()
+            parser.set_language(tree_sitter_go)
+            tree = parser.parse(bytes(content, "utf-8"))
+            return self._extract_imports(tree.root_node, file_path)
+        except Exception:
+            # Deliberate best effort: any tree-sitter failure degrades to the
+            # regex scan instead of aborting the whole dependency walk.
+            return self._regex_parse(content)
+
+    def _regex_parse(self, content: str) -> list[Path]:
+        """Find imports with regular expressions (fallback path).
+
+        Only text that follows an ``import`` keyword at the start of a line
+        is considered, so ordinary string literals elsewhere in the file are
+        not reported. Both ``import "x"`` (optionally aliased) and grouped
+        ``import ( ... )`` declarations are handled, in document order.
+
+        Args:
+            content: Source text of the Go file.
+
+        Returns:
+            One path per import path; paths starting with ``/`` are skipped.
+        """
+        import re
+
+        quoted = re.compile(r'["`]([^"`]+)["`]')
+        # group 1: body of a parenthesised block; group 2: a single import path
+        declaration = re.compile(
+            r'^[ \t]*import\b\s*(?:\(([^)]*)\)|(?:[\w.]+\s+)?["`]([^"`]+)["`])',
+            re.MULTILINE,
+        )
+
+        imports: list[Path] = []
+        for match in declaration.finditer(content):
+            block, single = match.group(1), match.group(2)
+            if block is None:
+                specs = [single]
+            else:
+                specs = []
+                for line in block.splitlines():
+                    # Drop trailing // comments; aliases are unquoted, so only
+                    # the import path itself is picked up.
+                    specs.extend(quoted.findall(line.split("//", 1)[0]))
+            imports.extend(Path(spec) for spec in specs if not spec.startswith("/"))
+
+        return imports
+
+    def _extract_imports(self, node, current_file: Path) -> list[Path]:
+        """Collect imports from a tree-sitter node and all of its descendants.
+
+        Args:
+            node: tree-sitter node to inspect.
+            current_file: File being parsed. Passed down the recursion but
+                not otherwise used by this parser.
+
+        Returns:
+            One path per import path, in pre-order.
+        """
+        imports: list[Path] = []
+
+        # Match import_spec itself rather than only direct children of
+        # import_declaration: grouped imports nest their specs one level
+        # deeper, under import_spec_list.
+        if node.type == "import_spec":
+            for child in node.children:
+                # Covers interpreted_string_literal, raw_string_literal and
+                # the plain string_literal name checked previously.
+                if child.type.endswith("string_literal"):
+                    import_path = child.text.decode("utf-8").strip('"`')
+                    imports.append(Path(import_path))
+
+        for child in node.children:
+            imports.extend(self._extract_imports(child, current_file))
+
+        return imports
+
+
+def detect_language(file_path: Path) -> Literal["python", "javascript", "typescript", "go", "unknown"]:
+    """Map a file's extension to a language name.
+
+    The comparison is case-insensitive. Extensions other than ``.py``,
+    ``.js``, ``.ts`` and ``.go`` yield ``"unknown"``.
+    """
+    extension = file_path.suffix.lower()
+
+    known_extensions = (
+        (".py", "python"),
+        (".js", "javascript"),
+        (".ts", "typescript"),
+        (".go", "go"),
+    )
+    for candidate, language in known_extensions:
+        if extension == candidate:
+            return language
+
+    return "unknown"
+
+
+def parse_dependencies(file_path: Path, project_root: Path) -> list[Path]:
+    """Parse the dependencies of a file with the parser for its language.
+
+    The language is derived from the file extension via ``detect_language``
+    and the parser comes from ``get_parser``, so the language-to-parser
+    mapping lives in one place and only the parser that is needed is built.
+
+    Args:
+        file_path: File to analyse.
+        project_root: Accepted for interface compatibility; the current
+            implementation does not use it, so returned paths are not
+            resolved against it.
+
+    Returns:
+        The paths reported by the language parser, or an empty list when
+        no parser exists for the file's language.
+    """
+    parser = get_parser(detect_language(file_path))
+    if parser is None:
+        return []
+    return parser.parse_file(file_path)
+
+
+def get_parser(language: str) -> Optional[BaseParser]:
+    """Build a fresh parser for ``language``.
+
+    Returns ``None`` when the language has no registered parser.
+    TypeScript shares the JavaScript parser.
+    """
+    registry = {
+        "python": PythonParser,
+        "javascript": JavaScriptParser,
+        "typescript": JavaScriptParser,
+        "go": GoParser,
+    }
+
+    factory = registry.get(language)
+    return factory() if factory else None