diff --git a/codechunk/core/parser.py b/codechunk/core/parser.py index ad18d88..6d78215 100644 --- a/codechunk/core/parser.py +++ b/codechunk/core/parser.py @@ -1,8 +1,7 @@ from pathlib import Path -from typing import List, Optional, Dict, Any -from dataclasses import dataclass, field -import re +from typing import Optional import os +import re from codechunk.core.chunking import ParsedChunk, ChunkMetadata @@ -41,23 +40,23 @@ LANGUAGE_EXTENSIONS = { class CodeParser: def __init__(self): - self.files: List[Path] = [] - self.file_contents: Dict[Path, str] = {} + self.files: list[Path] = [] + self.file_contents: dict[Path, str] = {} def detect_language(self, file_path: Path) -> Optional[str]: """Detect programming language from file extension.""" ext = file_path.suffix.lower() return LANGUAGE_EXTENSIONS.get(ext) - def discover_files(self, project_path: Path, include_patterns: List[str], - exclude_patterns: List[str]) -> None: + def discover_files(self, project_path: Path, include_patterns: list[str], + exclude_patterns: list[str]) -> None: """Discover source files in project directory.""" from fnmatch import fnmatch self.files = [] project_path = Path(project_path) - for root, dirs, files in os.walk(project_path): + for root, _dirs, files in os.walk(project_path): root_path = Path(root) for file_name in files: @@ -96,7 +95,7 @@ class CodeParser: self.file_contents[file_path] = content return content - def parse_all(self) -> List[ParsedChunk]: + def parse_all(self) -> list[ParsedChunk]: """Parse all discovered files.""" chunks = [] for file_path in self.files: @@ -104,7 +103,7 @@ class CodeParser: chunks.extend(file_chunks) return chunks - def parse_file(self, file_path: Path) -> List[ParsedChunk]: + def parse_file(self, file_path: Path) -> list[ParsedChunk]: """Parse a single file and extract chunks.""" language = self.detect_language(file_path) if not language: @@ -124,7 +123,7 @@ class CodeParser: else: return self._parse_generic(file_path, content, lines, language) - def _parse_python(self, file_path: Path, content: str, lines: List[str]) -> List[ParsedChunk]: + def _parse_python(self, file_path: Path, content: str, lines: list[str]) -> list[ParsedChunk]: """Parse Python file for classes and functions.""" chunks = [] current_class = None @@ -284,8 +283,8 @@ class CodeParser: return chunks - def _parse_js_like(self, file_path: Path, content: str, lines: List[str], - language: str) -> List[ParsedChunk]: + def _parse_js_like(self, file_path: Path, content: str, lines: list[str], + language: str) -> list[ParsedChunk]: """Parse JavaScript/TypeScript file.""" chunks = [] imports = self._extract_imports(content, language) @@ -392,7 +391,7 @@ class CodeParser: return chunks - def _parse_go(self, file_path: Path, content: str, lines: List[str]) -> List[ParsedChunk]: + def _parse_go(self, file_path: Path, content: str, lines: list[str]) -> list[ParsedChunk]: """Parse Go file.""" chunks = [] imports = self._extract_imports(content, "go") @@ -460,7 +459,7 @@ class CodeParser: return chunks - def _parse_rust(self, file_path: Path, content: str, lines: List[str]) -> List[ParsedChunk]: + def _parse_rust(self, file_path: Path, content: str, lines: list[str]) -> list[ParsedChunk]: """Parse Rust file.""" chunks = [] imports = self._extract_imports(content, "rust") @@ -528,8 +527,8 @@ class CodeParser: return chunks - def _parse_generic(self, file_path: Path, content: str, lines: List[str], - language: str) -> List[ParsedChunk]: + def _parse_generic(self, file_path: Path, content: str, lines: list[str], + language: str) -> list[ParsedChunk]: """Generic parser for unknown languages.""" chunks = [] imports = self._extract_imports(content, language) @@ -554,7 +553,7 @@ class CodeParser: return chunks - def _extract_imports(self, content: str, language: str) -> List[str]: + def _extract_imports(self, content: str, language: str) -> list[str]: """Extract import statements from content.""" imports = [] @@ -588,7 +587,7 @@ class CodeParser: return list(set(imports)) - def _extract_docstring(self, lines: List[str]) -> Optional[str]: + def _extract_docstring(self, lines: list[str]) -> Optional[str]: """Extract docstring from lines.""" if not lines: return None @@ -618,7 +617,7 @@ class CodeParser: return None - def _parse_params(self, params_str: str) -> List[str]: + def _parse_params(self, params_str: str) -> list[str]: """Parse function parameters.""" if not params_str.strip(): return []