"""File finder for VibeGuard."""

import os
from collections import Counter
from pathlib import Path
from typing import ClassVar, Generator

import pathspec


class FileFinder:
    """Locate source files under a path, honouring ignore patterns.

    A file counts as a source file when its lower-cased suffix is a key of
    ``LANGUAGE_EXTENSIONS``. Ignore patterns are compiled once, in
    ``__init__``, as pathspec ``gitwildmatch`` patterns.
    """

    # Maps a lower-case file suffix (including the dot) to a language name.
    LANGUAGE_EXTENSIONS: ClassVar[dict[str, str]] = {
        ".py": "python",
        ".pyi": "python",
        ".js": "javascript",
        ".jsx": "javascript",
        ".ts": "typescript",
        ".tsx": "typescript",
        ".go": "go",
        ".rs": "rust",
    }

    def __init__(self, ignore_patterns: list[str] | None = None) -> None:
        """Initialize the finder and compile its ignore patterns.

        Args:
            ignore_patterns: Patterns handed to pathspec as ``gitwildmatch``
                lines. ``None`` or an empty list means nothing is ignored.
        """
        self.ignore_patterns = ignore_patterns or []
        self._spec = pathspec.PathSpec.from_lines(
            "gitwildmatch", self.ignore_patterns
        )

    def find_files(
        self, path: str | Path, recursive: bool = True
    ) -> list[Path]:
        """Find all supported source files at or below ``path``.

        Args:
            path: A file or directory to inspect.
            recursive: When ``path`` is a directory, walk the whole tree
                (``True``) or only its direct children (``False``).

        Returns:
            A sorted list of matching paths. The list is empty when ``path``
            does not exist, is neither a regular file nor a directory, or is
            a file with an unsupported suffix.
        """
        root = Path(path)

        if root.is_file():
            # A file named directly is returned without consulting the
            # ignore patterns; only its suffix is checked.
            return [root] if self._is_source_file(root) else []

        if not root.is_dir():
            # Missing path, or something that is neither file nor directory.
            return []

        candidates = root.rglob("*") if recursive else root.iterdir()
        return sorted(
            item
            for item in candidates
            if item.is_file()
            and self._is_source_file(item)
            and not self._should_ignore(item)
        )

    def _is_source_file(self, path: Path) -> bool:
        """Return whether ``path`` has a supported suffix (case-insensitive)."""
        return path.suffix.lower() in self.LANGUAGE_EXTENSIONS

    def _should_ignore(self, path: Path) -> bool:
        """Return whether ``path`` matches the compiled ignore patterns.

        The path is matched exactly as supplied (``str(path)``). Paths coming
        from ``find_files`` still carry the scan-root prefix, so patterns are
        evaluated against however the root was spelled rather than against a
        root-relative path.

        NOTE(review): this presumably means patterns containing a directory
        separator never match when an absolute root is scanned -- confirm the
        intended semantics before switching to root-relative matching.
        """
        return self._spec.match_file(str(path))

    def get_language(self, path: Path) -> str | None:
        """Return the language for ``path``'s suffix, or ``None`` if unknown."""
        return self.LANGUAGE_EXTENSIONS.get(path.suffix.lower())

    def get_supported_extensions(self) -> list[str]:
        """Return the supported file suffixes, in declaration order."""
        return list(self.LANGUAGE_EXTENSIONS)

    def get_language_counts(self, files: list[Path]) -> dict[str, int]:
        """Count files by language.

        Args:
            files: Paths to classify by suffix.

        Returns:
            A plain dict mapping each language name to the number of files
            with that language, keyed in first-seen order. Files whose suffix
            is not supported are left out.
        """
        languages = (self.get_language(file_path) for file_path in files)
        return dict(Counter(lang for lang in languages if lang))