"""Git change analysis for staged changes."""

from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import List, Optional

from git import Diff, Repo
from git.exc import InvalidGitRepositoryError


class ChangeType(Enum):
    """Kinds of change a path in the index can have."""

    ADDED = "added"
    DELETED = "deleted"
    MODIFIED = "modified"
    RENAMED = "renamed"
    TYPE_CHANGE = "type_change"
    UNMERGED = "unmerged"
    UNKNOWN = "unknown"


@dataclass
class StagedChange:
    """A single staged change.

    Attributes:
        path: Path used to identify the change (the new path when one exists,
            otherwise the old path).
        change_type: What happened to the path.
        old_path: Path on the "before" side of the change, if known.
        new_path: Path on the "after" side of the change, if known.
    """

    path: str
    change_type: ChangeType
    old_path: Optional[str] = None
    new_path: Optional[str] = None

    @property
    def filename(self) -> str:
        """Return the final component of ``path``."""
        return Path(self.path).name

    @property
    def is_new(self) -> bool:
        """Return True when the change adds a new file."""
        return self.change_type == ChangeType.ADDED

    @property
    def is_deleted(self) -> bool:
        """Return True when the change deletes a file."""
        return self.change_type == ChangeType.DELETED


@dataclass
class ChangeSet:
    """Collection of staged changes with convenience views by change type."""

    changes: List[StagedChange]

    def _of_type(self, change_type: ChangeType) -> List[StagedChange]:
        """Return the changes whose type equals ``change_type``."""
        return [c for c in self.changes if c.change_type == change_type]

    @property
    def added(self) -> List[StagedChange]:
        """Return the added files."""
        return self._of_type(ChangeType.ADDED)

    @property
    def deleted(self) -> List[StagedChange]:
        """Return the deleted files."""
        return self._of_type(ChangeType.DELETED)

    @property
    def modified(self) -> List[StagedChange]:
        """Return the modified files."""
        return self._of_type(ChangeType.MODIFIED)

    @property
    def renamed(self) -> List[StagedChange]:
        """Return the renamed files."""
        return self._of_type(ChangeType.RENAMED)

    @property
    def total_count(self) -> int:
        """Return the total number of changes."""
        return len(self.changes)

    @property
    def file_paths(self) -> List[str]:
        """Return the ``path`` of every change, in order."""
        return [c.path for c in self.changes]

    @property
    def has_changes(self) -> bool:
        """Return True when at least one change is present."""
        return bool(self.changes)


class ChangeAnalyzer:
    """Analyzes staged git changes."""

    def __init__(self, repo_path: Optional[str] = None):
        """Initialize the analyzer.

        Args:
            repo_path: Optional path to git repository. Uses current directory
                if not provided.
        """
        self.repo_path = repo_path
        # Opened lazily by the ``repo`` property on first access.
        self._repo: Optional[Repo] = None

    def _not_a_repo_message(self) -> str:
        """Build the error text used when the path is not a git repository."""
        return f"Not a git repository: {self.repo_path or 'current directory'}"

    @property
    def repo(self) -> Repo:
        """Return the git repository, opening it on first access.

        Raises:
            ValueError: If the configured path is not a git repository.
        """
        if self._repo is None:
            try:
                self._repo = Repo(self.repo_path or ".")
            except InvalidGitRepositoryError as exc:
                raise ValueError(self._not_a_repo_message()) from exc
        return self._repo

    def get_staged_changes(self) -> ChangeSet:
        """Get all staged changes in the repository.

        Only index-vs-HEAD differences are reported; working-tree edits that
        have not been staged are not included. Paths with merge conflicts are
        reported once, as ``ChangeType.UNMERGED``.

        Returns:
            ChangeSet containing all staged changes.

        Raises:
            ValueError: If not in a git repository.
        """
        try:
            repo = self.repo
            index = repo.index

            # Only the conflicted *paths* matter here. Each value is a list of
            # (stage, blob) pairs whose length depends on the conflict kind,
            # so it must not be unpacked into a fixed number of names.
            unmerged_paths = [str(path) for path in index.unmerged_blobs()]
            conflicted = set(unmerged_paths)

            if repo.head.is_valid():
                # GitPython treats index.diff(<commit>) as a reverse diff
                # (index -> commit), which would report staged additions as
                # deletions and swap a_path/b_path. R=True flips it back to
                # the natural commit -> index direction.
                candidates = [
                    self._diff_to_change(diff)
                    for diff in index.diff("HEAD", R=True)
                ]
            else:
                # No commit exists yet, so there is nothing to diff against:
                # every regular (stage 0) index entry is a newly added file.
                candidates = [
                    StagedChange(
                        path=str(path),
                        change_type=ChangeType.ADDED,
                        new_path=str(path),
                    )
                    for path, stage in index.entries
                    if stage == 0
                ]

            changes = [
                change
                for change in candidates
                if change is not None and change.path not in conflicted
            ]
            changes.extend(
                StagedChange(path=path, change_type=ChangeType.UNMERGED)
                for path in unmerged_paths
            )
            return ChangeSet(changes)

        except InvalidGitRepositoryError as exc:
            raise ValueError(self._not_a_repo_message()) from exc

    def _diff_to_change(self, diff: Diff) -> Optional[StagedChange]:
        """Convert a git Diff object to a StagedChange.

        Args:
            diff: Git Diff object.

        Returns:
            StagedChange object or None if conversion fails.
        """
        try:
            return StagedChange(
                # Prefer the "after" path; fall back to the "before" path for
                # entries that only have one side.
                path=diff.b_path or diff.a_path or "",
                change_type=self._get_change_type(diff),
                old_path=diff.a_path,
                new_path=diff.b_path,
            )
        except (AttributeError, TypeError):
            # Best effort: an entry that cannot be interpreted is skipped
            # rather than aborting the whole analysis.
            return None

    def _get_change_type(self, diff: Diff) -> ChangeType:
        """Determine the change type from a Diff object.

        Args:
            diff: Git Diff object.

        Returns:
            ChangeType enum value.
        """
        if diff.new_file:
            return ChangeType.ADDED
        if diff.deleted_file:
            return ChangeType.DELETED
        if diff.renamed_file:
            return ChangeType.RENAMED
        if diff.type_changed:
            return ChangeType.TYPE_CHANGE
        # Differing paths without the rename flag are still treated as a rename.
        if diff.a_path and diff.b_path and diff.a_path != diff.b_path:
            return ChangeType.RENAMED
        return ChangeType.MODIFIED

    def get_changed_extensions(self) -> List[str]:
        """Get list of file extensions from staged changes.

        Returns:
            Sorted list of unique file extensions (with dot). Paths without
            a suffix contribute nothing.
        """
        suffixes = {
            Path(path).suffix for path in self.get_staged_changes().file_paths
        }
        suffixes.discard("")
        return sorted(suffixes)

    def get_changed_directories(self) -> List[str]:
        """Get list of unique directories from staged changes.

        Returns:
            Sorted list of unique parent directory paths. Files at the
            repository root (parent ``.``) are not listed.
        """
        parents = {
            str(Path(path).parent)
            for path in self.get_staged_changes().file_paths
        }
        return sorted(p for p in parents if p and p != ".")