diff --git a/src/analyzers/code_churn.py b/src/analyzers/code_churn.py
index c3b7c73..dd38acd 100644
--- a/src/analyzers/code_churn.py
+++ b/src/analyzers/code_churn.py
@@ -1,50 +1,67 @@
 from collections import defaultdict
-from dataclasses import dataclass
-from typing import Dict, Optional
+from typing import Dict, List

 from src.analyzers.git_repository import GitRepository
-from src.models.data_structures import CodeChurnAnalysis
+from src.models import CodeChurnAnalysis, FileChange


-@dataclass
 class CodeChurnAnalyzer:
-    """Analyzes code churn."""
+    """Analyze code churn and changes."""

-    repo: GitRepository
-    days: int
+    def __init__(
+        self,
+        repo: GitRepository,
+        days: int = 30,
+        churn_threshold: int = 500,
+    ) -> None:
+        """Initialize CodeChurnAnalyzer."""
+        self.repo = repo
+        self.days = days
+        self.churn_threshold = churn_threshold

-    def analyze(self) -> Optional[CodeChurnAnalysis]:
+    def analyze(self) -> CodeChurnAnalysis:
         """Analyze code churn."""
-        commits = self.repo.get_commits()
+        commits = self.repo.get_commits(since_days=self.days)
         if not commits:
-            return None
+            return CodeChurnAnalysis()

-        total_lines_added = sum(c.lines_added for c in commits)
-        total_lines_deleted = sum(c.lines_deleted for c in commits)
+        total_lines_added = sum(c.additions for c in commits)
+        total_lines_deleted = sum(c.deletions for c in commits)
         net_change = total_lines_added - total_lines_deleted

-        churn_by_file: Dict[str, Dict[str, int]] = defaultdict(lambda: {"added": 0, "deleted": 0})
-        churn_by_author: Dict[str, Dict[str, int]] = defaultdict(lambda: {"added": 0, "deleted": 0})
-
+        churn_by_file = defaultdict(int)
+        churn_by_author = defaultdict(int)
+        high_churn_files = []
         high_churn_commits = []
-        churn_threshold = 500

         for commit in commits:
+            churn_by_author[commit.author_name] += commit.lines_changed_count
+
             for filepath in commit.files_changed:
-                churn_by_file[filepath]["added"] += commit.lines_added
-                churn_by_file[filepath]["deleted"] += commit.lines_deleted
+                file_churn = commit.lines_changed_count
+                churn_by_file[filepath] += file_churn

-            churn_by_author[commit.author]["added"] += commit.lines_added
-            churn_by_author[commit.author]["deleted"] += commit.lines_deleted
+                if file_churn >= self.churn_threshold:
+                    high_churn_files.append(
+                        FileChange(
+                            filepath=filepath,
+                            lines_added=commit.additions,
+                            lines_deleted=commit.deletions,
+                        )
+                    )

-            if commit.lines_added + commit.lines_deleted > churn_threshold:
+            if commit.lines_changed_count >= self.churn_threshold:
                 high_churn_commits.append(commit)

-        high_churn_commits.sort(key=lambda c: c.lines_added + c.lines_deleted, reverse=True)
+        high_churn_files.sort(
+            key=lambda x: x.lines_added + x.lines_deleted,
+            reverse=True,
+        )
+        high_churn_files = high_churn_files[:20]

-        total_changes = sum(c.lines_added + c.lines_deleted for c in commits)
-        avg_churn_per_commit = total_changes / max(1, len(commits))
+        total_churn = sum(c.lines_changed_count for c in commits)
+        average_churn_per_commit = total_churn / max(len(commits), 1)

         return CodeChurnAnalysis(
             total_lines_added=total_lines_added,
@@ -52,6 +69,7 @@ class CodeChurnAnalyzer:
             net_change=net_change,
             churn_by_file=dict(churn_by_file),
             churn_by_author=dict(churn_by_author),
-            high_churn_commits=high_churn_commits[:50],
-            average_churn_per_commit=avg_churn_per_commit,
+            high_churn_files=high_churn_files[:10],
+            high_churn_commits=high_churn_commits[:10],
+            average_churn_per_commit=average_churn_per_commit,
         )
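
For reviewers, a minimal usage sketch of the new constructor-based API. It relies only on what the diff shows (`CodeChurnAnalyzer(repo, days, churn_threshold)`, `analyze()`, and the `net_change`, `average_churn_per_commit`, and `high_churn_files` fields of `CodeChurnAnalysis`); the `GitRepository(".")` constructor argument is an assumption, since that class is not touched by this change.

```python
# Usage sketch, not part of the diff. GitRepository's constructor is assumed
# to take a repository path; only attributes shown in the diff are used.
from src.analyzers.code_churn import CodeChurnAnalyzer
from src.analyzers.git_repository import GitRepository

repo = GitRepository(".")  # hypothetical constructor argument
analyzer = CodeChurnAnalyzer(repo=repo, days=90, churn_threshold=300)
analysis = analyzer.analyze()

print(f"Net change over {analyzer.days} days: {analysis.net_change:+d} lines")
print(f"Average churn per commit: {analysis.average_churn_per_commit:.1f}")
for change in analysis.high_churn_files:
    print(f"{change.filepath}: +{change.lines_added} / -{change.lines_deleted}")
```

Promoting `churn_threshold` to a constructor parameter (defaulting to the previously hard-coded 500) is what lets callers tune it per repository, as in the example above.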