from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, Optional

from src.analyzers.git_repository import GitRepository
from src.models.data_structures import CodeChurnAnalysis


@dataclass
class CodeChurnAnalyzer:
    """Aggregate added/deleted line counts over a repository's commits.

    Attributes:
        repo: Source of commits; ``analyze`` calls ``repo.get_commits()``.
        days: Not read by ``analyze``; presumably the repository applies
            the time window itself -- TODO confirm against the caller.
        churn_threshold: A commit is reported as "high churn" when its
            added + deleted line count is strictly greater than this value.
        max_high_churn_commits: Upper bound on the number of high-churn
            commits included in the result.
    """

    repo: GitRepository
    days: int
    churn_threshold: int = 500
    max_high_churn_commits: int = 50

    def __post_init__(self) -> None:
        """Reject negative limits, which would make the report misleading.

        Raises:
            ValueError: If ``churn_threshold`` or ``max_high_churn_commits``
                is negative.
        """
        if self.churn_threshold < 0:
            raise ValueError("churn_threshold must be non-negative")
        if self.max_high_churn_commits < 0:
            # A negative slice bound would silently drop commits from the
            # *end* of the ranking instead of limiting its length.
            raise ValueError("max_high_churn_commits must be non-negative")

    def analyze(self) -> Optional[CodeChurnAnalysis]:
        """Compute churn totals, per-file/per-author breakdowns and outliers.

        Returns:
            ``None`` when the repository yields no commits; otherwise a
            ``CodeChurnAnalysis`` holding the overall totals, the net change
            (added minus deleted), churn grouped by file and by author, the
            largest high-churn commits (largest first, capped at
            ``max_high_churn_commits``) and the mean churn per commit.
        """
        commits = self.repo.get_commits()
        if not commits:
            return None

        def new_counts() -> Dict[str, int]:
            return {"added": 0, "deleted": 0}

        total_lines_added = 0
        total_lines_deleted = 0
        churn_by_file: Dict[str, Dict[str, int]] = defaultdict(new_counts)
        churn_by_author: Dict[str, Dict[str, int]] = defaultdict(new_counts)
        high_churn_commits = []

        # Single pass: totals, groupings and outlier detection together.
        for commit in commits:
            added = commit.lines_added
            deleted = commit.lines_deleted

            total_lines_added += added
            total_lines_deleted += deleted

            # NOTE: only commit-level counts are read here, so every file
            # touched by a commit is credited with the commit's *full*
            # added/deleted totals, not its own share of them.
            for filepath in commit.files_changed:
                file_counts = churn_by_file[filepath]
                file_counts["added"] += added
                file_counts["deleted"] += deleted

            author_counts = churn_by_author[commit.author]
            author_counts["added"] += added
            author_counts["deleted"] += deleted

            if added + deleted > self.churn_threshold:
                high_churn_commits.append(commit)

        # Largest commits first; list.sort is stable, so commits with equal
        # churn keep the order in which the repository returned them.
        high_churn_commits.sort(
            key=lambda c: c.lines_added + c.lines_deleted,
            reverse=True,
        )

        # ``commits`` is non-empty here (see the early return above).
        total_changes = total_lines_added + total_lines_deleted
        avg_churn_per_commit = total_changes / len(commits)

        return CodeChurnAnalysis(
            total_lines_added=total_lines_added,
            total_lines_deleted=total_lines_deleted,
            net_change=total_lines_added - total_lines_deleted,
            churn_by_file=dict(churn_by_file),
            churn_by_author=dict(churn_by_author),
            high_churn_commits=high_churn_commits[: self.max_high_churn_commits],
            average_churn_per_commit=avg_churn_per_commit,
        )